253 lines
8.8 KiB
Haskell
Raw Normal View History

-- | Exports a geolog-lang join plan as JSON for the Rust runner in
-- @crates/glog-runner@.
--
-- Invocation:
--
-- @
-- cabal run glog-export -- <scenario> > plan.json
-- @
--
-- Available scenarios: @three-atom-chain@.
--
-- The output shape is documented in @crates\/glog-runner\/src\/lib.rs@.
-- This program is the canonical producer: any change to the IR should
-- start here, with the Rust runner updated to match.
module Main (main) where
import Algebra.Graph qualified as AG
import Data.Aeson ((.=))
import Data.Aeson qualified as Aeson
import Data.Aeson.Encode.Pretty qualified as AesonPretty
import Data.Aeson.Key qualified as Key
import Data.ByteString.Lazy.Char8 qualified as LBS8
import Data.List (sortOn)
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (Text)
import Data.Text qualified as T
import Geolog.DB.InMemory
import Geolog.DB.Plan
import Geolog.IR qualified as IR
import System.Environment (getArgs)
import System.Exit (die)
import System.IO (hPutStrLn, stderr)
-- * Scenario plumbing
--
-- A scenario fixes a schema, a set of ground facts, and a conjunction of
-- query atoms. The exporter is intentionally code-driven (not @.glog@
-- driven): @.glog@ files declare theories, not queries, so the query
-- side has to live in Haskell either way.
data Scenario = Scenario
{ scName :: String
, scTheory :: IR.FlatTheory
, scFacts :: [(IR.Path, [Val])]
, scAtoms :: [QAtom]
}
-- * three-atom-chain
--
-- Mirrors @DB.InMemoryTest@ "matches evalConjunction on three-atom chain".
-- node = {e1, e2, e3}, edge = {(e1,e2,ee1), (e2,e3,ee2)}.
-- Conjunction: node(a), edge(a, b, _), edge(b, c, _).
nodePath, edgePath :: IR.Path
nodePath = ["node"]
edgePath = ["edge"]
threeAtomChain :: Scenario
threeAtomChain =
Scenario
{ scName = "three-atom-chain"
, scTheory =
IR.FlatTheory
{ tables =
Map.fromList
[ (nodePath, IR.Table {columns = [IR.EntityType nodePath], primaryKey = Nothing})
, (edgePath, IR.Table {columns = [IR.EntityType nodePath, IR.EntityType nodePath, IR.EntityType edgePath], primaryKey = Nothing})
]
, laws = Map.empty
}
, scFacts =
[ (nodePath, [ValEntity nodePath 1])
, (nodePath, [ValEntity nodePath 2])
, (nodePath, [ValEntity nodePath 3])
, (edgePath, [ValEntity nodePath 1, ValEntity nodePath 2, ValEntity edgePath 1])
, (edgePath, [ValEntity nodePath 2, ValEntity nodePath 3, ValEntity edgePath 2])
]
, scAtoms =
[ QAtom {qaTable = nodePath, qaRowId = Nothing, qaValues = Map.singleton 0 (QVar (Var "a"))}
, QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "a")), (1, QVar (Var "b"))]}
, QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "b")), (1, QVar (Var "c"))]}
]
}
scenarios :: [Scenario]
scenarios = [threeAtomChain]
-- * JSON encoding
--
-- The shape mirrors the IR in @crates/glog-runner/src/lib.rs@:
--
-- > {
-- > "schema": {<name>: <arity>, ...},
-- > "facts": {<name>: [[<value>, ...], ...], ...},
-- > "query": {"root": <id>, "nodes": [{"id": <id>, "action": <action>}, ...]}
-- > }
-- | Render a 'Geolog.IR.Path' (a list of 'FNotation.Names.Name') as a flat
-- string for use as a relation name on the Rust side. Each 'Name' is
-- already shown with @\/@ between its own init segments and last, so we
-- reuse 'show' and join Names with @\/@ too.
pathText :: IR.Path -> Text
pathText = T.intercalate "/" . map (T.pack . show)
pathKey :: IR.Path -> Aeson.Key
pathKey = Key.fromText . pathText
encodeValue :: Val -> Aeson.Value
encodeValue =
Aeson.object . pure . \case
ValInt n -> "int" .= n
ValText t -> "str" .= t
ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n))
encodeTerm :: QVal -> Aeson.Value
encodeTerm = \case
QVar (Var name) -> Aeson.object ["var" .= name]
QLit v -> Aeson.object ["lit" .= encodeValue v]
-- | Flatten an atom into one term per stored column, mirroring
-- @Geolog.DB.InMemory.toFlatArgs@: @qaValues@ keys map to positions
-- @0..n-2@, @qaRowId@ (if present) maps to position @n-1@, and any
-- missing positions become wildcard variables with locally-unique names.
flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value]
flattenAtom atomIdx arity qa =
[ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged)
| pos <- [0 .. arity - 1]
]
where
merged = case qa.qaRowId of
Nothing -> qa.qaValues
Just v -> Map.insert (arity - 1) v qa.qaValues
wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p)))
encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value
encodeAtom tables atomIdx qa =
Aeson.object
[ "table" .= pathText qa.qaTable
, "columns" .= flattenAtom atomIdx arity qa
]
where
arity = case Map.lookup qa.qaTable tables of
Just t -> length t.columns
Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable)
-- | Stable atom indexing keyed by atom identity, so the wildcard names in
-- @flattenAtom@ are deterministic across runs even if the planner's node
-- ordering changes.
atomIndex :: [QAtom] -> Map QAtom Int
atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..])
encodeJoinOp :: JoinType -> Aeson.Value
encodeJoinOp = \case
LeftJoin -> "left"
RightJoin -> "right"
NaturalJoin -> "natural"
encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value
encodeNode tables idx n =
Aeson.object
[ "id" .= n.graphId.unPlanNodeId
, "action" .= case n.action of
PlanEvalAtom qa ->
let i = Map.findWithDefault 0 qa idx
in Aeson.object ["scan" .= encodeAtom tables i qa]
PlanJoin jt (PlanNodeId a) (PlanNodeId b) ->
Aeson.object
[ "join"
.= Aeson.object
[ "op" .= encodeJoinOp jt
, "left" .= a
, "right" .= b
]
]
]
-- | Render a 'PlanGraph' as the JSON the runner consumes. Empty graphs
-- produce @{"root": 0, "nodes": []}@, which the runner treats as a
-- well-formed but empty query.
encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value
encodeQuery tables idx (PlanGraph g)
| null nodes =
Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])]
| otherwise =
Aeson.object
[ "root" .= rootId
, "nodes" .= map (encodeNode tables idx) nodes
]
where
nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g)
rootId = case graphRoot (PlanGraph g) of
Just (PlanNodeId i) -> i
-- Non-empty graph with no topological root means a cycle, which
-- planConjunction never produces. Fall back to the last id rather
-- than crashing so a bug here is still inspectable.
Nothing -> (.graphId.unPlanNodeId) (last nodes)
encodePlan :: Scenario -> Aeson.Value
encodePlan sc =
Aeson.object
[ "_scenario" .= sc.scName
, "schema" .= Aeson.object
[pathKey p .= length t.columns | (p, t) <- Map.toList sc.scTheory.tables]
, "facts" .= Aeson.object
[pathKey p .= map (map encodeValue) rows | (p, rows) <- groupedFacts sc.scFacts]
, "query" .= encodeQuery sc.scTheory.tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms)
]
-- | Group facts by table while preserving table-first-seen order and
-- per-table insertion order.
groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])]
groupedFacts = go []
where
go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc]
go acc ((p, row) : rest) =
let acc' = case break (\(q, _) -> q == p) acc of
(before, (q, rs) : after) -> before ++ (q, row : rs) : after
(before, []) -> before ++ [(p, [row])]
in go acc' rest
-- * Self-check
--
-- Run the planner's @evalConjunctionPlanned@ against the scenario's DB
-- to confirm the plan we're about to emit is well-formed and produces
-- non-error output. Catches malformed scenarios before they hand a bad
-- plan to the Rust runner.
selfCheck :: Scenario -> IO ()
selfCheck sc = do
let db = foldl (\d (p, row) -> insertRow p row d) (fromTheory sc.scTheory) sc.scFacts
case evalConjunctionPlanned db sc.scAtoms of
Left err -> die ("self-check failed for " <> sc.scName <> ": " <> show err)
Right _ -> pure ()
-- * Entry point
main :: IO ()
main = do
args <- getArgs
case args of
[name] -> case lookup name [(s.scName, s) | s <- scenarios] of
Just sc -> do
selfCheck sc
LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc))
Nothing ->
die ("unknown scenario: " <> name <> "\navailable: " <> unwords (map (.scName) scenarios))
_ -> do
hPutStrLn stderr "usage: glog-export <scenario>"
hPutStrLn stderr ("scenarios: " <> unwords (map (.scName) scenarios))
die ""