-- | Exports a geolog-lang join plan as JSON for the Rust runner in -- @crates/glog-runner@. -- -- Invocation: -- -- @ -- cabal run glog-export -- > plan.json -- @ -- -- Available scenarios: @three-atom-chain@. -- -- The output shape is documented in @crates\/glog-runner\/src\/lib.rs@. -- This program is the canonical producer: any change to the IR should -- start here, with the Rust runner updated to match. module Main (main) where import Algebra.Graph qualified as AG import Data.Aeson ((.=)) import Data.Aeson qualified as Aeson import Data.Aeson.Encode.Pretty qualified as AesonPretty import Data.Aeson.Key qualified as Key import Data.ByteString.Lazy.Char8 qualified as LBS8 import Data.List (sortOn) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as T import Geolog.DB.InMemory import Geolog.DB.Plan import Geolog.IR qualified as IR import System.Environment (getArgs) import System.Exit (die) import System.IO (hPutStrLn, stderr) -- * Scenario plumbing -- -- A scenario fixes a schema, a set of ground facts, and a conjunction of -- query atoms. The exporter is intentionally code-driven (not @.glog@ -- driven): @.glog@ files declare theories, not queries, so the query -- side has to live in Haskell either way. data Scenario = Scenario { scName :: String , scTheory :: IR.FlatTheory , scFacts :: [(IR.Path, [Val])] , scAtoms :: [QAtom] } -- * three-atom-chain -- -- Mirrors @DB.InMemoryTest@ "matches evalConjunction on three-atom chain". -- node = {e1, e2, e3}, edge = {(e1,e2,ee1), (e2,e3,ee2)}. -- Conjunction: node(a), edge(a, b, _), edge(b, c, _). nodePath, edgePath :: IR.Path nodePath = ["node"] edgePath = ["edge"] threeAtomChain :: Scenario threeAtomChain = Scenario { scName = "three-atom-chain" , scTheory = IR.FlatTheory { tables = Map.fromList [ (nodePath, IR.Table {columns = [IR.EntityType nodePath], primaryKey = Nothing}) , (edgePath, IR.Table {columns = [IR.EntityType nodePath, IR.EntityType nodePath, IR.EntityType edgePath], primaryKey = Nothing}) ] , laws = Map.empty } , scFacts = [ (nodePath, [ValEntity nodePath 1]) , (nodePath, [ValEntity nodePath 2]) , (nodePath, [ValEntity nodePath 3]) , (edgePath, [ValEntity nodePath 1, ValEntity nodePath 2, ValEntity edgePath 1]) , (edgePath, [ValEntity nodePath 2, ValEntity nodePath 3, ValEntity edgePath 2]) ] , scAtoms = [ QAtom {qaTable = nodePath, qaRowId = Nothing, qaValues = Map.singleton 0 (QVar (Var "a"))} , QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "a")), (1, QVar (Var "b"))]} , QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "b")), (1, QVar (Var "c"))]} ] } scenarios :: [Scenario] scenarios = [threeAtomChain] -- * JSON encoding -- -- The shape mirrors the IR in @crates/glog-runner/src/lib.rs@: -- -- > { -- > "schema": {: , ...}, -- > "facts": {: [[, ...], ...], ...}, -- > "query": {"root": , "nodes": [{"id": , "action": }, ...]} -- > } -- | Render a 'Geolog.IR.Path' (a list of 'FNotation.Names.Name') as a flat -- string for use as a relation name on the Rust side. Each 'Name' is -- already shown with @\/@ between its own init segments and last, so we -- reuse 'show' and join Names with @\/@ too. pathText :: IR.Path -> Text pathText = T.intercalate "/" . map (T.pack . show) pathKey :: IR.Path -> Aeson.Key pathKey = Key.fromText . pathText encodeValue :: Val -> Aeson.Value encodeValue = Aeson.object . pure . \case ValInt n -> "int" .= n ValText t -> "str" .= t ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n)) encodeTerm :: QVal -> Aeson.Value encodeTerm = \case QVar (Var name) -> Aeson.object ["var" .= name] QLit v -> Aeson.object ["lit" .= encodeValue v] -- | Flatten an atom into one term per stored column, mirroring -- @Geolog.DB.InMemory.toFlatArgs@: @qaValues@ keys map to positions -- @0..n-2@, @qaRowId@ (if present) maps to position @n-1@, and any -- missing positions become wildcard variables with locally-unique names. flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value] flattenAtom atomIdx arity qa = [ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged) | pos <- [0 .. arity - 1] ] where merged = case qa.qaRowId of Nothing -> qa.qaValues Just v -> Map.insert (arity - 1) v qa.qaValues wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p))) encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value encodeAtom tables atomIdx qa = Aeson.object [ "table" .= pathText qa.qaTable , "columns" .= flattenAtom atomIdx arity qa ] where arity = case Map.lookup qa.qaTable tables of Just t -> length t.columns Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable) -- | Stable atom indexing keyed by atom identity, so the wildcard names in -- @flattenAtom@ are deterministic across runs even if the planner's node -- ordering changes. atomIndex :: [QAtom] -> Map QAtom Int atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..]) encodeJoinOp :: JoinType -> Aeson.Value encodeJoinOp = \case LeftJoin -> "left" RightJoin -> "right" NaturalJoin -> "natural" encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value encodeNode tables idx n = Aeson.object [ "id" .= n.graphId.unPlanNodeId , "action" .= case n.action of PlanEvalAtom qa -> let i = Map.findWithDefault 0 qa idx in Aeson.object ["scan" .= encodeAtom tables i qa] PlanJoin jt (PlanNodeId a) (PlanNodeId b) -> Aeson.object [ "join" .= Aeson.object [ "op" .= encodeJoinOp jt , "left" .= a , "right" .= b ] ] ] -- | Render a 'PlanGraph' as the JSON the runner consumes. Empty graphs -- produce @{"root": 0, "nodes": []}@, which the runner treats as a -- well-formed but empty query. encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value encodeQuery tables idx (PlanGraph g) | null nodes = Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])] | otherwise = Aeson.object [ "root" .= rootId , "nodes" .= map (encodeNode tables idx) nodes ] where nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g) rootId = case graphRoot (PlanGraph g) of Just (PlanNodeId i) -> i -- Non-empty graph with no topological root means a cycle, which -- planConjunction never produces. Fall back to the last id rather -- than crashing so a bug here is still inspectable. Nothing -> (.graphId.unPlanNodeId) (last nodes) encodePlan :: Scenario -> Aeson.Value encodePlan sc = Aeson.object [ "_scenario" .= sc.scName , "schema" .= Aeson.object [pathKey p .= length t.columns | (p, t) <- Map.toList sc.scTheory.tables] , "facts" .= Aeson.object [pathKey p .= map (map encodeValue) rows | (p, rows) <- groupedFacts sc.scFacts] , "query" .= encodeQuery sc.scTheory.tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms) ] -- | Group facts by table while preserving table-first-seen order and -- per-table insertion order. groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])] groupedFacts = go [] where go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc] go acc ((p, row) : rest) = let acc' = case break (\(q, _) -> q == p) acc of (before, (q, rs) : after) -> before ++ (q, row : rs) : after (before, []) -> before ++ [(p, [row])] in go acc' rest -- * Self-check -- -- Run the planner's @evalConjunctionPlanned@ against the scenario's DB -- to confirm the plan we're about to emit is well-formed and produces -- non-error output. Catches malformed scenarios before they hand a bad -- plan to the Rust runner. selfCheck :: Scenario -> IO () selfCheck sc = do let db = foldl (\d (p, row) -> insertRow p row d) (fromTheory sc.scTheory) sc.scFacts case evalConjunctionPlanned db sc.scAtoms of Left err -> die ("self-check failed for " <> sc.scName <> ": " <> show err) Right _ -> pure () -- * Entry point main :: IO () main = do args <- getArgs case args of [name] -> case lookup name [(s.scName, s) | s <- scenarios] of Just sc -> do selfCheck sc LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc)) Nothing -> die ("unknown scenario: " <> name <> "\navailable: " <> unwords (map (.scName) scenarios)) _ -> do hPutStrLn stderr "usage: glog-export " hPutStrLn stderr ("scenarios: " <> unwords (map (.scName) scenarios)) die ""