253 lines
8.8 KiB
Haskell
253 lines
8.8 KiB
Haskell
|
|
-- | Exports a geolog-lang join plan as JSON for the Rust runner in
|
||
|
|
-- @crates/glog-runner@.
|
||
|
|
--
|
||
|
|
-- Invocation:
|
||
|
|
--
|
||
|
|
-- @
|
||
|
|
-- cabal run glog-export -- <scenario> > plan.json
|
||
|
|
-- @
|
||
|
|
--
|
||
|
|
-- Available scenarios: @three-atom-chain@.
|
||
|
|
--
|
||
|
|
-- The output shape is documented in @crates\/glog-runner\/src\/lib.rs@.
|
||
|
|
-- This program is the canonical producer: any change to the IR should
|
||
|
|
-- start here, with the Rust runner updated to match.
|
||
|
|
module Main (main) where
|
||
|
|
|
||
|
|
import Algebra.Graph qualified as AG
|
||
|
|
import Data.Aeson ((.=))
|
||
|
|
import Data.Aeson qualified as Aeson
|
||
|
|
import Data.Aeson.Encode.Pretty qualified as AesonPretty
|
||
|
|
import Data.Aeson.Key qualified as Key
|
||
|
|
import Data.ByteString.Lazy.Char8 qualified as LBS8
|
||
|
|
import Data.List (sortOn)
|
||
|
|
import Data.Map.Strict (Map)
|
||
|
|
import Data.Map.Strict qualified as Map
|
||
|
|
import Data.Set qualified as Set
|
||
|
|
import Data.Text (Text)
|
||
|
|
import Data.Text qualified as T
|
||
|
|
import Geolog.DB.InMemory
|
||
|
|
import Geolog.DB.Plan
|
||
|
|
import Geolog.IR qualified as IR
|
||
|
|
import System.Environment (getArgs)
|
||
|
|
import System.Exit (die)
|
||
|
|
import System.IO (hPutStrLn, stderr)
|
||
|
|
|
||
|
|
-- * Scenario plumbing
|
||
|
|
--
|
||
|
|
-- A scenario fixes a schema, a set of ground facts, and a conjunction of
|
||
|
|
-- query atoms. The exporter is intentionally code-driven (not @.glog@
|
||
|
|
-- driven): @.glog@ files declare theories, not queries, so the query
|
||
|
|
-- side has to live in Haskell either way.
|
||
|
|
|
||
|
|
data Scenario = Scenario
|
||
|
|
{ scName :: String
|
||
|
|
, scTheory :: IR.FlatTheory
|
||
|
|
, scFacts :: [(IR.Path, [Val])]
|
||
|
|
, scAtoms :: [QAtom]
|
||
|
|
}
|
||
|
|
|
||
|
|
-- * three-atom-chain
|
||
|
|
--
|
||
|
|
-- Mirrors @DB.InMemoryTest@ "matches evalConjunction on three-atom chain".
|
||
|
|
-- node = {e1, e2, e3}, edge = {(e1,e2,ee1), (e2,e3,ee2)}.
|
||
|
|
-- Conjunction: node(a), edge(a, b, _), edge(b, c, _).
|
||
|
|
|
||
|
|
nodePath, edgePath :: IR.Path
|
||
|
|
nodePath = ["node"]
|
||
|
|
edgePath = ["edge"]
|
||
|
|
|
||
|
|
threeAtomChain :: Scenario
|
||
|
|
threeAtomChain =
|
||
|
|
Scenario
|
||
|
|
{ scName = "three-atom-chain"
|
||
|
|
, scTheory =
|
||
|
|
IR.FlatTheory
|
||
|
|
{ tables =
|
||
|
|
Map.fromList
|
||
|
|
[ (nodePath, IR.Table {columns = [IR.EntityType nodePath], primaryKey = Nothing})
|
||
|
|
, (edgePath, IR.Table {columns = [IR.EntityType nodePath, IR.EntityType nodePath, IR.EntityType edgePath], primaryKey = Nothing})
|
||
|
|
]
|
||
|
|
, laws = Map.empty
|
||
|
|
}
|
||
|
|
, scFacts =
|
||
|
|
[ (nodePath, [ValEntity nodePath 1])
|
||
|
|
, (nodePath, [ValEntity nodePath 2])
|
||
|
|
, (nodePath, [ValEntity nodePath 3])
|
||
|
|
, (edgePath, [ValEntity nodePath 1, ValEntity nodePath 2, ValEntity edgePath 1])
|
||
|
|
, (edgePath, [ValEntity nodePath 2, ValEntity nodePath 3, ValEntity edgePath 2])
|
||
|
|
]
|
||
|
|
, scAtoms =
|
||
|
|
[ QAtom {qaTable = nodePath, qaRowId = Nothing, qaValues = Map.singleton 0 (QVar (Var "a"))}
|
||
|
|
, QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "a")), (1, QVar (Var "b"))]}
|
||
|
|
, QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "b")), (1, QVar (Var "c"))]}
|
||
|
|
]
|
||
|
|
}
|
||
|
|
|
||
|
|
scenarios :: [Scenario]
|
||
|
|
scenarios = [threeAtomChain]
|
||
|
|
|
||
|
|
-- * JSON encoding
|
||
|
|
--
|
||
|
|
-- The shape mirrors the IR in @crates/glog-runner/src/lib.rs@:
|
||
|
|
--
|
||
|
|
-- > {
|
||
|
|
-- > "schema": {<name>: <arity>, ...},
|
||
|
|
-- > "facts": {<name>: [[<value>, ...], ...], ...},
|
||
|
|
-- > "query": {"root": <id>, "nodes": [{"id": <id>, "action": <action>}, ...]}
|
||
|
|
-- > }
|
||
|
|
|
||
|
|
-- | Render a 'Geolog.IR.Path' (a list of 'FNotation.Names.Name') as a flat
|
||
|
|
-- string for use as a relation name on the Rust side. Each 'Name' is
|
||
|
|
-- already shown with @\/@ between its own init segments and last, so we
|
||
|
|
-- reuse 'show' and join Names with @\/@ too.
|
||
|
|
pathText :: IR.Path -> Text
|
||
|
|
pathText = T.intercalate "/" . map (T.pack . show)
|
||
|
|
|
||
|
|
pathKey :: IR.Path -> Aeson.Key
|
||
|
|
pathKey = Key.fromText . pathText
|
||
|
|
|
||
|
|
encodeValue :: Val -> Aeson.Value
|
||
|
|
encodeValue =
|
||
|
|
Aeson.object . pure . \case
|
||
|
|
ValInt n -> "int" .= n
|
||
|
|
ValText t -> "str" .= t
|
||
|
|
ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n))
|
||
|
|
|
||
|
|
encodeTerm :: QVal -> Aeson.Value
|
||
|
|
encodeTerm = \case
|
||
|
|
QVar (Var name) -> Aeson.object ["var" .= name]
|
||
|
|
QLit v -> Aeson.object ["lit" .= encodeValue v]
|
||
|
|
|
||
|
|
-- | Flatten an atom into one term per stored column, mirroring
|
||
|
|
-- @Geolog.DB.InMemory.toFlatArgs@: @qaValues@ keys map to positions
|
||
|
|
-- @0..n-2@, @qaRowId@ (if present) maps to position @n-1@, and any
|
||
|
|
-- missing positions become wildcard variables with locally-unique names.
|
||
|
|
flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value]
|
||
|
|
flattenAtom atomIdx arity qa =
|
||
|
|
[ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged)
|
||
|
|
| pos <- [0 .. arity - 1]
|
||
|
|
]
|
||
|
|
where
|
||
|
|
merged = case qa.qaRowId of
|
||
|
|
Nothing -> qa.qaValues
|
||
|
|
Just v -> Map.insert (arity - 1) v qa.qaValues
|
||
|
|
wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p)))
|
||
|
|
|
||
|
|
encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value
|
||
|
|
encodeAtom tables atomIdx qa =
|
||
|
|
Aeson.object
|
||
|
|
[ "table" .= pathText qa.qaTable
|
||
|
|
, "columns" .= flattenAtom atomIdx arity qa
|
||
|
|
]
|
||
|
|
where
|
||
|
|
arity = case Map.lookup qa.qaTable tables of
|
||
|
|
Just t -> length t.columns
|
||
|
|
Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable)
|
||
|
|
|
||
|
|
-- | Stable atom indexing keyed by atom identity, so the wildcard names in
|
||
|
|
-- @flattenAtom@ are deterministic across runs even if the planner's node
|
||
|
|
-- ordering changes.
|
||
|
|
atomIndex :: [QAtom] -> Map QAtom Int
|
||
|
|
atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..])
|
||
|
|
|
||
|
|
encodeJoinOp :: JoinType -> Aeson.Value
|
||
|
|
encodeJoinOp = \case
|
||
|
|
LeftJoin -> "left"
|
||
|
|
RightJoin -> "right"
|
||
|
|
NaturalJoin -> "natural"
|
||
|
|
|
||
|
|
encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value
|
||
|
|
encodeNode tables idx n =
|
||
|
|
Aeson.object
|
||
|
|
[ "id" .= n.graphId.unPlanNodeId
|
||
|
|
, "action" .= case n.action of
|
||
|
|
PlanEvalAtom qa ->
|
||
|
|
let i = Map.findWithDefault 0 qa idx
|
||
|
|
in Aeson.object ["scan" .= encodeAtom tables i qa]
|
||
|
|
PlanJoin jt (PlanNodeId a) (PlanNodeId b) ->
|
||
|
|
Aeson.object
|
||
|
|
[ "join"
|
||
|
|
.= Aeson.object
|
||
|
|
[ "op" .= encodeJoinOp jt
|
||
|
|
, "left" .= a
|
||
|
|
, "right" .= b
|
||
|
|
]
|
||
|
|
]
|
||
|
|
]
|
||
|
|
|
||
|
|
-- | Render a 'PlanGraph' as the JSON the runner consumes. Empty graphs
|
||
|
|
-- produce @{"root": 0, "nodes": []}@, which the runner treats as a
|
||
|
|
-- well-formed but empty query.
|
||
|
|
encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value
|
||
|
|
encodeQuery tables idx (PlanGraph g)
|
||
|
|
| null nodes =
|
||
|
|
Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])]
|
||
|
|
| otherwise =
|
||
|
|
Aeson.object
|
||
|
|
[ "root" .= rootId
|
||
|
|
, "nodes" .= map (encodeNode tables idx) nodes
|
||
|
|
]
|
||
|
|
where
|
||
|
|
nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g)
|
||
|
|
rootId = case graphRoot (PlanGraph g) of
|
||
|
|
Just (PlanNodeId i) -> i
|
||
|
|
-- Non-empty graph with no topological root means a cycle, which
|
||
|
|
-- planConjunction never produces. Fall back to the last id rather
|
||
|
|
-- than crashing so a bug here is still inspectable.
|
||
|
|
Nothing -> (.graphId.unPlanNodeId) (last nodes)
|
||
|
|
|
||
|
|
encodePlan :: Scenario -> Aeson.Value
|
||
|
|
encodePlan sc =
|
||
|
|
Aeson.object
|
||
|
|
[ "_scenario" .= sc.scName
|
||
|
|
, "schema" .= Aeson.object
|
||
|
|
[pathKey p .= length t.columns | (p, t) <- Map.toList sc.scTheory.tables]
|
||
|
|
, "facts" .= Aeson.object
|
||
|
|
[pathKey p .= map (map encodeValue) rows | (p, rows) <- groupedFacts sc.scFacts]
|
||
|
|
, "query" .= encodeQuery sc.scTheory.tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms)
|
||
|
|
]
|
||
|
|
|
||
|
|
-- | Group facts by table while preserving table-first-seen order and
|
||
|
|
-- per-table insertion order.
|
||
|
|
groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])]
|
||
|
|
groupedFacts = go []
|
||
|
|
where
|
||
|
|
go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc]
|
||
|
|
go acc ((p, row) : rest) =
|
||
|
|
let acc' = case break (\(q, _) -> q == p) acc of
|
||
|
|
(before, (q, rs) : after) -> before ++ (q, row : rs) : after
|
||
|
|
(before, []) -> before ++ [(p, [row])]
|
||
|
|
in go acc' rest
|
||
|
|
|
||
|
|
-- * Self-check
|
||
|
|
--
|
||
|
|
-- Run the planner's @evalConjunctionPlanned@ against the scenario's DB
|
||
|
|
-- to confirm the plan we're about to emit is well-formed and produces
|
||
|
|
-- non-error output. Catches malformed scenarios before they hand a bad
|
||
|
|
-- plan to the Rust runner.
|
||
|
|
|
||
|
|
selfCheck :: Scenario -> IO ()
|
||
|
|
selfCheck sc = do
|
||
|
|
let db = foldl (\d (p, row) -> insertRow p row d) (fromTheory sc.scTheory) sc.scFacts
|
||
|
|
case evalConjunctionPlanned db sc.scAtoms of
|
||
|
|
Left err -> die ("self-check failed for " <> sc.scName <> ": " <> show err)
|
||
|
|
Right _ -> pure ()
|
||
|
|
|
||
|
|
-- * Entry point
|
||
|
|
|
||
|
|
main :: IO ()
|
||
|
|
main = do
|
||
|
|
args <- getArgs
|
||
|
|
case args of
|
||
|
|
[name] -> case lookup name [(s.scName, s) | s <- scenarios] of
|
||
|
|
Just sc -> do
|
||
|
|
selfCheck sc
|
||
|
|
LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc))
|
||
|
|
Nothing ->
|
||
|
|
die ("unknown scenario: " <> name <> "\navailable: " <> unwords (map (.scName) scenarios))
|
||
|
|
_ -> do
|
||
|
|
hPutStrLn stderr "usage: glog-export <scenario>"
|
||
|
|
hPutStrLn stderr ("scenarios: " <> unwords (map (.scName) scenarios))
|
||
|
|
die ""
|