355 lines
12 KiB
Haskell
Raw Normal View History

2026-06-05 11:31:18 +02:00
-- | Reads a @.scenario.json@ example, plans its conjunction with
-- @Geolog.DB.Plan.planConjunction@, and writes a runner-IR JSON plan that
-- @crates\/plan-runner@ consumes.
--
-- Invocation:
--
-- @
2026-06-05 11:31:18 +02:00
-- cabal run plan-export -- <scenario.json>
-- @
--
2026-06-05 11:31:18 +02:00
-- The scenario format is documented in @examples\/README@ or by example
-- (@examples\/*.scenario.json@); the output shape is documented in
-- @crates\/plan-runner\/src\/lib.rs@.
--
2026-06-05 11:31:18 +02:00
-- The exporter is also a self-check: before emitting, it runs the planned
-- query through @evalConjunctionPlanned@ and verifies the bindings match
-- the scenario's @expected_bindings@. A mismatched scenario fails loudly
-- here rather than handing a bad fixture to the Rust runner.
module Main (main) where
import Algebra.Graph qualified as AG
2026-06-05 11:31:18 +02:00
import Control.Monad (unless)
import Data.Aeson ((.!=), (.:), (.:?), (.=))
import Data.Aeson qualified as Aeson
import Data.Aeson.Encode.Pretty qualified as AesonPretty
import Data.Aeson.Key qualified as Key
2026-06-05 11:31:18 +02:00
import Data.Aeson.KeyMap qualified as KM
import Data.Aeson.Types (Parser)
import Data.ByteString.Lazy.Char8 qualified as LBS8
2026-06-05 11:31:18 +02:00
import Data.Foldable (toList)
import Data.List (sortOn)
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (Text)
import Data.Text qualified as T
2026-06-05 11:31:18 +02:00
import Data.String (fromString)
import FNotation.Names (Name)
import Geolog.DB.InMemory
import Geolog.DB.Plan
import Geolog.IR qualified as IR
import System.Environment (getArgs)
import System.Exit (die)
import System.IO (hPutStrLn, stderr)
2026-06-05 11:31:18 +02:00
-- * Scenario file format
--
2026-06-05 11:31:18 +02:00
-- Mirrors @Geolog.IR.FlatTheory@ + @[(Path, [Val])]@ + @[QAtom]@. The
-- 'Expected' block is optional but, when present, the exporter cross-
-- checks it against the planner's own evaluation before emitting.
data Scenario = Scenario
2026-06-05 11:31:18 +02:00
{ scName :: Text
, scSchema :: Map IR.Path SchemaEntry
, scFacts :: [(IR.Path, [Val])]
, scAtoms :: [QAtom]
2026-06-05 11:31:18 +02:00
, scExpected :: Maybe Expected
}
2026-06-05 11:31:18 +02:00
deriving (Show)
2026-06-05 11:31:18 +02:00
data SchemaEntry = SchemaEntry
{ seColumns :: [IR.ColType]
, sePrimaryKey :: Maybe [Int]
}
deriving (Show)
data Expected = Expected
{ exColumns :: [Text]
, exRows :: [[Val]]
}
deriving (Show)
-- ** JSON parsers
parsePath :: Aeson.Value -> Parser IR.Path
parsePath = Aeson.withText "path" \t -> pure [nameFromText t]
-- | Build a single-segment 'Name' from text. Multi-segment names (which
-- would carry a non-empty 'init' field) aren't needed by any current
-- example; if a scenario wants @"a/b"@-style paths, extend this helper.
nameFromText :: Text -> Name
nameFromText = fromString . T.unpack
instance Aeson.FromJSON SchemaEntry where
parseJSON = Aeson.withObject "SchemaEntry" \o ->
SchemaEntry <$> o .: "columns" <*> o .:? "primaryKey"
instance Aeson.FromJSON IR.ColType where
parseJSON = Aeson.withObject "ColType" \o -> do
case KM.toList o of
[("entity", v)] -> IR.EntityType <$> parsePath v
[("prim", v)] -> IR.PrimType <$> parsePrim v
_ -> fail "ColType: expected {\"entity\": <path>} or {\"prim\": \"int\"|\"string\"}"
parsePrim :: Aeson.Value -> Parser IR.PrimType
parsePrim = Aeson.withText "prim type" \case
"int" -> pure IR.PrimInt
"string" -> pure IR.PrimString
other -> fail ("unknown primitive type: " <> T.unpack other)
parseVal :: Aeson.Value -> Parser Val
parseVal = Aeson.withObject "Val" \o ->
case KM.toList o of
[("int", v)] -> ValInt <$> Aeson.parseJSON v
[("str", v)] -> ValText <$> Aeson.parseJSON v
[("entity", v)] -> parseEntity v
_ -> fail "Val: expected {\"int\": ..} | {\"str\": ..} | {\"entity\": [<path>, <id>]}"
where
parseEntity = Aeson.withArray "entity" \arr -> case toList arr of
[pv, nv] -> do
p <- parsePath pv
n <- Aeson.parseJSON nv
pure (ValEntity p n)
_ -> fail "entity: expected [<path>, <id>]"
parseQVal :: Aeson.Value -> Parser QVal
parseQVal = Aeson.withObject "QVal" \o ->
case KM.toList o of
[("var", v)] -> QVar . Var <$> Aeson.parseJSON v
[("lit", v)] -> QLit <$> parseVal v
_ -> fail "QVal: expected {\"var\": \"name\"} or {\"lit\": <value>}"
parseAtom :: Aeson.Value -> Parser QAtom
parseAtom = Aeson.withObject "QAtom" \o -> do
qaTable <- o .: "table" >>= parsePath
qaRowId <- o .:? "rowId" >>= traverse parseQVal
values <- o .: "values" :: Parser (Map Text Aeson.Value)
qaValues <-
Map.fromList
<$> traverse
( \(k, v) -> case reads (T.unpack k) of
[(i, "")] -> (i,) <$> parseQVal v
_ -> fail ("non-integer key in atom values: " <> T.unpack k)
)
(Map.toList values)
pure QAtom {qaTable, qaRowId, qaValues}
parseExpected :: Aeson.Value -> Parser Expected
parseExpected = Aeson.withObject "Expected" \o -> do
exColumns <- o .: "columns"
rawRows <- o .: "rows" :: Parser [[Aeson.Value]]
exRows <- traverse (traverse parseVal) rawRows
pure Expected {exColumns, exRows}
instance Aeson.FromJSON Scenario where
parseJSON = Aeson.withObject "Scenario" \o -> do
scName <- o .:? "name" .!= "unnamed"
rawSchema <- o .: "schema" :: Parser (Map Text SchemaEntry)
let scSchema = Map.fromList [([nameFromText k], v) | (k, v) <- Map.toList rawSchema]
rawFacts <- o .:? "facts" .!= mempty :: Parser (Map Text [[Aeson.Value]])
scFacts <-
concat
<$> traverse
( \(name, rows) -> do
let path = [nameFromText name]
parsedRows <- traverse (traverse parseVal) rows
pure [(path, row) | row <- parsedRows]
)
(Map.toList rawFacts)
rawAtoms <- o .: "atoms" :: Parser [Aeson.Value]
scAtoms <- traverse parseAtom rawAtoms
scExpected <- o .:? "expected_bindings" >>= traverse parseExpected
pure Scenario {scName, scSchema, scFacts, scAtoms, scExpected}
-- * Scenario → FlatTheory + DB + atoms
toFlatTheory :: Scenario -> IR.FlatTheory
toFlatTheory sc =
IR.FlatTheory
{ tables = Map.map (\e -> IR.Table {columns = seColumns e, primaryKey = sePrimaryKey e}) sc.scSchema
, laws = Map.empty
}
2026-06-05 11:31:18 +02:00
populateDB :: Scenario -> DB
populateDB sc = foldl (\d (p, row) -> insertRow p row d) (fromTheory (toFlatTheory sc)) sc.scFacts
2026-06-05 11:31:18 +02:00
-- * JSON encoding for the plan-runner IR
--
2026-06-05 11:31:18 +02:00
-- The shape is the same one we settled on earlier; see
-- @crates/plan-runner/src/lib.rs@.
pathText :: IR.Path -> Text
pathText = T.intercalate "/" . map (T.pack . show)
pathKey :: IR.Path -> Aeson.Key
pathKey = Key.fromText . pathText
encodeValue :: Val -> Aeson.Value
encodeValue =
Aeson.object . pure . \case
ValInt n -> "int" .= n
ValText t -> "str" .= t
ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n))
encodeTerm :: QVal -> Aeson.Value
encodeTerm = \case
QVar (Var name) -> Aeson.object ["var" .= name]
QLit v -> Aeson.object ["lit" .= encodeValue v]
flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value]
flattenAtom atomIdx arity qa =
[ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged)
| pos <- [0 .. arity - 1]
]
where
merged = case qa.qaRowId of
Nothing -> qa.qaValues
Just v -> Map.insert (arity - 1) v qa.qaValues
wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p)))
encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value
encodeAtom tables atomIdx qa =
Aeson.object
[ "table" .= pathText qa.qaTable
, "columns" .= flattenAtom atomIdx arity qa
]
where
arity = case Map.lookup qa.qaTable tables of
Just t -> length t.columns
Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable)
atomIndex :: [QAtom] -> Map QAtom Int
atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..])
encodeJoinOp :: JoinType -> Aeson.Value
encodeJoinOp = \case
LeftJoin -> "left"
RightJoin -> "right"
NaturalJoin -> "natural"
encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value
encodeNode tables idx n =
Aeson.object
[ "id" .= n.graphId.unPlanNodeId
, "action" .= case n.action of
PlanEvalAtom qa ->
let i = Map.findWithDefault 0 qa idx
in Aeson.object ["scan" .= encodeAtom tables i qa]
PlanJoin jt (PlanNodeId a) (PlanNodeId b) ->
Aeson.object
[ "join"
.= Aeson.object
[ "op" .= encodeJoinOp jt
, "left" .= a
, "right" .= b
]
]
]
encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value
encodeQuery tables idx (PlanGraph g)
| null nodes =
Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])]
| otherwise =
Aeson.object
[ "root" .= rootId
, "nodes" .= map (encodeNode tables idx) nodes
]
where
nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g)
rootId = case graphRoot (PlanGraph g) of
Just (PlanNodeId i) -> i
Nothing -> (.graphId.unPlanNodeId) (last nodes)
2026-06-05 11:31:18 +02:00
encodeExpected :: Expected -> Aeson.Value
encodeExpected ex =
Aeson.object
[ "columns" .= exColumns ex
, "rows" .= map (map encodeValue) (exRows ex)
]
encodePlan :: Scenario -> Aeson.Value
encodePlan sc =
Aeson.object
2026-06-05 11:31:18 +02:00
( [ "_scenario" .= scName sc
, "schema" .= Aeson.object [pathKey p .= length (seColumns t) | (p, t) <- Map.toList sc.scSchema]
, "facts"
.= Aeson.object
[ pathKey p .= map (map encodeValue) rows
| (p, rows) <- groupedFacts sc.scFacts
]
, "query" .= encodeQuery (toFlatTheory sc).tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms)
]
++ maybe [] (\e -> ["expected_bindings" .= encodeExpected e]) sc.scExpected
)
groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])]
groupedFacts = go []
where
go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc]
go acc ((p, row) : rest) =
let acc' = case break (\(q, _) -> q == p) acc of
(before, (q, rs) : after) -> before ++ (q, row : rs) : after
(before, []) -> before ++ [(p, [row])]
in go acc' rest
-- * Self-check
--
2026-06-05 11:31:18 +02:00
-- Cross-check the planned bindings against any user-supplied
-- 'expected_bindings'. Detects two classes of bug before they reach the
-- Rust side: a scenario whose 'expected' is wrong, and a planner output
-- that disagrees with 'evalConjunction'.
selfCheck :: Scenario -> IO ()
selfCheck sc = do
2026-06-05 11:31:18 +02:00
let db = populateDB sc
case evalConjunctionPlanned db sc.scAtoms of
2026-06-05 11:31:18 +02:00
Left err -> die ("self-check failed for " <> T.unpack sc.scName <> ": " <> show err)
Right actual -> case sc.scExpected of
Nothing -> pure ()
Just expected -> verifyAgainstExpected sc.scName expected actual
verifyAgainstExpected :: Text -> Expected -> Bindings -> IO ()
verifyAgainstExpected name expected actual = do
let actualCols = actual.cols
expectedCols = Set.fromList (map Var (exColumns expected))
unless (Set.isSubsetOf expectedCols actualCols) $
die $
"self-check failed for "
<> T.unpack name
<> ": expected_bindings names columns not produced by the plan: "
<> show (Set.difference expectedCols actualCols)
let projectedActual = Set.map (`projectOn` exColumns expected) actual.table
expectedProjected = Set.fromList (map (zip (exColumns expected)) (exRows expected))
expectedSet = Set.map (Map.fromList . map (\(v, x) -> (Var v, x))) expectedProjected
unless (projectedActual == expectedSet) $
die $
"self-check failed for "
<> T.unpack name
<> ":\n expected: "
<> show expectedSet
<> "\n actual: "
<> show projectedActual
projectOn :: Map Var Val -> [Text] -> Map Var Val
projectOn row keys =
Map.fromList [(Var k, v) | k <- keys, Just v <- [Map.lookup (Var k) row]]
-- * Entry point
main :: IO ()
main = do
args <- getArgs
case args of
2026-06-05 11:31:18 +02:00
[path] -> do
raw <- LBS8.readFile path
sc <- case Aeson.eitherDecode raw of
Left err -> die ("failed to parse " <> path <> ": " <> err)
Right sc -> pure sc
selfCheck sc
LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc))
_ -> do
2026-06-05 11:31:18 +02:00
hPutStrLn stderr "usage: plan-export <scenario.json>"
die ""