Hassan Abedi 4b866067a4 WIP
2026-06-05 12:50:43 +02:00

355 lines
12 KiB
Haskell

-- | Reads a @.scenario.json@ example, plans its conjunction with
-- @Geolog.DB.Plan.planConjunction@, and writes a runner-IR JSON plan that
-- @crates\/plan-runner@ consumes.
--
-- Invocation:
--
-- @
-- cabal run plan-export -- <scenario.json>
-- @
--
-- The scenario format is documented in @examples\/README@ or by example
-- (@examples\/*.scenario.json@); the output shape is documented in
-- @crates\/plan-runner\/src\/lib.rs@.
--
-- The exporter is also a self-check: before emitting, it runs the planned
-- query through @evalConjunctionPlanned@ and verifies the bindings match
-- the scenario's @expected_bindings@. A mismatched scenario fails loudly
-- here rather than handing a bad fixture to the Rust runner.
module Main (main) where
import Algebra.Graph qualified as AG
import Control.Monad (unless)
import Data.Aeson ((.!=), (.:), (.:?), (.=))
import Data.Aeson qualified as Aeson
import Data.Aeson.Encode.Pretty qualified as AesonPretty
import Data.Aeson.Key qualified as Key
import Data.Aeson.KeyMap qualified as KM
import Data.Aeson.Types (Parser)
import Data.ByteString.Lazy.Char8 qualified as LBS8
import Data.Foldable (toList)
import Data.List (sortOn)
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (Text)
import Data.Text qualified as T
import Data.String (fromString)
import FNotation.Names (Name)
import Geolog.DB.InMemory
import Geolog.DB.Plan
import Geolog.IR qualified as IR
import System.Environment (getArgs)
import System.Exit (die)
import System.IO (hPutStrLn, stderr)
-- * Scenario file format
--
-- Mirrors @Geolog.IR.FlatTheory@ + @[(Path, [Val])]@ + @[QAtom]@. The
-- 'Expected' block is optional but, when present, the exporter cross-
-- checks it against the planner's own evaluation before emitting.
data Scenario = Scenario
{ scName :: Text
, scSchema :: Map IR.Path SchemaEntry
, scFacts :: [(IR.Path, [Val])]
, scAtoms :: [QAtom]
, scExpected :: Maybe Expected
}
deriving (Show)
data SchemaEntry = SchemaEntry
{ seColumns :: [IR.ColType]
, sePrimaryKey :: Maybe [Int]
}
deriving (Show)
data Expected = Expected
{ exColumns :: [Text]
, exRows :: [[Val]]
}
deriving (Show)
-- ** JSON parsers
parsePath :: Aeson.Value -> Parser IR.Path
parsePath = Aeson.withText "path" \t -> pure [nameFromText t]
-- | Build a single-segment 'Name' from text. Multi-segment names (which
-- would carry a non-empty 'init' field) aren't needed by any current
-- example; if a scenario wants @"a/b"@-style paths, extend this helper.
nameFromText :: Text -> Name
nameFromText = fromString . T.unpack
instance Aeson.FromJSON SchemaEntry where
parseJSON = Aeson.withObject "SchemaEntry" \o ->
SchemaEntry <$> o .: "columns" <*> o .:? "primaryKey"
instance Aeson.FromJSON IR.ColType where
parseJSON = Aeson.withObject "ColType" \o -> do
case KM.toList o of
[("entity", v)] -> IR.EntityType <$> parsePath v
[("prim", v)] -> IR.PrimType <$> parsePrim v
_ -> fail "ColType: expected {\"entity\": <path>} or {\"prim\": \"int\"|\"string\"}"
parsePrim :: Aeson.Value -> Parser IR.PrimType
parsePrim = Aeson.withText "prim type" \case
"int" -> pure IR.PrimInt
"string" -> pure IR.PrimString
other -> fail ("unknown primitive type: " <> T.unpack other)
parseVal :: Aeson.Value -> Parser Val
parseVal = Aeson.withObject "Val" \o ->
case KM.toList o of
[("int", v)] -> ValInt <$> Aeson.parseJSON v
[("str", v)] -> ValText <$> Aeson.parseJSON v
[("entity", v)] -> parseEntity v
_ -> fail "Val: expected {\"int\": ..} | {\"str\": ..} | {\"entity\": [<path>, <id>]}"
where
parseEntity = Aeson.withArray "entity" \arr -> case toList arr of
[pv, nv] -> do
p <- parsePath pv
n <- Aeson.parseJSON nv
pure (ValEntity p n)
_ -> fail "entity: expected [<path>, <id>]"
parseQVal :: Aeson.Value -> Parser QVal
parseQVal = Aeson.withObject "QVal" \o ->
case KM.toList o of
[("var", v)] -> QVar . Var <$> Aeson.parseJSON v
[("lit", v)] -> QLit <$> parseVal v
_ -> fail "QVal: expected {\"var\": \"name\"} or {\"lit\": <value>}"
parseAtom :: Aeson.Value -> Parser QAtom
parseAtom = Aeson.withObject "QAtom" \o -> do
qaTable <- o .: "table" >>= parsePath
qaRowId <- o .:? "rowId" >>= traverse parseQVal
values <- o .: "values" :: Parser (Map Text Aeson.Value)
qaValues <-
Map.fromList
<$> traverse
( \(k, v) -> case reads (T.unpack k) of
[(i, "")] -> (i,) <$> parseQVal v
_ -> fail ("non-integer key in atom values: " <> T.unpack k)
)
(Map.toList values)
pure QAtom {qaTable, qaRowId, qaValues}
parseExpected :: Aeson.Value -> Parser Expected
parseExpected = Aeson.withObject "Expected" \o -> do
exColumns <- o .: "columns"
rawRows <- o .: "rows" :: Parser [[Aeson.Value]]
exRows <- traverse (traverse parseVal) rawRows
pure Expected {exColumns, exRows}
instance Aeson.FromJSON Scenario where
parseJSON = Aeson.withObject "Scenario" \o -> do
scName <- o .:? "name" .!= "unnamed"
rawSchema <- o .: "schema" :: Parser (Map Text SchemaEntry)
let scSchema = Map.fromList [([nameFromText k], v) | (k, v) <- Map.toList rawSchema]
rawFacts <- o .:? "facts" .!= mempty :: Parser (Map Text [[Aeson.Value]])
scFacts <-
concat
<$> traverse
( \(name, rows) -> do
let path = [nameFromText name]
parsedRows <- traverse (traverse parseVal) rows
pure [(path, row) | row <- parsedRows]
)
(Map.toList rawFacts)
rawAtoms <- o .: "atoms" :: Parser [Aeson.Value]
scAtoms <- traverse parseAtom rawAtoms
scExpected <- o .:? "expected_bindings" >>= traverse parseExpected
pure Scenario {scName, scSchema, scFacts, scAtoms, scExpected}
-- * Scenario → FlatTheory + DB + atoms
toFlatTheory :: Scenario -> IR.FlatTheory
toFlatTheory sc =
IR.FlatTheory
{ tables = Map.map (\e -> IR.Table {columns = seColumns e, primaryKey = sePrimaryKey e}) sc.scSchema
, laws = Map.empty
}
populateDB :: Scenario -> DB
populateDB sc = foldl (\d (p, row) -> insertRow p row d) (fromTheory (toFlatTheory sc)) sc.scFacts
-- * JSON encoding for the plan-runner IR
--
-- The shape is the same one we settled on earlier; see
-- @crates/plan-runner/src/lib.rs@.
pathText :: IR.Path -> Text
pathText = T.intercalate "/" . map (T.pack . show)
pathKey :: IR.Path -> Aeson.Key
pathKey = Key.fromText . pathText
encodeValue :: Val -> Aeson.Value
encodeValue =
Aeson.object . pure . \case
ValInt n -> "int" .= n
ValText t -> "str" .= t
ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n))
encodeTerm :: QVal -> Aeson.Value
encodeTerm = \case
QVar (Var name) -> Aeson.object ["var" .= name]
QLit v -> Aeson.object ["lit" .= encodeValue v]
flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value]
flattenAtom atomIdx arity qa =
[ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged)
| pos <- [0 .. arity - 1]
]
where
merged = case qa.qaRowId of
Nothing -> qa.qaValues
Just v -> Map.insert (arity - 1) v qa.qaValues
wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p)))
encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value
encodeAtom tables atomIdx qa =
Aeson.object
[ "table" .= pathText qa.qaTable
, "columns" .= flattenAtom atomIdx arity qa
]
where
arity = case Map.lookup qa.qaTable tables of
Just t -> length t.columns
Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable)
atomIndex :: [QAtom] -> Map QAtom Int
atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..])
encodeJoinOp :: JoinType -> Aeson.Value
encodeJoinOp = \case
LeftJoin -> "left"
RightJoin -> "right"
NaturalJoin -> "natural"
encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value
encodeNode tables idx n =
Aeson.object
[ "id" .= n.graphId.unPlanNodeId
, "action" .= case n.action of
PlanEvalAtom qa ->
let i = Map.findWithDefault 0 qa idx
in Aeson.object ["scan" .= encodeAtom tables i qa]
PlanJoin jt (PlanNodeId a) (PlanNodeId b) ->
Aeson.object
[ "join"
.= Aeson.object
[ "op" .= encodeJoinOp jt
, "left" .= a
, "right" .= b
]
]
]
encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value
encodeQuery tables idx (PlanGraph g)
| null nodes =
Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])]
| otherwise =
Aeson.object
[ "root" .= rootId
, "nodes" .= map (encodeNode tables idx) nodes
]
where
nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g)
rootId = case graphRoot (PlanGraph g) of
Just (PlanNodeId i) -> i
Nothing -> (.graphId.unPlanNodeId) (last nodes)
encodeExpected :: Expected -> Aeson.Value
encodeExpected ex =
Aeson.object
[ "columns" .= exColumns ex
, "rows" .= map (map encodeValue) (exRows ex)
]
encodePlan :: Scenario -> Aeson.Value
encodePlan sc =
Aeson.object
( [ "_scenario" .= scName sc
, "schema" .= Aeson.object [pathKey p .= length (seColumns t) | (p, t) <- Map.toList sc.scSchema]
, "facts"
.= Aeson.object
[ pathKey p .= map (map encodeValue) rows
| (p, rows) <- groupedFacts sc.scFacts
]
, "query" .= encodeQuery (toFlatTheory sc).tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms)
]
++ maybe [] (\e -> ["expected_bindings" .= encodeExpected e]) sc.scExpected
)
groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])]
groupedFacts = go []
where
go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc]
go acc ((p, row) : rest) =
let acc' = case break (\(q, _) -> q == p) acc of
(before, (q, rs) : after) -> before ++ (q, row : rs) : after
(before, []) -> before ++ [(p, [row])]
in go acc' rest
-- * Self-check
--
-- Cross-check the planned bindings against any user-supplied
-- 'expected_bindings'. Detects two classes of bug before they reach the
-- Rust side: a scenario whose 'expected' is wrong, and a planner output
-- that disagrees with 'evalConjunction'.
selfCheck :: Scenario -> IO ()
selfCheck sc = do
let db = populateDB sc
case evalConjunctionPlanned db sc.scAtoms of
Left err -> die ("self-check failed for " <> T.unpack sc.scName <> ": " <> show err)
Right actual -> case sc.scExpected of
Nothing -> pure ()
Just expected -> verifyAgainstExpected sc.scName expected actual
verifyAgainstExpected :: Text -> Expected -> Bindings -> IO ()
verifyAgainstExpected name expected actual = do
let actualCols = actual.cols
expectedCols = Set.fromList (map Var (exColumns expected))
unless (Set.isSubsetOf expectedCols actualCols) $
die $
"self-check failed for "
<> T.unpack name
<> ": expected_bindings names columns not produced by the plan: "
<> show (Set.difference expectedCols actualCols)
let projectedActual = Set.map (`projectOn` exColumns expected) actual.table
expectedProjected = Set.fromList (map (zip (exColumns expected)) (exRows expected))
expectedSet = Set.map (Map.fromList . map (\(v, x) -> (Var v, x))) expectedProjected
unless (projectedActual == expectedSet) $
die $
"self-check failed for "
<> T.unpack name
<> ":\n expected: "
<> show expectedSet
<> "\n actual: "
<> show projectedActual
projectOn :: Map Var Val -> [Text] -> Map Var Val
projectOn row keys =
Map.fromList [(Var k, v) | k <- keys, Just v <- [Map.lookup (Var k) row]]
-- * Entry point
main :: IO ()
main = do
args <- getArgs
case args of
[path] -> do
raw <- LBS8.readFile path
sc <- case Aeson.eitherDecode raw of
Left err -> die ("failed to parse " <> path <> ": " <> err)
Right sc -> pure sc
selfCheck sc
LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc))
_ -> do
hPutStrLn stderr "usage: plan-export <scenario.json>"
die ""