-- | Reads a @.scenario.json@ example, plans its conjunction with -- @Geolog.DB.Plan.planConjunction@, and writes a runner-IR JSON plan that -- @crates\/plan-runner@ consumes. -- -- Invocation: -- -- @ -- cabal run plan-export -- -- @ -- -- The scenario format is documented in @examples\/README@ or by example -- (@examples\/*.scenario.json@); the output shape is documented in -- @crates\/plan-runner\/src\/lib.rs@. -- -- The exporter is also a self-check: before emitting, it runs the planned -- query through @evalConjunctionPlanned@ and verifies the bindings match -- the scenario's @expected_bindings@. A mismatched scenario fails loudly -- here rather than handing a bad fixture to the Rust runner. module Main (main) where import Algebra.Graph qualified as AG import Control.Monad (unless) import Data.Aeson ((.!=), (.:), (.:?), (.=)) import Data.Aeson qualified as Aeson import Data.Aeson.Encode.Pretty qualified as AesonPretty import Data.Aeson.Key qualified as Key import Data.Aeson.KeyMap qualified as KM import Data.Aeson.Types (Parser) import Data.ByteString.Lazy.Char8 qualified as LBS8 import Data.Foldable (toList) import Data.List (sortOn) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as T import Data.String (fromString) import FNotation.Names (Name) import Geolog.DB.InMemory import Geolog.DB.Plan import Geolog.IR qualified as IR import System.Environment (getArgs) import System.Exit (die) import System.IO (hPutStrLn, stderr) -- * Scenario file format -- -- Mirrors @Geolog.IR.FlatTheory@ + @[(Path, [Val])]@ + @[QAtom]@. The -- 'Expected' block is optional but, when present, the exporter cross- -- checks it against the planner's own evaluation before emitting. data Scenario = Scenario { scName :: Text , scSchema :: Map IR.Path SchemaEntry , scFacts :: [(IR.Path, [Val])] , scAtoms :: [QAtom] , scExpected :: Maybe Expected } deriving (Show) data SchemaEntry = SchemaEntry { seColumns :: [IR.ColType] , sePrimaryKey :: Maybe [Int] } deriving (Show) data Expected = Expected { exColumns :: [Text] , exRows :: [[Val]] } deriving (Show) -- ** JSON parsers parsePath :: Aeson.Value -> Parser IR.Path parsePath = Aeson.withText "path" \t -> pure [nameFromText t] -- | Build a single-segment 'Name' from text. Multi-segment names (which -- would carry a non-empty 'init' field) aren't needed by any current -- example; if a scenario wants @"a/b"@-style paths, extend this helper. nameFromText :: Text -> Name nameFromText = fromString . T.unpack instance Aeson.FromJSON SchemaEntry where parseJSON = Aeson.withObject "SchemaEntry" \o -> SchemaEntry <$> o .: "columns" <*> o .:? "primaryKey" instance Aeson.FromJSON IR.ColType where parseJSON = Aeson.withObject "ColType" \o -> do case KM.toList o of [("entity", v)] -> IR.EntityType <$> parsePath v [("prim", v)] -> IR.PrimType <$> parsePrim v _ -> fail "ColType: expected {\"entity\": } or {\"prim\": \"int\"|\"string\"}" parsePrim :: Aeson.Value -> Parser IR.PrimType parsePrim = Aeson.withText "prim type" \case "int" -> pure IR.PrimInt "string" -> pure IR.PrimString other -> fail ("unknown primitive type: " <> T.unpack other) parseVal :: Aeson.Value -> Parser Val parseVal = Aeson.withObject "Val" \o -> case KM.toList o of [("int", v)] -> ValInt <$> Aeson.parseJSON v [("str", v)] -> ValText <$> Aeson.parseJSON v [("entity", v)] -> parseEntity v _ -> fail "Val: expected {\"int\": ..} | {\"str\": ..} | {\"entity\": [, ]}" where parseEntity = Aeson.withArray "entity" \arr -> case toList arr of [pv, nv] -> do p <- parsePath pv n <- Aeson.parseJSON nv pure (ValEntity p n) _ -> fail "entity: expected [, ]" parseQVal :: Aeson.Value -> Parser QVal parseQVal = Aeson.withObject "QVal" \o -> case KM.toList o of [("var", v)] -> QVar . Var <$> Aeson.parseJSON v [("lit", v)] -> QLit <$> parseVal v _ -> fail "QVal: expected {\"var\": \"name\"} or {\"lit\": }" parseAtom :: Aeson.Value -> Parser QAtom parseAtom = Aeson.withObject "QAtom" \o -> do qaTable <- o .: "table" >>= parsePath qaRowId <- o .:? "rowId" >>= traverse parseQVal values <- o .: "values" :: Parser (Map Text Aeson.Value) qaValues <- Map.fromList <$> traverse ( \(k, v) -> case reads (T.unpack k) of [(i, "")] -> (i,) <$> parseQVal v _ -> fail ("non-integer key in atom values: " <> T.unpack k) ) (Map.toList values) pure QAtom {qaTable, qaRowId, qaValues} parseExpected :: Aeson.Value -> Parser Expected parseExpected = Aeson.withObject "Expected" \o -> do exColumns <- o .: "columns" rawRows <- o .: "rows" :: Parser [[Aeson.Value]] exRows <- traverse (traverse parseVal) rawRows pure Expected {exColumns, exRows} instance Aeson.FromJSON Scenario where parseJSON = Aeson.withObject "Scenario" \o -> do scName <- o .:? "name" .!= "unnamed" rawSchema <- o .: "schema" :: Parser (Map Text SchemaEntry) let scSchema = Map.fromList [([nameFromText k], v) | (k, v) <- Map.toList rawSchema] rawFacts <- o .:? "facts" .!= mempty :: Parser (Map Text [[Aeson.Value]]) scFacts <- concat <$> traverse ( \(name, rows) -> do let path = [nameFromText name] parsedRows <- traverse (traverse parseVal) rows pure [(path, row) | row <- parsedRows] ) (Map.toList rawFacts) rawAtoms <- o .: "atoms" :: Parser [Aeson.Value] scAtoms <- traverse parseAtom rawAtoms scExpected <- o .:? "expected_bindings" >>= traverse parseExpected pure Scenario {scName, scSchema, scFacts, scAtoms, scExpected} -- * Scenario → FlatTheory + DB + atoms toFlatTheory :: Scenario -> IR.FlatTheory toFlatTheory sc = IR.FlatTheory { tables = Map.map (\e -> IR.Table {columns = seColumns e, primaryKey = sePrimaryKey e}) sc.scSchema , laws = Map.empty } populateDB :: Scenario -> DB populateDB sc = foldl (\d (p, row) -> insertRow p row d) (fromTheory (toFlatTheory sc)) sc.scFacts -- * JSON encoding for the plan-runner IR -- -- The shape is the same one we settled on earlier; see -- @crates/plan-runner/src/lib.rs@. pathText :: IR.Path -> Text pathText = T.intercalate "/" . map (T.pack . show) pathKey :: IR.Path -> Aeson.Key pathKey = Key.fromText . pathText encodeValue :: Val -> Aeson.Value encodeValue = Aeson.object . pure . \case ValInt n -> "int" .= n ValText t -> "str" .= t ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n)) encodeTerm :: QVal -> Aeson.Value encodeTerm = \case QVar (Var name) -> Aeson.object ["var" .= name] QLit v -> Aeson.object ["lit" .= encodeValue v] flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value] flattenAtom atomIdx arity qa = [ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged) | pos <- [0 .. arity - 1] ] where merged = case qa.qaRowId of Nothing -> qa.qaValues Just v -> Map.insert (arity - 1) v qa.qaValues wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p))) encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value encodeAtom tables atomIdx qa = Aeson.object [ "table" .= pathText qa.qaTable , "columns" .= flattenAtom atomIdx arity qa ] where arity = case Map.lookup qa.qaTable tables of Just t -> length t.columns Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable) atomIndex :: [QAtom] -> Map QAtom Int atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..]) encodeJoinOp :: JoinType -> Aeson.Value encodeJoinOp = \case LeftJoin -> "left" RightJoin -> "right" NaturalJoin -> "natural" encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value encodeNode tables idx n = Aeson.object [ "id" .= n.graphId.unPlanNodeId , "action" .= case n.action of PlanEvalAtom qa -> let i = Map.findWithDefault 0 qa idx in Aeson.object ["scan" .= encodeAtom tables i qa] PlanJoin jt (PlanNodeId a) (PlanNodeId b) -> Aeson.object [ "join" .= Aeson.object [ "op" .= encodeJoinOp jt , "left" .= a , "right" .= b ] ] ] encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value encodeQuery tables idx (PlanGraph g) | null nodes = Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])] | otherwise = Aeson.object [ "root" .= rootId , "nodes" .= map (encodeNode tables idx) nodes ] where nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g) rootId = case graphRoot (PlanGraph g) of Just (PlanNodeId i) -> i Nothing -> (.graphId.unPlanNodeId) (last nodes) encodeExpected :: Expected -> Aeson.Value encodeExpected ex = Aeson.object [ "columns" .= exColumns ex , "rows" .= map (map encodeValue) (exRows ex) ] encodePlan :: Scenario -> Aeson.Value encodePlan sc = Aeson.object ( [ "_scenario" .= scName sc , "schema" .= Aeson.object [pathKey p .= length (seColumns t) | (p, t) <- Map.toList sc.scSchema] , "facts" .= Aeson.object [ pathKey p .= map (map encodeValue) rows | (p, rows) <- groupedFacts sc.scFacts ] , "query" .= encodeQuery (toFlatTheory sc).tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms) ] ++ maybe [] (\e -> ["expected_bindings" .= encodeExpected e]) sc.scExpected ) groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])] groupedFacts = go [] where go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc] go acc ((p, row) : rest) = let acc' = case break (\(q, _) -> q == p) acc of (before, (q, rs) : after) -> before ++ (q, row : rs) : after (before, []) -> before ++ [(p, [row])] in go acc' rest -- * Self-check -- -- Cross-check the planned bindings against any user-supplied -- 'expected_bindings'. Detects two classes of bug before they reach the -- Rust side: a scenario whose 'expected' is wrong, and a planner output -- that disagrees with 'evalConjunction'. selfCheck :: Scenario -> IO () selfCheck sc = do let db = populateDB sc case evalConjunctionPlanned db sc.scAtoms of Left err -> die ("self-check failed for " <> T.unpack sc.scName <> ": " <> show err) Right actual -> case sc.scExpected of Nothing -> pure () Just expected -> verifyAgainstExpected sc.scName expected actual verifyAgainstExpected :: Text -> Expected -> Bindings -> IO () verifyAgainstExpected name expected actual = do let actualCols = actual.cols expectedCols = Set.fromList (map Var (exColumns expected)) unless (Set.isSubsetOf expectedCols actualCols) $ die $ "self-check failed for " <> T.unpack name <> ": expected_bindings names columns not produced by the plan: " <> show (Set.difference expectedCols actualCols) let projectedActual = Set.map (`projectOn` exColumns expected) actual.table expectedProjected = Set.fromList (map (zip (exColumns expected)) (exRows expected)) expectedSet = Set.map (Map.fromList . map (\(v, x) -> (Var v, x))) expectedProjected unless (projectedActual == expectedSet) $ die $ "self-check failed for " <> T.unpack name <> ":\n expected: " <> show expectedSet <> "\n actual: " <> show projectedActual projectOn :: Map Var Val -> [Text] -> Map Var Val projectOn row keys = Map.fromList [(Var k, v) | k <- keys, Just v <- [Map.lookup (Var k) row]] -- * Entry point main :: IO () main = do args <- getArgs case args of [path] -> do raw <- LBS8.readFile path sc <- case Aeson.eitherDecode raw of Left err -> die ("failed to parse " <> path <> ": " <> err) Right sc -> pure sc selfCheck sc LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc)) _ -> do hPutStrLn stderr "usage: plan-export " die ""