355 lines
12 KiB
Haskell
355 lines
12 KiB
Haskell
-- | Reads a @.scenario.json@ example, plans its conjunction with
|
|
-- @Geolog.DB.Plan.planConjunction@, and writes a runner-IR JSON plan that
|
|
-- @crates\/plan-runner@ consumes.
|
|
--
|
|
-- Invocation:
|
|
--
|
|
-- @
|
|
-- cabal run plan-export -- <scenario.json>
|
|
-- @
|
|
--
|
|
-- The scenario format is documented in @examples\/README@ or by example
|
|
-- (@examples\/*.scenario.json@); the output shape is documented in
|
|
-- @crates\/plan-runner\/src\/lib.rs@.
|
|
--
|
|
-- The exporter is also a self-check: before emitting, it runs the planned
|
|
-- query through @evalConjunctionPlanned@ and verifies the bindings match
|
|
-- the scenario's @expected_bindings@. A mismatched scenario fails loudly
|
|
-- here rather than handing a bad fixture to the Rust runner.
|
|
module Main (main) where
|
|
|
|
import Algebra.Graph qualified as AG
|
|
import Control.Monad (unless)
|
|
import Data.Aeson ((.!=), (.:), (.:?), (.=))
|
|
import Data.Aeson qualified as Aeson
|
|
import Data.Aeson.Encode.Pretty qualified as AesonPretty
|
|
import Data.Aeson.Key qualified as Key
|
|
import Data.Aeson.KeyMap qualified as KM
|
|
import Data.Aeson.Types (Parser)
|
|
import Data.ByteString.Lazy.Char8 qualified as LBS8
|
|
import Data.Foldable (toList)
|
|
import Data.List (sortOn)
|
|
import Data.Map.Strict (Map)
|
|
import Data.Map.Strict qualified as Map
|
|
import Data.Set qualified as Set
|
|
import Data.Text (Text)
|
|
import Data.Text qualified as T
|
|
import Data.String (fromString)
|
|
import FNotation.Names (Name)
|
|
import Geolog.DB.InMemory
|
|
import Geolog.DB.Plan
|
|
import Geolog.IR qualified as IR
|
|
import System.Environment (getArgs)
|
|
import System.Exit (die)
|
|
import System.IO (hPutStrLn, stderr)
|
|
|
|
-- * Scenario file format
|
|
--
|
|
-- Mirrors @Geolog.IR.FlatTheory@ + @[(Path, [Val])]@ + @[QAtom]@. The
|
|
-- 'Expected' block is optional but, when present, the exporter cross-
|
|
-- checks it against the planner's own evaluation before emitting.
|
|
|
|
data Scenario = Scenario
|
|
{ scName :: Text
|
|
, scSchema :: Map IR.Path SchemaEntry
|
|
, scFacts :: [(IR.Path, [Val])]
|
|
, scAtoms :: [QAtom]
|
|
, scExpected :: Maybe Expected
|
|
}
|
|
deriving (Show)
|
|
|
|
data SchemaEntry = SchemaEntry
|
|
{ seColumns :: [IR.ColType]
|
|
, sePrimaryKey :: Maybe [Int]
|
|
}
|
|
deriving (Show)
|
|
|
|
data Expected = Expected
|
|
{ exColumns :: [Text]
|
|
, exRows :: [[Val]]
|
|
}
|
|
deriving (Show)
|
|
|
|
-- ** JSON parsers
|
|
|
|
parsePath :: Aeson.Value -> Parser IR.Path
|
|
parsePath = Aeson.withText "path" \t -> pure [nameFromText t]
|
|
|
|
-- | Build a single-segment 'Name' from text. Multi-segment names (which
|
|
-- would carry a non-empty 'init' field) aren't needed by any current
|
|
-- example; if a scenario wants @"a/b"@-style paths, extend this helper.
|
|
nameFromText :: Text -> Name
|
|
nameFromText = fromString . T.unpack
|
|
|
|
instance Aeson.FromJSON SchemaEntry where
|
|
parseJSON = Aeson.withObject "SchemaEntry" \o ->
|
|
SchemaEntry <$> o .: "columns" <*> o .:? "primaryKey"
|
|
|
|
instance Aeson.FromJSON IR.ColType where
|
|
parseJSON = Aeson.withObject "ColType" \o -> do
|
|
case KM.toList o of
|
|
[("entity", v)] -> IR.EntityType <$> parsePath v
|
|
[("prim", v)] -> IR.PrimType <$> parsePrim v
|
|
_ -> fail "ColType: expected {\"entity\": <path>} or {\"prim\": \"int\"|\"string\"}"
|
|
|
|
parsePrim :: Aeson.Value -> Parser IR.PrimType
|
|
parsePrim = Aeson.withText "prim type" \case
|
|
"int" -> pure IR.PrimInt
|
|
"string" -> pure IR.PrimString
|
|
other -> fail ("unknown primitive type: " <> T.unpack other)
|
|
|
|
parseVal :: Aeson.Value -> Parser Val
|
|
parseVal = Aeson.withObject "Val" \o ->
|
|
case KM.toList o of
|
|
[("int", v)] -> ValInt <$> Aeson.parseJSON v
|
|
[("str", v)] -> ValText <$> Aeson.parseJSON v
|
|
[("entity", v)] -> parseEntity v
|
|
_ -> fail "Val: expected {\"int\": ..} | {\"str\": ..} | {\"entity\": [<path>, <id>]}"
|
|
where
|
|
parseEntity = Aeson.withArray "entity" \arr -> case toList arr of
|
|
[pv, nv] -> do
|
|
p <- parsePath pv
|
|
n <- Aeson.parseJSON nv
|
|
pure (ValEntity p n)
|
|
_ -> fail "entity: expected [<path>, <id>]"
|
|
|
|
parseQVal :: Aeson.Value -> Parser QVal
|
|
parseQVal = Aeson.withObject "QVal" \o ->
|
|
case KM.toList o of
|
|
[("var", v)] -> QVar . Var <$> Aeson.parseJSON v
|
|
[("lit", v)] -> QLit <$> parseVal v
|
|
_ -> fail "QVal: expected {\"var\": \"name\"} or {\"lit\": <value>}"
|
|
|
|
parseAtom :: Aeson.Value -> Parser QAtom
|
|
parseAtom = Aeson.withObject "QAtom" \o -> do
|
|
qaTable <- o .: "table" >>= parsePath
|
|
qaRowId <- o .:? "rowId" >>= traverse parseQVal
|
|
values <- o .: "values" :: Parser (Map Text Aeson.Value)
|
|
qaValues <-
|
|
Map.fromList
|
|
<$> traverse
|
|
( \(k, v) -> case reads (T.unpack k) of
|
|
[(i, "")] -> (i,) <$> parseQVal v
|
|
_ -> fail ("non-integer key in atom values: " <> T.unpack k)
|
|
)
|
|
(Map.toList values)
|
|
pure QAtom {qaTable, qaRowId, qaValues}
|
|
|
|
parseExpected :: Aeson.Value -> Parser Expected
|
|
parseExpected = Aeson.withObject "Expected" \o -> do
|
|
exColumns <- o .: "columns"
|
|
rawRows <- o .: "rows" :: Parser [[Aeson.Value]]
|
|
exRows <- traverse (traverse parseVal) rawRows
|
|
pure Expected {exColumns, exRows}
|
|
|
|
instance Aeson.FromJSON Scenario where
|
|
parseJSON = Aeson.withObject "Scenario" \o -> do
|
|
scName <- o .:? "name" .!= "unnamed"
|
|
rawSchema <- o .: "schema" :: Parser (Map Text SchemaEntry)
|
|
let scSchema = Map.fromList [([nameFromText k], v) | (k, v) <- Map.toList rawSchema]
|
|
rawFacts <- o .:? "facts" .!= mempty :: Parser (Map Text [[Aeson.Value]])
|
|
scFacts <-
|
|
concat
|
|
<$> traverse
|
|
( \(name, rows) -> do
|
|
let path = [nameFromText name]
|
|
parsedRows <- traverse (traverse parseVal) rows
|
|
pure [(path, row) | row <- parsedRows]
|
|
)
|
|
(Map.toList rawFacts)
|
|
rawAtoms <- o .: "atoms" :: Parser [Aeson.Value]
|
|
scAtoms <- traverse parseAtom rawAtoms
|
|
scExpected <- o .:? "expected_bindings" >>= traverse parseExpected
|
|
pure Scenario {scName, scSchema, scFacts, scAtoms, scExpected}
|
|
|
|
-- * Scenario → FlatTheory + DB + atoms
|
|
|
|
toFlatTheory :: Scenario -> IR.FlatTheory
|
|
toFlatTheory sc =
|
|
IR.FlatTheory
|
|
{ tables = Map.map (\e -> IR.Table {columns = seColumns e, primaryKey = sePrimaryKey e}) sc.scSchema
|
|
, laws = Map.empty
|
|
}
|
|
|
|
populateDB :: Scenario -> DB
|
|
populateDB sc = foldl (\d (p, row) -> insertRow p row d) (fromTheory (toFlatTheory sc)) sc.scFacts
|
|
|
|
-- * JSON encoding for the plan-runner IR
|
|
--
|
|
-- The shape is the same one we settled on earlier; see
|
|
-- @crates/plan-runner/src/lib.rs@.
|
|
|
|
pathText :: IR.Path -> Text
|
|
pathText = T.intercalate "/" . map (T.pack . show)
|
|
|
|
pathKey :: IR.Path -> Aeson.Key
|
|
pathKey = Key.fromText . pathText
|
|
|
|
encodeValue :: Val -> Aeson.Value
|
|
encodeValue =
|
|
Aeson.object . pure . \case
|
|
ValInt n -> "int" .= n
|
|
ValText t -> "str" .= t
|
|
ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n))
|
|
|
|
encodeTerm :: QVal -> Aeson.Value
|
|
encodeTerm = \case
|
|
QVar (Var name) -> Aeson.object ["var" .= name]
|
|
QLit v -> Aeson.object ["lit" .= encodeValue v]
|
|
|
|
flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value]
|
|
flattenAtom atomIdx arity qa =
|
|
[ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged)
|
|
| pos <- [0 .. arity - 1]
|
|
]
|
|
where
|
|
merged = case qa.qaRowId of
|
|
Nothing -> qa.qaValues
|
|
Just v -> Map.insert (arity - 1) v qa.qaValues
|
|
wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p)))
|
|
|
|
encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value
|
|
encodeAtom tables atomIdx qa =
|
|
Aeson.object
|
|
[ "table" .= pathText qa.qaTable
|
|
, "columns" .= flattenAtom atomIdx arity qa
|
|
]
|
|
where
|
|
arity = case Map.lookup qa.qaTable tables of
|
|
Just t -> length t.columns
|
|
Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable)
|
|
|
|
atomIndex :: [QAtom] -> Map QAtom Int
|
|
atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..])
|
|
|
|
encodeJoinOp :: JoinType -> Aeson.Value
|
|
encodeJoinOp = \case
|
|
LeftJoin -> "left"
|
|
RightJoin -> "right"
|
|
NaturalJoin -> "natural"
|
|
|
|
encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value
|
|
encodeNode tables idx n =
|
|
Aeson.object
|
|
[ "id" .= n.graphId.unPlanNodeId
|
|
, "action" .= case n.action of
|
|
PlanEvalAtom qa ->
|
|
let i = Map.findWithDefault 0 qa idx
|
|
in Aeson.object ["scan" .= encodeAtom tables i qa]
|
|
PlanJoin jt (PlanNodeId a) (PlanNodeId b) ->
|
|
Aeson.object
|
|
[ "join"
|
|
.= Aeson.object
|
|
[ "op" .= encodeJoinOp jt
|
|
, "left" .= a
|
|
, "right" .= b
|
|
]
|
|
]
|
|
]
|
|
|
|
encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value
|
|
encodeQuery tables idx (PlanGraph g)
|
|
| null nodes =
|
|
Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])]
|
|
| otherwise =
|
|
Aeson.object
|
|
[ "root" .= rootId
|
|
, "nodes" .= map (encodeNode tables idx) nodes
|
|
]
|
|
where
|
|
nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g)
|
|
rootId = case graphRoot (PlanGraph g) of
|
|
Just (PlanNodeId i) -> i
|
|
Nothing -> (.graphId.unPlanNodeId) (last nodes)
|
|
|
|
encodeExpected :: Expected -> Aeson.Value
|
|
encodeExpected ex =
|
|
Aeson.object
|
|
[ "columns" .= exColumns ex
|
|
, "rows" .= map (map encodeValue) (exRows ex)
|
|
]
|
|
|
|
encodePlan :: Scenario -> Aeson.Value
|
|
encodePlan sc =
|
|
Aeson.object
|
|
( [ "_scenario" .= scName sc
|
|
, "schema" .= Aeson.object [pathKey p .= length (seColumns t) | (p, t) <- Map.toList sc.scSchema]
|
|
, "facts"
|
|
.= Aeson.object
|
|
[ pathKey p .= map (map encodeValue) rows
|
|
| (p, rows) <- groupedFacts sc.scFacts
|
|
]
|
|
, "query" .= encodeQuery (toFlatTheory sc).tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms)
|
|
]
|
|
++ maybe [] (\e -> ["expected_bindings" .= encodeExpected e]) sc.scExpected
|
|
)
|
|
|
|
groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])]
|
|
groupedFacts = go []
|
|
where
|
|
go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc]
|
|
go acc ((p, row) : rest) =
|
|
let acc' = case break (\(q, _) -> q == p) acc of
|
|
(before, (q, rs) : after) -> before ++ (q, row : rs) : after
|
|
(before, []) -> before ++ [(p, [row])]
|
|
in go acc' rest
|
|
|
|
-- * Self-check
|
|
--
|
|
-- Cross-check the planned bindings against any user-supplied
|
|
-- 'expected_bindings'. Detects two classes of bug before they reach the
|
|
-- Rust side: a scenario whose 'expected' is wrong, and a planner output
|
|
-- that disagrees with 'evalConjunction'.
|
|
|
|
selfCheck :: Scenario -> IO ()
|
|
selfCheck sc = do
|
|
let db = populateDB sc
|
|
case evalConjunctionPlanned db sc.scAtoms of
|
|
Left err -> die ("self-check failed for " <> T.unpack sc.scName <> ": " <> show err)
|
|
Right actual -> case sc.scExpected of
|
|
Nothing -> pure ()
|
|
Just expected -> verifyAgainstExpected sc.scName expected actual
|
|
|
|
verifyAgainstExpected :: Text -> Expected -> Bindings -> IO ()
|
|
verifyAgainstExpected name expected actual = do
|
|
let actualCols = actual.cols
|
|
expectedCols = Set.fromList (map Var (exColumns expected))
|
|
unless (Set.isSubsetOf expectedCols actualCols) $
|
|
die $
|
|
"self-check failed for "
|
|
<> T.unpack name
|
|
<> ": expected_bindings names columns not produced by the plan: "
|
|
<> show (Set.difference expectedCols actualCols)
|
|
let projectedActual = Set.map (`projectOn` exColumns expected) actual.table
|
|
expectedProjected = Set.fromList (map (zip (exColumns expected)) (exRows expected))
|
|
expectedSet = Set.map (Map.fromList . map (\(v, x) -> (Var v, x))) expectedProjected
|
|
unless (projectedActual == expectedSet) $
|
|
die $
|
|
"self-check failed for "
|
|
<> T.unpack name
|
|
<> ":\n expected: "
|
|
<> show expectedSet
|
|
<> "\n actual: "
|
|
<> show projectedActual
|
|
|
|
projectOn :: Map Var Val -> [Text] -> Map Var Val
|
|
projectOn row keys =
|
|
Map.fromList [(Var k, v) | k <- keys, Just v <- [Map.lookup (Var k) row]]
|
|
|
|
-- * Entry point
|
|
|
|
main :: IO ()
|
|
main = do
|
|
args <- getArgs
|
|
case args of
|
|
[path] -> do
|
|
raw <- LBS8.readFile path
|
|
sc <- case Aeson.eitherDecode raw of
|
|
Left err -> die ("failed to parse " <> path <> ": " <> err)
|
|
Right sc -> pure sc
|
|
selfCheck sc
|
|
LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc))
|
|
_ -> do
|
|
hPutStrLn stderr "usage: plan-export <scenario.json>"
|
|
die ""
|