Add the infrastructure to run the plan on Rust
This commit is contained in:
parent
693151b89e
commit
510662e7c9
1
.gitignore
vendored
1
.gitignore
vendored
@ -81,3 +81,4 @@ tarpaulin-report.html
|
|||||||
.claude/
|
.claude/
|
||||||
.codex
|
.codex
|
||||||
.agents/
|
.agents/
|
||||||
|
dist-newstyle/
|
||||||
|
|||||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -5,4 +5,4 @@
|
|||||||
[submodule "external/geolog"]
|
[submodule "external/geolog"]
|
||||||
path = external/geolog
|
path = external/geolog
|
||||||
url = gitlab@git.sgai.uk:creators/geolog.git
|
url = gitlab@git.sgai.uk:creators/geolog.git
|
||||||
branch = query-plan-ir-draft-1
|
branch = query-plan-algebraic
|
||||||
|
|||||||
17
Makefile
17
Makefile
@ -76,6 +76,23 @@ clean: ## Remove build output
|
|||||||
cargo clean; \
|
cargo clean; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
EXPORTER_DIR := tools/exporter
|
||||||
|
EXPORTER_FIXTURES := crates/glog-runner/fixtures
|
||||||
|
EXPORTER_SCENARIOS := three-atom-chain
|
||||||
|
|
||||||
|
.PHONY: export-fixtures
|
||||||
|
export-fixtures: ## Regenerate JSON plan fixtures from the Haskell exporter (needs Cabal and GHC; use `make shell` first).
|
||||||
|
@if ! command -v cabal >/dev/null 2>&1; then \
|
||||||
|
echo "cabal not found. Enter the dev shell with 'make shell' (or 'nix develop') first."; \
|
||||||
|
exit 1; \
|
||||||
|
fi
|
||||||
|
@cd $(EXPORTER_DIR) && cabal build glog-export
|
||||||
|
@for sc in $(EXPORTER_SCENARIOS); do \
|
||||||
|
out=$(EXPORTER_FIXTURES)/$$(echo $$sc | tr '-' '_').json; \
|
||||||
|
echo "exporting $$sc -> $$out"; \
|
||||||
|
(cd $(EXPORTER_DIR) && cabal run -v0 glog-export -- $$sc) > $$out; \
|
||||||
|
done
|
||||||
|
|
||||||
.PHONY: shell
|
.PHONY: shell
|
||||||
shell: ## Enter the Nix dev shell defined in flake.nix
|
shell: ## Enter the Nix dev shell defined in flake.nix
|
||||||
@nix develop
|
@nix develop
|
||||||
|
|||||||
19
crates/glog-runner/Cargo.toml
Normal file
19
crates/glog-runner/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
[package]
|
||||||
|
name = "glog-runner"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
rust-version.workspace = true
|
||||||
|
|
||||||
|
[lints]
|
||||||
|
workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
storage = { path = "../storage" }
|
||||||
|
query-ops = { path = "../query-ops" }
|
||||||
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
serde_json = "1"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "glog-run"
|
||||||
|
path = "src/main.rs"
|
||||||
166
crates/glog-runner/fixtures/three_atom_chain.json
Normal file
166
crates/glog-runner/fixtures/three_atom_chain.json
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
{
|
||||||
|
"_scenario": "three-atom-chain",
|
||||||
|
"facts": {
|
||||||
|
"edge": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"str": "node:1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"str": "node:2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"str": "edge:1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"str": "node:2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"str": "node:3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"str": "edge:2"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"node": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"str": "node:1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"str": "node:2"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"str": "node:3"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"query": {
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"scan": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"var": "a"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"var": "b"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"var": "_w0_2"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"table": "edge"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"scan": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"var": "b"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"var": "c"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"var": "_w1_2"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"table": "edge"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"scan": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"var": "a"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"table": "node"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"join": {
|
||||||
|
"left": 1,
|
||||||
|
"op": "left",
|
||||||
|
"right": 3
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"join": {
|
||||||
|
"left": 2,
|
||||||
|
"op": "left",
|
||||||
|
"right": 4
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"join": {
|
||||||
|
"left": 5,
|
||||||
|
"op": "right",
|
||||||
|
"right": 4
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 6
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"join": {
|
||||||
|
"left": 6,
|
||||||
|
"op": "right",
|
||||||
|
"right": 3
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"join": {
|
||||||
|
"left": 6,
|
||||||
|
"op": "natural",
|
||||||
|
"right": 7
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 8
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": {
|
||||||
|
"join": {
|
||||||
|
"left": 5,
|
||||||
|
"op": "natural",
|
||||||
|
"right": 8
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": 9
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"root": 9
|
||||||
|
},
|
||||||
|
"schema": {
|
||||||
|
"edge": 3,
|
||||||
|
"node": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
344
crates/glog-runner/src/lib.rs
Normal file
344
crates/glog-runner/src/lib.rs
Normal file
@ -0,0 +1,344 @@
|
|||||||
|
//! End-to-end runner that executes a `geolog-lang` conjunctive-query plan
|
||||||
|
//! against this workspace's storage and `query-ops` operators.
|
||||||
|
//!
|
||||||
|
//! The upstream Haskell planner in `external/geolog/geolog-lang`
|
||||||
|
//! (`Geolog.DB.Plan`) builds a Yannakakis-style join DAG over `QAtom`s. This
|
||||||
|
//! crate accepts that DAG as JSON, materializes the input relations through
|
||||||
|
//! the [`Storage`] trait, and walks the DAG using
|
||||||
|
//! [`query_ops::atom::scan_atom`], [`query_ops::join::semijoin`], and
|
||||||
|
//! [`query_ops::join::natural_join`]. The result is a binding
|
||||||
|
//! [`Relation`](query_ops::relation::Relation) over the query's variables.
|
||||||
|
//!
|
||||||
|
//! The JSON IR mirrors `Geolog.DB.Plan.JoinPlan` and `Geolog.DB.InMemory.QAtom`
|
||||||
|
//! without depending on the Haskell side at build time. The Haskell exporter
|
||||||
|
//! in `tools/exporter` dumps `(schema, facts, JoinPlan)` to this shape,
|
||||||
|
//! completing the round trip; the IR is the contract.
|
||||||
|
//!
|
||||||
|
//! Mapping from the Haskell planner:
|
||||||
|
//!
|
||||||
|
//! | `Geolog.DB.Plan` | this crate |
|
||||||
|
//! |-----------------------------|-----------------------------------------------|
|
||||||
|
//! | `PlanEvalAtom` | [`Action::Scan`] → `scan_atom` |
|
||||||
|
//! | `PlanJoin LeftJoin a b` | [`Action::Join`] with [`JoinOp::Left`] → `semijoin(rel[a], rel[b])` |
|
||||||
|
//! | `PlanJoin RightJoin a b` | [`Action::Join`] with [`JoinOp::Right`] → `semijoin(rel[b], rel[a])` |
|
||||||
|
//! | `PlanJoin NaturalJoin a b` | [`Action::Join`] with [`JoinOp::Natural`] → `natural_join(rel[a], rel[b])` |
|
||||||
|
//!
|
||||||
|
//! The atom side covers `evalAtom` (`Geolog.DB.InMemory`): a [`Term::Var`]
|
||||||
|
//! repeated across positions enforces equality, [`Term::Lit`] filters by
|
||||||
|
//! constant, and distinct variables project in first-occurrence order.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
use query_ops::atom::{AtomPattern, Term, scan_atom};
|
||||||
|
use query_ops::join::{natural_join, semijoin};
|
||||||
|
use query_ops::relation::Relation;
|
||||||
|
use storage::value::Value;
|
||||||
|
use storage::{MemoryStorage, Storage, StorageError, scan_as_table};
|
||||||
|
|
||||||
|
/// A single fixture: schema, ground facts, and a query plan to execute.
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct Plan {
|
||||||
|
/// Relation name → arity (column count).
|
||||||
|
pub schema: HashMap<String, usize>,
|
||||||
|
/// Relation name → list of ground tuples to insert before execution.
|
||||||
|
pub facts: HashMap<String, Vec<Vec<JsonValue>>>,
|
||||||
|
/// The join DAG itself.
|
||||||
|
pub query: Query,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mirrors `Geolog.DB.Plan.JoinPlan`: a set of nodes plus the id of the
|
||||||
|
/// rooted result node.
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct Query {
|
||||||
|
pub root: u32,
|
||||||
|
pub nodes: Vec<Node>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One node of the plan DAG. `id`s are dense within a `Query` but don't need
|
||||||
|
/// to start at any particular value, mirroring the Haskell `PlanNodeId`.
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct Node {
|
||||||
|
pub id: u32,
|
||||||
|
pub action: Action,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// What to compute at a node. Tagged externally so JSON reads as
|
||||||
|
/// `{"action": {"scan": {...}}}` or `{"action": {"join": {...}}}`.
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum Action {
|
||||||
|
Scan(Atom),
|
||||||
|
Join(Join),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A flat atom pattern, one entry per column of the target relation.
|
||||||
|
/// Matches the `toFlatArgs` view used by `Geolog.DB.InMemory.evalAtom`:
|
||||||
|
/// `qaValues` positions are filled in directly, and the entity-id column
|
||||||
|
/// (if any) appears at the last position. Wildcard positions in the
|
||||||
|
/// Haskell `QAtom` (a `Map Int QVal` with a missing key) translate to a
|
||||||
|
/// fresh, unique variable name on this side, which the operator binds but
|
||||||
|
/// never joins against.
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct Atom {
|
||||||
|
pub table: String,
|
||||||
|
pub columns: Vec<JsonTerm>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum JsonTerm {
|
||||||
|
Var(String),
|
||||||
|
Lit(JsonValue),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wire-level value tag. Restricted to what `storage::value::Value` carries.
|
||||||
|
/// Entity identities from the Haskell side (`ValEntity path id`) round-trip
|
||||||
|
/// through `Str` for now using a `"path:id"` convention; that's a fixture
|
||||||
|
/// concern, not a runner concern.
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum JsonValue {
|
||||||
|
Int(i64),
|
||||||
|
Str(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct Join {
|
||||||
|
pub op: JoinOp,
|
||||||
|
pub left: u32,
|
||||||
|
pub right: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, Deserialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum JoinOp {
|
||||||
|
/// `Geolog.DB.Plan.LeftJoin`: result is `left` rows whose shared columns
|
||||||
|
/// appear in `right`. Lowered to `semijoin(left, right)`.
|
||||||
|
Left,
|
||||||
|
/// `Geolog.DB.Plan.RightJoin`: result is `right` rows whose shared
|
||||||
|
/// columns appear in `left`. Lowered to `semijoin(right, left)`.
|
||||||
|
Right,
|
||||||
|
/// `Geolog.DB.Plan.NaturalJoin`. Lowered to `natural_join(left, right)`.
|
||||||
|
Natural,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Errors a runner can produce in addition to storage failures.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum RunError {
|
||||||
|
/// A fact references a relation that isn't declared in `schema`.
|
||||||
|
UnknownRelation(String),
|
||||||
|
/// A node id appears in a `Join` action but no node with that id exists.
|
||||||
|
MissingNode(u32),
|
||||||
|
/// `Query.root` doesn't match any node in `nodes`.
|
||||||
|
MissingRoot(u32),
|
||||||
|
/// Two nodes share the same id.
|
||||||
|
DuplicateNode(u32),
|
||||||
|
/// A join node references its left or right side before that side has
|
||||||
|
/// been computed: the DAG isn't actually topologically sorted by id, or
|
||||||
|
/// it has a cycle.
|
||||||
|
UnresolvedDependency { node: u32, depends_on: u32 },
|
||||||
|
/// Storage layer rejected an operation.
|
||||||
|
Storage(StorageError),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for RunError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::UnknownRelation(name) => {
|
||||||
|
write!(f, "facts reference relation {name:?} not in schema")
|
||||||
|
}
|
||||||
|
Self::MissingNode(id) => write!(f, "plan references missing node id {id}"),
|
||||||
|
Self::MissingRoot(id) => write!(f, "plan root id {id} matches no node"),
|
||||||
|
Self::DuplicateNode(id) => write!(f, "duplicate node id {id} in plan"),
|
||||||
|
Self::UnresolvedDependency { node, depends_on } => write!(
|
||||||
|
f,
|
||||||
|
"node {node} depends on {depends_on}, which has not been computed yet"
|
||||||
|
),
|
||||||
|
Self::Storage(err) => write!(f, "storage error: {err}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for RunError {
|
||||||
|
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||||
|
match self {
|
||||||
|
Self::Storage(err) => Some(err),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<StorageError> for RunError {
|
||||||
|
fn from(err: StorageError) -> Self {
|
||||||
|
Self::Storage(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<JsonValue> for Value {
|
||||||
|
fn from(jv: JsonValue) -> Self {
|
||||||
|
match jv {
|
||||||
|
JsonValue::Int(n) => Self::Int(n),
|
||||||
|
JsonValue::Str(s) => Self::Str(s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<JsonTerm> for Term {
|
||||||
|
fn from(t: JsonTerm) -> Self {
|
||||||
|
match t {
|
||||||
|
JsonTerm::Var(name) => Self::Var(name),
|
||||||
|
JsonTerm::Lit(value) => Self::Lit(value.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a [`Plan`] from a JSON string.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// Returns a [`serde_json::Error`] if the input isn't valid JSON in the
|
||||||
|
/// expected shape.
|
||||||
|
pub fn parse_plan(json: &str) -> Result<Plan, serde_json::Error> {
|
||||||
|
serde_json::from_str(json)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load schema and facts from a [`Plan`] into a fresh [`MemoryStorage`].
|
||||||
|
///
|
||||||
|
/// All facts are inserted in a single transaction; commit is atomic so a
|
||||||
|
/// failure on row N leaves the storage empty.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// Returns [`RunError::UnknownRelation`] if facts mention a relation not
|
||||||
|
/// declared in `schema`. Wraps storage failures (arity mismatch, transaction
|
||||||
|
/// errors) in [`RunError::Storage`].
|
||||||
|
pub fn load_into_memory(plan: &Plan) -> Result<MemoryStorage, RunError> {
|
||||||
|
let mut storage = MemoryStorage::default();
|
||||||
|
for (name, arity) in &plan.schema {
|
||||||
|
storage.create_relation(name, *arity)?;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let mut tx = storage.transaction()?;
|
||||||
|
for (name, rows) in &plan.facts {
|
||||||
|
if !plan.schema.contains_key(name) {
|
||||||
|
return Err(RunError::UnknownRelation(name.clone()));
|
||||||
|
}
|
||||||
|
for row in rows {
|
||||||
|
let cells: Vec<Value> = row.iter().cloned().map(Value::from).collect();
|
||||||
|
tx.insert(name, cells)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let _ = tx.commit()?;
|
||||||
|
}
|
||||||
|
Ok(storage)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Execute a plan against a storage backend, returning the bindings
|
||||||
|
/// [`Relation`] for the rooted plan node.
|
||||||
|
///
|
||||||
|
/// Nodes are executed in ascending `id` order. For a Yannakakis plan as
|
||||||
|
/// emitted by `Geolog.DB.Plan` this is equivalent to a topological sort,
|
||||||
|
/// since `insertJoin` only references node ids that have already been
|
||||||
|
/// allocated. A non-monotone id ordering is rejected with
|
||||||
|
/// [`RunError::UnresolvedDependency`].
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// Returns [`RunError::DuplicateNode`] for repeated ids,
|
||||||
|
/// [`RunError::MissingNode`] for join references to unknown ids,
|
||||||
|
/// [`RunError::MissingRoot`] if `query.root` isn't present, and storage
|
||||||
|
/// errors during the per-scan `scan_as_table` call.
|
||||||
|
pub fn execute<S: Storage>(storage: &S, query: &Query) -> Result<Relation, RunError> {
|
||||||
|
let mut seen_ids: std::collections::HashSet<u32> =
|
||||||
|
std::collections::HashSet::with_capacity(query.nodes.len());
|
||||||
|
for node in &query.nodes {
|
||||||
|
if !seen_ids.insert(node.id) {
|
||||||
|
return Err(RunError::DuplicateNode(node.id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !seen_ids.contains(&query.root) {
|
||||||
|
return Err(RunError::MissingRoot(query.root));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut ordered: Vec<&Node> = query.nodes.iter().collect();
|
||||||
|
ordered.sort_by_key(|n| n.id);
|
||||||
|
|
||||||
|
let mut results: HashMap<u32, Relation> = HashMap::with_capacity(ordered.len());
|
||||||
|
for node in ordered {
|
||||||
|
let computed = match &node.action {
|
||||||
|
Action::Scan(atom) => {
|
||||||
|
let table = scan_as_table(storage, &atom.table)?;
|
||||||
|
let pattern = AtomPattern {
|
||||||
|
columns: atom.columns.iter().cloned().map(Term::from).collect(),
|
||||||
|
};
|
||||||
|
scan_atom(&table, &pattern)
|
||||||
|
}
|
||||||
|
Action::Join(join) => {
|
||||||
|
let left = require_dep(&results, &seen_ids, node.id, join.left)?;
|
||||||
|
let right = require_dep(&results, &seen_ids, node.id, join.right)?;
|
||||||
|
match join.op {
|
||||||
|
JoinOp::Left => semijoin(left, right),
|
||||||
|
JoinOp::Right => semijoin(right, left),
|
||||||
|
JoinOp::Natural => natural_join(left, right),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
results.insert(node.id, computed);
|
||||||
|
}
|
||||||
|
|
||||||
|
results
|
||||||
|
.remove(&query.root)
|
||||||
|
.ok_or(RunError::MissingRoot(query.root))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn require_dep<'a>(
|
||||||
|
results: &'a HashMap<u32, Relation>,
|
||||||
|
seen: &std::collections::HashSet<u32>,
|
||||||
|
node: u32,
|
||||||
|
depends_on: u32,
|
||||||
|
) -> Result<&'a Relation, RunError> {
|
||||||
|
if let Some(rel) = results.get(&depends_on) {
|
||||||
|
Ok(rel)
|
||||||
|
} else if seen.contains(&depends_on) {
|
||||||
|
Err(RunError::UnresolvedDependency { node, depends_on })
|
||||||
|
} else {
|
||||||
|
Err(RunError::MissingNode(depends_on))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convenience: parse JSON, load it into a fresh in-memory storage, and
|
||||||
|
/// execute, returning the root binding relation.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// Returns a JSON parse error if the input is malformed, or a [`RunError`]
|
||||||
|
/// for any later step.
|
||||||
|
pub fn run_json(json: &str) -> Result<Relation, RunFromJsonError> {
|
||||||
|
let plan = parse_plan(json).map_err(RunFromJsonError::Parse)?;
|
||||||
|
let storage = load_into_memory(&plan).map_err(RunFromJsonError::Run)?;
|
||||||
|
let bindings = execute(&storage, &plan.query).map_err(RunFromJsonError::Run)?;
|
||||||
|
Ok(bindings)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Combined error from [`run_json`].
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum RunFromJsonError {
|
||||||
|
Parse(serde_json::Error),
|
||||||
|
Run(RunError),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for RunFromJsonError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Parse(err) => write!(f, "parse error: {err}"),
|
||||||
|
Self::Run(err) => write!(f, "run error: {err}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for RunFromJsonError {
|
||||||
|
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||||
|
match self {
|
||||||
|
Self::Parse(err) => Some(err),
|
||||||
|
Self::Run(err) => Some(err),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
59
crates/glog-runner/src/main.rs
Normal file
59
crates/glog-runner/src/main.rs
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
//! `glog-run` CLI: read a JSON plan from a file (or stdin if `-`), execute
|
||||||
|
//! it against a fresh in-memory store, and print the resulting binding
|
||||||
|
//! relation as JSON on stdout.
|
||||||
|
|
||||||
|
use std::io::{self, Read};
|
||||||
|
use std::process::ExitCode;
|
||||||
|
|
||||||
|
fn main() -> ExitCode {
|
||||||
|
let mut args = std::env::args().skip(1);
|
||||||
|
let Some(path) = args.next() else {
|
||||||
|
eprintln!("usage: glog-run <plan.json | ->");
|
||||||
|
return ExitCode::from(2);
|
||||||
|
};
|
||||||
|
|
||||||
|
let input = match read_input(&path) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(err) => {
|
||||||
|
eprintln!("failed to read {path}: {err}");
|
||||||
|
return ExitCode::from(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let relation = match glog_runner::run_json(&input) {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(err) => {
|
||||||
|
eprintln!("{err}");
|
||||||
|
return ExitCode::from(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let payload = serde_json::json!({
|
||||||
|
"columns": relation.columns,
|
||||||
|
"rows": relation
|
||||||
|
.rows
|
||||||
|
.iter()
|
||||||
|
.map(|row| row.iter().map(value_to_json).collect::<Vec<_>>())
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
});
|
||||||
|
println!("{payload}");
|
||||||
|
ExitCode::SUCCESS
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_input(path: &str) -> io::Result<String> {
|
||||||
|
if path == "-" {
|
||||||
|
let mut buf = String::new();
|
||||||
|
io::stdin().read_to_string(&mut buf)?;
|
||||||
|
Ok(buf)
|
||||||
|
} else {
|
||||||
|
std::fs::read_to_string(path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn value_to_json(value: &storage::value::Value) -> serde_json::Value {
|
||||||
|
match value {
|
||||||
|
storage::value::Value::Int(n) => serde_json::Value::Number((*n).into()),
|
||||||
|
storage::value::Value::Str(s) => serde_json::Value::String(s.clone()),
|
||||||
|
storage::value::Value::Id(id) => serde_json::Value::String(id.to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
73
crates/glog-runner/tests/three_atom_chain.rs
Normal file
73
crates/glog-runner/tests/three_atom_chain.rs
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
//! End-to-end check: run the JSON fixture and verify the resulting bindings
|
||||||
|
//! match the `DB.InMemoryTest` "matches evalConjunction on three-atom chain"
|
||||||
|
//! case from `external/geolog/geolog-lang/test/DB/InMemoryTest.hs`.
|
||||||
|
//!
|
||||||
|
//! For `node = {e1, e2, e3}` and `edge = {(e1,e2,ee1), (e2,e3,ee2)}` the
|
||||||
|
//! conjunction `node(a), edge(a, b, _), edge(b, c, _)` has exactly one
|
||||||
|
//! solution: `(a=e1, b=e2, c=e3)`.
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use glog_runner::run_json;
|
||||||
|
use storage::value::Value;
|
||||||
|
|
||||||
|
fn fixture() -> &'static str {
|
||||||
|
include_str!("../fixtures/three_atom_chain.json")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ent(path: &str, id: u32) -> Value {
|
||||||
|
Value::Str(format!("{path}:{id}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn project<'a>(
|
||||||
|
columns: &'a [String],
|
||||||
|
row: &'a [Value],
|
||||||
|
keep: &'a [&'a str],
|
||||||
|
) -> BTreeMap<&'a str, &'a Value> {
|
||||||
|
keep.iter()
|
||||||
|
.map(|name| {
|
||||||
|
let pos = columns
|
||||||
|
.iter()
|
||||||
|
.position(|c| c == name)
|
||||||
|
.expect("column missing");
|
||||||
|
(*name, &row[pos])
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn three_atom_chain_matches_haskell_oracle() {
|
||||||
|
let result = run_json(fixture()).expect("fixture should execute");
|
||||||
|
|
||||||
|
// The plan's root keeps every variable, including the per-atom wildcards
|
||||||
|
// (`_w0_2` and `_w1_2` in this fixture). The oracle only asserts the (a, b, c) projection.
|
||||||
|
let keep = ["a", "b", "c"];
|
||||||
|
let mut projected: Vec<BTreeMap<&str, &Value>> = result
|
||||||
|
.rows
|
||||||
|
.iter()
|
||||||
|
.map(|row| project(&result.columns, row, &keep))
|
||||||
|
.collect();
|
||||||
|
projected.sort_by_key(|m| format!("{m:?}"));
|
||||||
|
|
||||||
|
let e1 = ent("node", 1);
|
||||||
|
let e2 = ent("node", 2);
|
||||||
|
let e3 = ent("node", 3);
|
||||||
|
let expected = vec![BTreeMap::from([("a", &e1), ("b", &e2), ("c", &e3)])];
|
||||||
|
|
||||||
|
assert_eq!(projected, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn root_columns_cover_a_b_c_plus_two_wildcards() {
|
||||||
|
// The exporter emits unique wildcard variable names for the entity-id
|
||||||
|
// column of each edge atom (e.g. `_w0_2`, `_w1_2`); their exact spelling
|
||||||
|
// is an implementation detail of the exporter, so this test only checks
|
||||||
|
// that the named variables are all present and that the total column
|
||||||
|
// count is the three named ones plus two anonymous wildcards.
|
||||||
|
let result = run_json(fixture()).expect("fixture should execute");
|
||||||
|
let cols: std::collections::HashSet<&str> = result.columns.iter().map(String::as_str).collect();
|
||||||
|
for expected in ["a", "b", "c"] {
|
||||||
|
assert!(cols.contains(expected), "missing column {expected}");
|
||||||
|
}
|
||||||
|
assert_eq!(result.columns.len(), 5, "expected 3 named + 2 wildcards");
|
||||||
|
}
|
||||||
2
external/geolog
vendored
2
external/geolog
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 99d4006f4655d8a6815a9156fe4d9304515f356d
|
Subproject commit 426d4c96d6031ccaf5e14c12c3dab496e3b4c365
|
||||||
21
flake.nix
21
flake.nix
@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
description = "Storage engine playground: Rust workspace for FlowLog, DBSP, CRDT, and Geomerge experiments.";
|
description = "Storage engine playground";
|
||||||
|
|
||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||||
@ -29,12 +29,13 @@
|
|||||||
|
|
||||||
packages = [
|
packages = [
|
||||||
rustToolchain
|
rustToolchain
|
||||||
# Diagram regeneration in crates/geomerge-demo/docs/diagrams.
|
|
||||||
pkgs.graphviz
|
pkgs.graphviz
|
||||||
# Cargo helpers.
|
|
||||||
pkgs.cargo-watch
|
pkgs.cargo-watch
|
||||||
pkgs.cargo-nextest
|
pkgs.cargo-nextest
|
||||||
# Pre-commit hooks (see .pre-commit-config.yaml, Makefile setup-hooks).
|
pkgs.haskell.compiler.ghc912
|
||||||
|
pkgs.cabal-install
|
||||||
|
pkgs.pkg-config
|
||||||
|
pkgs.zlib
|
||||||
pkgs.pre-commit
|
pkgs.pre-commit
|
||||||
pkgs.python3
|
pkgs.python3
|
||||||
];
|
];
|
||||||
@ -44,10 +45,14 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
echo "storage-engine-playground dev shell"
|
# Banner goes to stderr so `nix develop --command` invocations
|
||||||
echo " rustc: $(rustc --version)"
|
# that pipe stdout (e.g. tools/exporter producing JSON) stay clean.
|
||||||
echo " cargo: $(cargo --version)"
|
>&2 echo "storage-engine-playground dev shell"
|
||||||
echo " dot: $(dot -V 2>&1)"
|
>&2 echo " rustc: $(rustc --version)"
|
||||||
|
>&2 echo " cargo: $(cargo --version)"
|
||||||
|
>&2 echo " ghc: $(ghc --version)"
|
||||||
|
>&2 echo " cabal: $(cabal --version | head -1)"
|
||||||
|
>&2 echo " dot: $(dot -V 2>&1)"
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
24
tools/exporter/cabal.project
Normal file
24
tools/exporter/cabal.project
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
-- cabal.project for the geolog -> Rust JSON exporter.
|
||||||
|
--
|
||||||
|
-- This file points at the geolog-lang library inside the external/geolog
|
||||||
|
-- submodule, plus the sibling packages it depends on (data-partition,
|
||||||
|
-- diagnostician, fnotation). It mirrors the submodule's own cabal.project
|
||||||
|
-- so the exporter sees the same source set the submodule's tests build
|
||||||
|
-- against.
|
||||||
|
|
||||||
|
packages:
|
||||||
|
glog-exporter.cabal
|
||||||
|
../../external/geolog/geolog-lang/geolog-lang.cabal
|
||||||
|
../../external/geolog/data-partition/data-partition.cabal
|
||||||
|
../../external/geolog/diagnostician/diagnostician.cabal
|
||||||
|
../../external/geolog/fnotation/fnotation.cabal
|
||||||
|
|
||||||
|
-- geolog-lang's DB.Plan.Render module uses a patched diagrams-graphviz.
|
||||||
|
-- Same pin as external/geolog/cabal.project.
|
||||||
|
source-repository-package
|
||||||
|
type: git
|
||||||
|
location: https://github.com/georgefst/diagrams-graphviz.git
|
||||||
|
tag: 993533c564861f9d0663d719eafd56efd95f59ba
|
||||||
|
|
||||||
|
jobs: $ncpus
|
||||||
|
semaphore: true
|
||||||
36
tools/exporter/glog-exporter.cabal
Normal file
36
tools/exporter/glog-exporter.cabal
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
cabal-version: 3.4
|
||||||
|
name: glog-exporter
|
||||||
|
version: 0.1.0.0
|
||||||
|
license: MIT OR Apache-2.0
|
||||||
|
author: storage-engine-playground
|
||||||
|
synopsis: Export geolog-lang join plans as JSON for the Rust runner.
|
||||||
|
description:
|
||||||
|
Builds a FlatTheory + facts + a list of QAtoms for a named scenario,
|
||||||
|
runs Geolog.DB.Plan.planConjunction, and emits a JSON document that
|
||||||
|
crates/glog-runner consumes. This allows the playground to use query-ops and
|
||||||
|
storage end-to-end with a real Yannakakis plan produced by the geolog
|
||||||
|
frontend, not a hand-written fixture.
|
||||||
|
|
||||||
|
build-type: Simple
|
||||||
|
|
||||||
|
executable glog-export
|
||||||
|
main-is: Main.hs
|
||||||
|
hs-source-dirs: src
|
||||||
|
default-language: GHC2024
|
||||||
|
default-extensions:
|
||||||
|
BlockArguments
|
||||||
|
LambdaCase
|
||||||
|
OverloadedRecordDot
|
||||||
|
OverloadedStrings
|
||||||
|
|
||||||
|
ghc-options: -Wall
|
||||||
|
|
||||||
|
build-depends:
|
||||||
|
, aeson >=2.2
|
||||||
|
, aeson-pretty >=0.8
|
||||||
|
, algebraic-graphs >=0.7
|
||||||
|
, base
|
||||||
|
, bytestring
|
||||||
|
, containers
|
||||||
|
, geolog-lang
|
||||||
|
, text
|
||||||
252
tools/exporter/src/Main.hs
Normal file
252
tools/exporter/src/Main.hs
Normal file
@ -0,0 +1,252 @@
|
|||||||
|
-- | Exports a geolog-lang join plan as JSON for the Rust runner in
|
||||||
|
-- @crates/glog-runner@.
|
||||||
|
--
|
||||||
|
-- Invocation:
|
||||||
|
--
|
||||||
|
-- @
|
||||||
|
-- cabal run glog-export -- <scenario> > plan.json
|
||||||
|
-- @
|
||||||
|
--
|
||||||
|
-- Available scenarios: @three-atom-chain@.
|
||||||
|
--
|
||||||
|
-- The output shape is documented in @crates\/glog-runner\/src\/lib.rs@.
|
||||||
|
-- This program is the canonical producer: any change to the IR should
|
||||||
|
-- start here, with the Rust runner updated to match.
|
||||||
|
module Main (main) where
|
||||||
|
|
||||||
|
import Algebra.Graph qualified as AG
|
||||||
|
import Data.Aeson ((.=))
|
||||||
|
import Data.Aeson qualified as Aeson
|
||||||
|
import Data.Aeson.Encode.Pretty qualified as AesonPretty
|
||||||
|
import Data.Aeson.Key qualified as Key
|
||||||
|
import Data.ByteString.Lazy.Char8 qualified as LBS8
|
||||||
|
import Data.List (sortOn)
|
||||||
|
import Data.Map.Strict (Map)
|
||||||
|
import Data.Map.Strict qualified as Map
|
||||||
|
import Data.Set qualified as Set
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Data.Text qualified as T
|
||||||
|
import Geolog.DB.InMemory
|
||||||
|
import Geolog.DB.Plan
|
||||||
|
import Geolog.IR qualified as IR
|
||||||
|
import System.Environment (getArgs)
|
||||||
|
import System.Exit (die)
|
||||||
|
import System.IO (hPutStrLn, stderr)
|
||||||
|
|
||||||
|
-- * Scenario plumbing
|
||||||
|
--
|
||||||
|
-- A scenario fixes a schema, a set of ground facts, and a conjunction of
|
||||||
|
-- query atoms. The exporter is intentionally code-driven (not @.glog@
|
||||||
|
-- driven): @.glog@ files declare theories, not queries, so the query
|
||||||
|
-- side has to live in Haskell either way.
|
||||||
|
|
||||||
|
-- | A self-contained export scenario: a schema, the rows to load into
-- it, and the conjunction of atoms to plan and export.
data Scenario = Scenario
  { scName :: String
  -- ^ Name used to select the scenario on the command line.
  , scTheory :: IR.FlatTheory
  -- ^ Schema the facts and atoms refer to.
  , scFacts :: [(IR.Path, [Val])]
  -- ^ Ground rows, each paired with the table it is inserted into.
  , scAtoms :: [QAtom]
  -- ^ Conjunction of query atoms handed to the planner.
  }
|
||||||
|
|
||||||
|
-- * three-atom-chain
|
||||||
|
--
|
||||||
|
-- Mirrors @DB.InMemoryTest@ "matches evalConjunction on three-atom chain".
|
||||||
|
-- node = {e1, e2, e3}, edge = {(e1,e2,ee1), (e2,e3,ee2)}.
|
||||||
|
-- Conjunction: node(a), edge(a, b, _), edge(b, c, _).
|
||||||
|
|
||||||
|
-- | Path of the @node@ table.
nodePath :: IR.Path
nodePath = ["node"]

-- | Path of the @edge@ table.
edgePath :: IR.Path
edgePath = ["edge"]
|
||||||
|
|
||||||
|
-- | The @three-atom-chain@ scenario: three nodes, two edges forming the
-- chain 1 -> 2 -> 3, and the conjunction
-- @node(a), edge(a, b, _), edge(b, c, _)@.
threeAtomChain :: Scenario
threeAtomChain =
  Scenario
    { scName = "three-atom-chain"
    , scTheory = IR.FlatTheory {tables = schema, laws = Map.empty}
    , scFacts = nodeFacts <> edgeFacts
    , scAtoms =
        [ atomOver nodePath [(0, "a")]
        , atomOver edgePath [(0, "a"), (1, "b")]
        , atomOver edgePath [(0, "b"), (1, "c")]
        ]
    }
  where
    -- Every column in this scenario is entity-typed and no table has a
    -- primary key.
    tableOf cols = IR.Table {columns = map IR.EntityType cols, primaryKey = Nothing}

    schema =
      Map.fromList
        [ (nodePath, tableOf [nodePath])
        , (edgePath, tableOf [nodePath, nodePath, edgePath])
        ]

    nodeFacts = [(nodePath, [ValEntity nodePath k]) | k <- [1, 2, 3]]

    -- (source node, target node, edge entity id)
    edgeFacts =
      [ (edgePath, [ValEntity nodePath src, ValEntity nodePath dst, ValEntity edgePath eid])
      | (src, dst, eid) <- [(1, 2, 1), (2, 3, 2)]
      ]

    -- An atom with no row-id binding whose listed positions are bound to
    -- the given variable names.
    atomOver tbl bindings =
      QAtom
        { qaTable = tbl
        , qaRowId = Nothing
        , qaValues = Map.fromList [(pos, QVar (Var v)) | (pos, v) <- bindings]
        }
|
||||||
|
|
||||||
|
-- | Every scenario the exporter knows about; 'main' looks scenarios up
-- in this list by 'scName'.
scenarios :: [Scenario]
scenarios =
  [ threeAtomChain
  ]
|
||||||
|
|
||||||
|
-- * JSON encoding
|
||||||
|
--
|
||||||
|
-- The shape mirrors the IR in @crates/glog-runner/src/lib.rs@:
|
||||||
|
--
|
||||||
|
-- > {
|
||||||
|
-- > "schema": {<name>: <arity>, ...},
|
||||||
|
-- > "facts": {<name>: [[<value>, ...], ...], ...},
|
||||||
|
-- > "query": {"root": <id>, "nodes": [{"id": <id>, "action": <action>}, ...]}
|
||||||
|
-- > }
|
||||||
|
|
||||||
|
-- | Flatten an 'IR.Path' into a single relation name for the Rust side:
-- every path segment is rendered with 'show' and the results are joined
-- with @\/@.
--
-- NOTE(review): this relies on the segment type's 'Show' instance
-- producing a plain, @\/@-separated rendering of the name — confirm
-- against its definition.
pathText :: IR.Path -> Text
pathText segments = T.intercalate "/" [T.pack (show segment) | segment <- segments]
|
||||||
|
|
||||||
|
-- | The 'pathText' rendering of a path, as a JSON object key.
pathKey :: IR.Path -> Aeson.Key
pathKey path = Key.fromText (pathText path)
|
||||||
|
|
||||||
|
-- | Encode a value as a single-field JSON object tagged by its kind:
-- integers under @"int"@, text under @"str"@. Entities are also emitted
-- under @"str"@, rendered as @<path>:<id>@.
encodeValue :: Val -> Aeson.Value
encodeValue val = Aeson.object [field]
  where
    field = case val of
      ValInt n -> "int" .= n
      ValText t -> "str" .= t
      ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n))
|
||||||
|
|
||||||
|
-- | Encode a query term: variables as @{"var": <name>}@ and literals as
-- @{"lit": <value>}@, with the value encoded by 'encodeValue'.
encodeTerm :: QVal -> Aeson.Value
encodeTerm (QVar (Var name)) = Aeson.object ["var" .= name]
encodeTerm (QLit v) = Aeson.object ["lit" .= encodeValue v]
|
||||||
|
|
||||||
|
-- | Produce exactly one encoded term per column position @0..arity-1@.
--
-- Bindings come from @qaValues@; when @qaRowId@ is present it is placed
-- at the last position (@arity - 1@), replacing any binding already
-- there. Positions with no binding become a wildcard variable named
-- @_w<atomIdx>_<pos>@.
--
-- NOTE(review): intended to match @Geolog.DB.InMemory.toFlatArgs@ —
-- confirm against that function.
flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value]
flattenAtom atomIdx arity qa = map termAt [0 .. arity - 1]
  where
    termAt pos = encodeTerm (Map.findWithDefault (fresh pos) pos byPosition)

    byPosition =
      maybe
        qa.qaValues
        (\rowId -> Map.insert (arity - 1) rowId qa.qaValues)
        qa.qaRowId

    fresh pos = QVar (Var (T.pack ("_w" <> show atomIdx <> "_" <> show pos)))
|
||||||
|
|
||||||
|
-- | Encode an atom as @{"table": <name>, "columns": [<term>, ...]}@.
--
-- The number of emitted columns is the column count of the atom's table
-- in @tables@. Calls 'error' if the table is not declared there.
encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value
encodeAtom tables atomIdx qa =
  Aeson.object
    [ "table" .= pathText qa.qaTable
    , "columns" .= flattenAtom atomIdx columnCount qa
    ]
  where
    columnCount =
      maybe
        (error ("encodeAtom: unknown table " <> show qa.qaTable))
        (length . (.columns))
        (Map.lookup qa.qaTable tables)
|
||||||
|
|
||||||
|
-- | Number the distinct atoms of a conjunction from 0, in ascending
-- 'Ord' order. Duplicate atoms share a single index.
--
-- Because the numbering depends only on the atoms themselves, the
-- wildcard names derived from it in @flattenAtom@ do not depend on the
-- order in which the planner emits nodes.
atomIndex :: [QAtom] -> Map QAtom Int
atomIndex atoms = Map.fromList (zip distinctAtoms [0 ..])
  where
    distinctAtoms = Set.toAscList (Set.fromList atoms)
|
||||||
|
|
||||||
|
-- | Encode a join type as the JSON string @"left"@, @"right"@ or
-- @"natural"@.
encodeJoinOp :: JoinType -> Aeson.Value
encodeJoinOp jt =
  Aeson.String $ case jt of
    LeftJoin -> "left"
    RightJoin -> "right"
    NaturalJoin -> "natural"
|
||||||
|
|
||||||
|
-- | Encode one plan node as @{"id": <id>, "action": <action>}@, where the
-- action is either @{"scan": <atom>}@ or
-- @{"join": {"op": ..., "left": <id>, "right": <id>}}@.
encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value
encodeNode tables idx n =
  Aeson.object
    [ "id" .= n.graphId.unPlanNodeId
    , "action" .= actionJson
    ]
  where
    actionJson :: Aeson.Value
    actionJson = case n.action of
      -- NOTE(review): an atom missing from @idx@ silently gets index 0 and
      -- would then share wildcard names with the real atom 0 — confirm
      -- the planner never emits atoms outside the indexed conjunction.
      PlanEvalAtom qa ->
        tagged "scan" (encodeAtom tables (Map.findWithDefault 0 qa idx) qa)
      PlanJoin jt (PlanNodeId a) (PlanNodeId b) ->
        tagged "join" $
          Aeson.object
            [ "op" .= encodeJoinOp jt
            , "left" .= a
            , "right" .= b
            ]

    -- Wrap a payload in a single-key object naming the action kind.
    tagged :: Aeson.Key -> Aeson.Value -> Aeson.Value
    tagged key payload = Aeson.object [key .= payload]
|
||||||
|
|
||||||
|
-- | Encode a plan graph as @{"root": <id>, "nodes": [<node>, ...]}@ with
-- the nodes listed in ascending id order.
--
-- A graph with no vertices is emitted as @{"root": 0, "nodes": []}@.
-- NOTE(review): the runner is expected to accept that as a well-formed,
-- empty query — confirm against the Rust side.
encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value
encodeQuery tables idx plan@(PlanGraph g) =
  case ordered of
    [] ->
      Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])]
    _ ->
      Aeson.object
        [ "root" .= rootId
        , "nodes" .= map (encodeNode tables idx) ordered
        ]
  where
    ordered = sortOn (.graphId.unPlanNodeId) (AG.vertexList g)

    -- When 'graphRoot' finds no root in a non-empty graph, fall back to
    -- the highest node id instead of crashing, so the emitted plan can
    -- still be inspected. Only evaluated in the non-empty branch, which
    -- is what makes 'last' safe here.
    rootId =
      maybe
        ((.graphId.unPlanNodeId) (last ordered))
        (.unPlanNodeId)
        (graphRoot plan)
|
||||||
|
|
||||||
|
-- | Build the complete JSON document for a scenario:
--
-- * @"_scenario"@: the scenario name;
-- * @"schema"@: column count per table;
-- * @"facts"@: encoded rows per table (only tables that have facts);
-- * @"query"@: the encoded plan from 'planConjunction' over the atoms.
encodePlan :: Scenario -> Aeson.Value
encodePlan sc =
  Aeson.object
    [ "_scenario" .= sc.scName
    , "schema" .= schemaJson
    , "facts" .= factsJson
    , "query" .= queryJson
    ]
  where
    tables = sc.scTheory.tables

    schemaJson =
      Aeson.object
        [pathKey p .= length t.columns | (p, t) <- Map.toList tables]

    factsJson =
      Aeson.object
        [pathKey p .= map (map encodeValue) rows | (p, rows) <- groupedFacts sc.scFacts]

    queryJson =
      encodeQuery tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms)
|
||||||
|
|
||||||
|
-- | Group facts by table, keeping tables in the order they are first
-- seen and each table's rows in insertion order.
--
-- For example, facts for tables @A, B, A@ (rows @r1, r2, r3@) yield
-- @[(A, [r1, r3]), (B, [r2])]@.
--
-- The previous version reversed the finished accumulator as a whole,
-- which returned tables in last-seen-first order even though new
-- tables are appended (not prepended) while accumulating.
groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])]
groupedFacts facts = [(tbl, reverse rowsRev) | (tbl, rowsRev) <- go [] facts]
  where
    -- The accumulator holds tables in first-seen order; each table's
    -- rows are kept newest-first so that adding a row is a cons.
    go acc [] = acc
    go acc ((tbl, row) : rest) = go (addRow tbl row acc) rest

    addRow tbl row acc = case break ((== tbl) . fst) acc of
      (before, (_, rowsRev) : after) -> before ++ (tbl, row : rowsRev) : after
      (before, []) -> before ++ [(tbl, [row])]
|
||||||
|
|
||||||
|
-- * Self-check
|
||||||
|
--
|
||||||
|
-- Run the planner's @evalConjunctionPlanned@ against the scenario's DB
|
||||||
|
-- to confirm the plan we're about to emit is well-formed and produces
|
||||||
|
-- non-error output. Catches malformed scenarios before they hand a bad
|
||||||
|
-- plan to the Rust runner.
|
||||||
|
|
||||||
|
-- | Load the scenario's facts into a database built from its theory and
-- run 'evalConjunctionPlanned' over its atoms.
--
-- Exits via 'die' with the scenario name and the shown error when
-- evaluation returns 'Left'; otherwise does nothing. The successful
-- result itself is discarded.
selfCheck :: Scenario -> IO ()
selfCheck sc =
  case evalConjunctionPlanned db sc.scAtoms of
    Left err -> die ("self-check failed for " <> sc.scName <> ": " <> show err)
    Right _ -> pure ()
  where
    -- Strict left fold: each insert is forced as it happens instead of
    -- building a chain of thunks. 'foldl'' comes from the Prelude
    -- (GHC2024 implies a base that exports it).
    db =
      foldl'
        (\acc (tbl, row) -> insertRow tbl row acc)
        (fromTheory sc.scTheory)
        sc.scFacts
|
||||||
|
|
||||||
|
-- * Entry point
|
||||||
|
|
||||||
|
-- | Command-line entry point.
--
-- With exactly one argument naming a known scenario, self-checks it and
-- prints its pretty-printed JSON plan to stdout. An unknown name, or any
-- other argument count, reports on stderr and exits via 'die'.
main :: IO ()
main =
  getArgs >>= \case
    [name] -> maybe (unknown name) export (lookup name byName)
    _ -> usage
  where
    byName = [(s.scName, s) | s <- scenarios]

    known = unwords (map (.scName) scenarios)

    export sc = do
      selfCheck sc
      LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc))

    unknown name =
      die ("unknown scenario: " <> name <> "\navailable: " <> known)

    usage = do
      hPutStrLn stderr "usage: glog-export <scenario>"
      hPutStrLn stderr ("scenarios: " <> known)
      die ""
|
||||||
Loading…
x
Reference in New Issue
Block a user