Add the infrastructure to run the plan on Rust
This commit is contained in:
parent
693151b89e
commit
510662e7c9
1
.gitignore
vendored
1
.gitignore
vendored
@ -81,3 +81,4 @@ tarpaulin-report.html
|
||||
.claude/
|
||||
.codex
|
||||
.agents/
|
||||
dist-newstyle/
|
||||
|
||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -5,4 +5,4 @@
|
||||
[submodule "external/geolog"]
|
||||
path = external/geolog
|
||||
url = gitlab@git.sgai.uk:creators/geolog.git
|
||||
branch = query-plan-ir-draft-1
|
||||
branch = query-plan-algebraic
|
||||
|
||||
17
Makefile
17
Makefile
@ -76,6 +76,23 @@ clean: ## Remove build output
|
||||
cargo clean; \
|
||||
fi
|
||||
|
||||
EXPORTER_DIR := tools/exporter
|
||||
EXPORTER_FIXTURES := crates/glog-runner/fixtures
|
||||
EXPORTER_SCENARIOS := three-atom-chain
|
||||
|
||||
.PHONY: export-fixtures
|
||||
export-fixtures: ## Regenerate JSON plan fixtures from the Haskell exporter (needs Cabal and GHC; use `make shell` first).
|
||||
@if ! command -v cabal >/dev/null 2>&1; then \
|
||||
echo "cabal not found. Enter the dev shell with 'make shell' (or 'nix develop') first."; \
|
||||
exit 1; \
|
||||
fi
|
||||
@cd $(EXPORTER_DIR) && cabal build glog-export
|
||||
@for sc in $(EXPORTER_SCENARIOS); do \
|
||||
out=$(EXPORTER_FIXTURES)/$$(echo $$sc | tr '-' '_').json; \
|
||||
echo "exporting $$sc -> $$out"; \
|
||||
(cd $(EXPORTER_DIR) && cabal run -v0 glog-export -- $$sc) > $$out; \
|
||||
done
|
||||
|
||||
.PHONY: shell
|
||||
shell: ## Enter the Nix dev shell defined in flake.nix
|
||||
@nix develop
|
||||
|
||||
19
crates/glog-runner/Cargo.toml
Normal file
19
crates/glog-runner/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
||||
[package]
|
||||
name = "glog-runner"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
storage = { path = "../storage" }
|
||||
query-ops = { path = "../query-ops" }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
|
||||
[[bin]]
|
||||
name = "glog-run"
|
||||
path = "src/main.rs"
|
||||
166
crates/glog-runner/fixtures/three_atom_chain.json
Normal file
166
crates/glog-runner/fixtures/three_atom_chain.json
Normal file
@ -0,0 +1,166 @@
|
||||
{
|
||||
"_scenario": "three-atom-chain",
|
||||
"facts": {
|
||||
"edge": [
|
||||
[
|
||||
{
|
||||
"str": "node:1"
|
||||
},
|
||||
{
|
||||
"str": "node:2"
|
||||
},
|
||||
{
|
||||
"str": "edge:1"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"str": "node:2"
|
||||
},
|
||||
{
|
||||
"str": "node:3"
|
||||
},
|
||||
{
|
||||
"str": "edge:2"
|
||||
}
|
||||
]
|
||||
],
|
||||
"node": [
|
||||
[
|
||||
{
|
||||
"str": "node:1"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"str": "node:2"
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"str": "node:3"
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"query": {
|
||||
"nodes": [
|
||||
{
|
||||
"action": {
|
||||
"scan": {
|
||||
"columns": [
|
||||
{
|
||||
"var": "a"
|
||||
},
|
||||
{
|
||||
"var": "b"
|
||||
},
|
||||
{
|
||||
"var": "_w0_2"
|
||||
}
|
||||
],
|
||||
"table": "edge"
|
||||
}
|
||||
},
|
||||
"id": 1
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"scan": {
|
||||
"columns": [
|
||||
{
|
||||
"var": "b"
|
||||
},
|
||||
{
|
||||
"var": "c"
|
||||
},
|
||||
{
|
||||
"var": "_w1_2"
|
||||
}
|
||||
],
|
||||
"table": "edge"
|
||||
}
|
||||
},
|
||||
"id": 2
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"scan": {
|
||||
"columns": [
|
||||
{
|
||||
"var": "a"
|
||||
}
|
||||
],
|
||||
"table": "node"
|
||||
}
|
||||
},
|
||||
"id": 3
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"join": {
|
||||
"left": 1,
|
||||
"op": "left",
|
||||
"right": 3
|
||||
}
|
||||
},
|
||||
"id": 4
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"join": {
|
||||
"left": 2,
|
||||
"op": "left",
|
||||
"right": 4
|
||||
}
|
||||
},
|
||||
"id": 5
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"join": {
|
||||
"left": 5,
|
||||
"op": "right",
|
||||
"right": 4
|
||||
}
|
||||
},
|
||||
"id": 6
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"join": {
|
||||
"left": 6,
|
||||
"op": "right",
|
||||
"right": 3
|
||||
}
|
||||
},
|
||||
"id": 7
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"join": {
|
||||
"left": 6,
|
||||
"op": "natural",
|
||||
"right": 7
|
||||
}
|
||||
},
|
||||
"id": 8
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"join": {
|
||||
"left": 5,
|
||||
"op": "natural",
|
||||
"right": 8
|
||||
}
|
||||
},
|
||||
"id": 9
|
||||
}
|
||||
],
|
||||
"root": 9
|
||||
},
|
||||
"schema": {
|
||||
"edge": 3,
|
||||
"node": 1
|
||||
}
|
||||
}
|
||||
344
crates/glog-runner/src/lib.rs
Normal file
344
crates/glog-runner/src/lib.rs
Normal file
@ -0,0 +1,344 @@
|
||||
//! End-to-end runner that executes a `geolog-lang` conjunctive-query plan
|
||||
//! against this workspace's storage and `query-ops` operators.
|
||||
//!
|
||||
//! The upstream Haskell planner in `external/geolog/geolog-lang`
|
||||
//! (`Geolog.DB.Plan`) builds a Yannakakis-style join DAG over `QAtom`s. This
|
||||
//! crate accepts that DAG as JSON, materializes the input relations through
|
||||
//! the [`Storage`] trait, and walks the DAG using
|
||||
//! [`query_ops::atom::scan_atom`], [`query_ops::join::semijoin`], and
|
||||
//! [`query_ops::join::natural_join`]. The result is a binding
|
||||
//! [`Relation`](query_ops::relation::Relation) over the query's variables.
|
||||
//!
|
||||
//! The JSON IR mirrors `Geolog.DB.Plan.JoinPlan` and `Geolog.DB.InMemory.QAtom`
|
||||
//! without depending on the Haskell side at build time. A Haskell exporter
|
||||
//! that dumps `(schema, facts, JoinPlan)` to this shape is the planned
|
||||
//! follow-up that completes the round trip; the IR is the contract.
|
||||
//!
|
||||
//! Mapping from the Haskell planner:
|
||||
//!
|
||||
//! | `Geolog.DB.Plan` | this crate |
|
||||
//! |-----------------------------|-----------------------------------------------|
|
||||
//! | `PlanEvalAtom` | [`Action::Scan`] → `scan_atom` |
|
||||
//! | `PlanJoin LeftJoin a b` | [`Action::Join`] with [`JoinOp::Left`] → `semijoin(rel[a], rel[b])` |
|
||||
//! | `PlanJoin RightJoin a b` | [`Action::Join`] with [`JoinOp::Right`] → `semijoin(rel[b], rel[a])` |
|
||||
//! | `PlanJoin NaturalJoin a b` | [`Action::Join`] with [`JoinOp::Natural`] → `natural_join(rel[a], rel[b])` |
|
||||
//!
|
||||
//! The atom side covers `evalAtom` (`Geolog.DB.InMemory`): a [`Term::Var`]
|
||||
//! repeated across positions enforces equality, [`Term::Lit`] filters by
|
||||
//! constant, and distinct variables project in first-occurrence order.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
use query_ops::atom::{AtomPattern, Term, scan_atom};
|
||||
use query_ops::join::{natural_join, semijoin};
|
||||
use query_ops::relation::Relation;
|
||||
use storage::value::Value;
|
||||
use storage::{MemoryStorage, Storage, StorageError, scan_as_table};
|
||||
|
||||
/// A single fixture: schema, ground facts, and a query plan to execute.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Plan {
|
||||
/// Relation name → arity (column count).
|
||||
pub schema: HashMap<String, usize>,
|
||||
/// Relation name → list of ground tuples to insert before execution.
|
||||
pub facts: HashMap<String, Vec<Vec<JsonValue>>>,
|
||||
/// The join DAG itself.
|
||||
pub query: Query,
|
||||
}
|
||||
|
||||
/// Mirrors `Geolog.DB.Plan.JoinPlan`: a set of nodes plus the id of the
|
||||
/// rooted result node.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Query {
|
||||
pub root: u32,
|
||||
pub nodes: Vec<Node>,
|
||||
}
|
||||
|
||||
/// One node of the plan DAG. `id`s are dense within a `Query` but don't need
|
||||
/// to start at any particular value, mirroring the Haskell `PlanNodeId`.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Node {
|
||||
pub id: u32,
|
||||
pub action: Action,
|
||||
}
|
||||
|
||||
/// What to compute at a node. Tagged externally so JSON reads as
|
||||
/// `{"action": {"scan": {...}}}` or `{"action": {"join": {...}}}`.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Action {
|
||||
Scan(Atom),
|
||||
Join(Join),
|
||||
}
|
||||
|
||||
/// A flat atom pattern, one entry per column of the target relation.
|
||||
/// Matches the `toFlatArgs` view used by `Geolog.DB.InMemory.evalAtom`:
|
||||
/// `qaValues` positions are filled in directly, and the entity-id column
|
||||
/// (if any) appears at the last position. Wildcard positions in the
|
||||
/// Haskell `QAtom` (a `Map Int QVal` with a missing key) translate to a
|
||||
/// fresh, unique variable name on this side, which the operator binds but
|
||||
/// never joins against.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Atom {
|
||||
pub table: String,
|
||||
pub columns: Vec<JsonTerm>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum JsonTerm {
|
||||
Var(String),
|
||||
Lit(JsonValue),
|
||||
}
|
||||
|
||||
/// Wire-level value tag. Restricted to what `storage::value::Value` carries.
|
||||
/// Entity identities from the Haskell side (`ValEntity path id`) round-trip
|
||||
/// through `Str` for now using a `"path:id"` convention; that's a fixture
|
||||
/// concern, not a runner concern.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum JsonValue {
|
||||
Int(i64),
|
||||
Str(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Join {
|
||||
pub op: JoinOp,
|
||||
pub left: u32,
|
||||
pub right: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum JoinOp {
|
||||
/// `Geolog.DB.Plan.LeftJoin`: result is `left` rows whose shared columns
|
||||
/// appear in `right`. Lowered to `semijoin(left, right)`.
|
||||
Left,
|
||||
/// `Geolog.DB.Plan.RightJoin`: result is `right` rows whose shared
|
||||
/// columns appear in `left`. Lowered to `semijoin(right, left)`.
|
||||
Right,
|
||||
/// `Geolog.DB.Plan.NaturalJoin`. Lowered to `natural_join(left, right)`.
|
||||
Natural,
|
||||
}
|
||||
|
||||
/// Errors a runner can produce in addition to storage failures.
|
||||
#[derive(Debug)]
|
||||
pub enum RunError {
|
||||
/// A fact references a relation that isn't declared in `schema`.
|
||||
UnknownRelation(String),
|
||||
/// A node id appears in a `Join` action but no node with that id exists.
|
||||
MissingNode(u32),
|
||||
/// `Query.root` doesn't match any node in `nodes`.
|
||||
MissingRoot(u32),
|
||||
/// Two nodes share the same id.
|
||||
DuplicateNode(u32),
|
||||
/// A join node references its left or right side before that side has
|
||||
/// been computed: the DAG isn't actually topologically sorted by id, or
|
||||
/// it has a cycle.
|
||||
UnresolvedDependency { node: u32, depends_on: u32 },
|
||||
/// Storage layer rejected an operation.
|
||||
Storage(StorageError),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RunError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::UnknownRelation(name) => {
|
||||
write!(f, "facts reference relation {name:?} not in schema")
|
||||
}
|
||||
Self::MissingNode(id) => write!(f, "plan references missing node id {id}"),
|
||||
Self::MissingRoot(id) => write!(f, "plan root id {id} matches no node"),
|
||||
Self::DuplicateNode(id) => write!(f, "duplicate node id {id} in plan"),
|
||||
Self::UnresolvedDependency { node, depends_on } => write!(
|
||||
f,
|
||||
"node {node} depends on {depends_on}, which has not been computed yet"
|
||||
),
|
||||
Self::Storage(err) => write!(f, "storage error: {err}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for RunError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::Storage(err) => Some(err),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StorageError> for RunError {
|
||||
fn from(err: StorageError) -> Self {
|
||||
Self::Storage(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JsonValue> for Value {
|
||||
fn from(jv: JsonValue) -> Self {
|
||||
match jv {
|
||||
JsonValue::Int(n) => Self::Int(n),
|
||||
JsonValue::Str(s) => Self::Str(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JsonTerm> for Term {
|
||||
fn from(t: JsonTerm) -> Self {
|
||||
match t {
|
||||
JsonTerm::Var(name) => Self::Var(name),
|
||||
JsonTerm::Lit(value) => Self::Lit(value.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a [`Plan`] from a JSON string.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns a [`serde_json::Error`] if the input isn't valid JSON in the
|
||||
/// expected shape.
|
||||
pub fn parse_plan(json: &str) -> Result<Plan, serde_json::Error> {
|
||||
serde_json::from_str(json)
|
||||
}
|
||||
|
||||
/// Load schema and facts from a [`Plan`] into a fresh [`MemoryStorage`].
|
||||
///
|
||||
/// All facts are inserted in a single transaction; commit is atomic so a
|
||||
/// failure on row N leaves the storage empty.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`RunError::UnknownRelation`] if facts mention a relation not
|
||||
/// declared in `schema`. Wraps storage failures (arity mismatch, transaction
|
||||
/// errors) in [`RunError::Storage`].
|
||||
pub fn load_into_memory(plan: &Plan) -> Result<MemoryStorage, RunError> {
|
||||
let mut storage = MemoryStorage::default();
|
||||
for (name, arity) in &plan.schema {
|
||||
storage.create_relation(name, *arity)?;
|
||||
}
|
||||
{
|
||||
let mut tx = storage.transaction()?;
|
||||
for (name, rows) in &plan.facts {
|
||||
if !plan.schema.contains_key(name) {
|
||||
return Err(RunError::UnknownRelation(name.clone()));
|
||||
}
|
||||
for row in rows {
|
||||
let cells: Vec<Value> = row.iter().cloned().map(Value::from).collect();
|
||||
tx.insert(name, cells)?;
|
||||
}
|
||||
}
|
||||
let _ = tx.commit()?;
|
||||
}
|
||||
Ok(storage)
|
||||
}
|
||||
|
||||
/// Execute a plan against a storage backend, returning the bindings
|
||||
/// [`Relation`] for the rooted plan node.
|
||||
///
|
||||
/// Nodes are executed in ascending `id` order. For a Yannakakis plan as
|
||||
/// emitted by `Geolog.DB.Plan` this is equivalent to a topological sort,
|
||||
/// since `insertJoin` only references node ids that have already been
|
||||
/// allocated. A non-monotone id ordering is rejected with
|
||||
/// [`RunError::UnresolvedDependency`].
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`RunError::DuplicateNode`] for repeated ids,
|
||||
/// [`RunError::MissingNode`] for join references to unknown ids,
|
||||
/// [`RunError::MissingRoot`] if `query.root` isn't present, and storage
|
||||
/// errors during the per-scan `scan_as_table` call.
|
||||
pub fn execute<S: Storage>(storage: &S, query: &Query) -> Result<Relation, RunError> {
|
||||
let mut seen_ids: std::collections::HashSet<u32> =
|
||||
std::collections::HashSet::with_capacity(query.nodes.len());
|
||||
for node in &query.nodes {
|
||||
if !seen_ids.insert(node.id) {
|
||||
return Err(RunError::DuplicateNode(node.id));
|
||||
}
|
||||
}
|
||||
if !seen_ids.contains(&query.root) {
|
||||
return Err(RunError::MissingRoot(query.root));
|
||||
}
|
||||
|
||||
let mut ordered: Vec<&Node> = query.nodes.iter().collect();
|
||||
ordered.sort_by_key(|n| n.id);
|
||||
|
||||
let mut results: HashMap<u32, Relation> = HashMap::with_capacity(ordered.len());
|
||||
for node in ordered {
|
||||
let computed = match &node.action {
|
||||
Action::Scan(atom) => {
|
||||
let table = scan_as_table(storage, &atom.table)?;
|
||||
let pattern = AtomPattern {
|
||||
columns: atom.columns.iter().cloned().map(Term::from).collect(),
|
||||
};
|
||||
scan_atom(&table, &pattern)
|
||||
}
|
||||
Action::Join(join) => {
|
||||
let left = require_dep(&results, &seen_ids, node.id, join.left)?;
|
||||
let right = require_dep(&results, &seen_ids, node.id, join.right)?;
|
||||
match join.op {
|
||||
JoinOp::Left => semijoin(left, right),
|
||||
JoinOp::Right => semijoin(right, left),
|
||||
JoinOp::Natural => natural_join(left, right),
|
||||
}
|
||||
}
|
||||
};
|
||||
results.insert(node.id, computed);
|
||||
}
|
||||
|
||||
results
|
||||
.remove(&query.root)
|
||||
.ok_or(RunError::MissingRoot(query.root))
|
||||
}
|
||||
|
||||
fn require_dep<'a>(
|
||||
results: &'a HashMap<u32, Relation>,
|
||||
seen: &std::collections::HashSet<u32>,
|
||||
node: u32,
|
||||
depends_on: u32,
|
||||
) -> Result<&'a Relation, RunError> {
|
||||
if let Some(rel) = results.get(&depends_on) {
|
||||
Ok(rel)
|
||||
} else if seen.contains(&depends_on) {
|
||||
Err(RunError::UnresolvedDependency { node, depends_on })
|
||||
} else {
|
||||
Err(RunError::MissingNode(depends_on))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience: parse JSON, load it into a fresh in-memory storage, and
|
||||
/// execute, returning the root binding relation.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns a JSON parse error if the input is malformed, or a [`RunError`]
|
||||
/// for any later step.
|
||||
pub fn run_json(json: &str) -> Result<Relation, RunFromJsonError> {
|
||||
let plan = parse_plan(json).map_err(RunFromJsonError::Parse)?;
|
||||
let storage = load_into_memory(&plan).map_err(RunFromJsonError::Run)?;
|
||||
let bindings = execute(&storage, &plan.query).map_err(RunFromJsonError::Run)?;
|
||||
Ok(bindings)
|
||||
}
|
||||
|
||||
/// Combined error from [`run_json`].
|
||||
#[derive(Debug)]
|
||||
pub enum RunFromJsonError {
|
||||
Parse(serde_json::Error),
|
||||
Run(RunError),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RunFromJsonError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Parse(err) => write!(f, "parse error: {err}"),
|
||||
Self::Run(err) => write!(f, "run error: {err}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for RunFromJsonError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::Parse(err) => Some(err),
|
||||
Self::Run(err) => Some(err),
|
||||
}
|
||||
}
|
||||
}
|
||||
59
crates/glog-runner/src/main.rs
Normal file
59
crates/glog-runner/src/main.rs
Normal file
@ -0,0 +1,59 @@
|
||||
//! `glog-run` CLI: read a JSON plan from a file (or stdin if `-`), execute
|
||||
//! it against a fresh in-memory store, and print the resulting binding
|
||||
//! relation as JSON on stdout.
|
||||
|
||||
use std::io::{self, Read};
|
||||
use std::process::ExitCode;
|
||||
|
||||
fn main() -> ExitCode {
|
||||
let mut args = std::env::args().skip(1);
|
||||
let Some(path) = args.next() else {
|
||||
eprintln!("usage: glog-run <plan.json | ->");
|
||||
return ExitCode::from(2);
|
||||
};
|
||||
|
||||
let input = match read_input(&path) {
|
||||
Ok(s) => s,
|
||||
Err(err) => {
|
||||
eprintln!("failed to read {path}: {err}");
|
||||
return ExitCode::from(1);
|
||||
}
|
||||
};
|
||||
|
||||
let relation = match glog_runner::run_json(&input) {
|
||||
Ok(r) => r,
|
||||
Err(err) => {
|
||||
eprintln!("{err}");
|
||||
return ExitCode::from(1);
|
||||
}
|
||||
};
|
||||
|
||||
let payload = serde_json::json!({
|
||||
"columns": relation.columns,
|
||||
"rows": relation
|
||||
.rows
|
||||
.iter()
|
||||
.map(|row| row.iter().map(value_to_json).collect::<Vec<_>>())
|
||||
.collect::<Vec<_>>(),
|
||||
});
|
||||
println!("{payload}");
|
||||
ExitCode::SUCCESS
|
||||
}
|
||||
|
||||
fn read_input(path: &str) -> io::Result<String> {
|
||||
if path == "-" {
|
||||
let mut buf = String::new();
|
||||
io::stdin().read_to_string(&mut buf)?;
|
||||
Ok(buf)
|
||||
} else {
|
||||
std::fs::read_to_string(path)
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_json(value: &storage::value::Value) -> serde_json::Value {
|
||||
match value {
|
||||
storage::value::Value::Int(n) => serde_json::Value::Number((*n).into()),
|
||||
storage::value::Value::Str(s) => serde_json::Value::String(s.clone()),
|
||||
storage::value::Value::Id(id) => serde_json::Value::String(id.to_string()),
|
||||
}
|
||||
}
|
||||
73
crates/glog-runner/tests/three_atom_chain.rs
Normal file
73
crates/glog-runner/tests/three_atom_chain.rs
Normal file
@ -0,0 +1,73 @@
|
||||
//! End-to-end check: run the JSON fixture and verify the resulting bindings
|
||||
//! match the `DB.InMemoryTest` "matches evalConjunction on three-atom chain"
|
||||
//! case from `external/geolog/geolog-lang/test/DB/InMemoryTest.hs`.
|
||||
//!
|
||||
//! For `node = {e1, e2, e3}` and `edge = {(e1,e2,ee1), (e2,e3,ee2)}` the
|
||||
//! conjunction `node(a), edge(a, b, _), edge(b, c, _)` has exactly one
|
||||
//! solution: `(a=e1, b=e2, c=e3)`.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use glog_runner::run_json;
|
||||
use storage::value::Value;
|
||||
|
||||
fn fixture() -> &'static str {
|
||||
include_str!("../fixtures/three_atom_chain.json")
|
||||
}
|
||||
|
||||
fn ent(path: &str, id: u32) -> Value {
|
||||
Value::Str(format!("{path}:{id}"))
|
||||
}
|
||||
|
||||
fn project<'a>(
|
||||
columns: &'a [String],
|
||||
row: &'a [Value],
|
||||
keep: &'a [&'a str],
|
||||
) -> BTreeMap<&'a str, &'a Value> {
|
||||
keep.iter()
|
||||
.map(|name| {
|
||||
let pos = columns
|
||||
.iter()
|
||||
.position(|c| c == name)
|
||||
.expect("column missing");
|
||||
(*name, &row[pos])
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn three_atom_chain_matches_haskell_oracle() {
|
||||
let result = run_json(fixture()).expect("fixture should execute");
|
||||
|
||||
// The plan's root keeps every variable, including the per-atom wildcards
|
||||
// `_r1` and `_r2`. The oracle only asserts the (a, b, c) projection.
|
||||
let keep = ["a", "b", "c"];
|
||||
let mut projected: Vec<BTreeMap<&str, &Value>> = result
|
||||
.rows
|
||||
.iter()
|
||||
.map(|row| project(&result.columns, row, &keep))
|
||||
.collect();
|
||||
projected.sort_by_key(|m| format!("{m:?}"));
|
||||
|
||||
let e1 = ent("node", 1);
|
||||
let e2 = ent("node", 2);
|
||||
let e3 = ent("node", 3);
|
||||
let expected = vec![BTreeMap::from([("a", &e1), ("b", &e2), ("c", &e3)])];
|
||||
|
||||
assert_eq!(projected, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn root_columns_cover_a_b_c_plus_two_wildcards() {
|
||||
// The exporter emits unique wildcard variable names for the entity-id
|
||||
// column of each edge atom (e.g. `_w0_2`, `_w1_2`); their exact spelling
|
||||
// is an implementation detail of the exporter, so this test only checks
|
||||
// that the named variables are all present and that the total column
|
||||
// count is the three named ones plus two anonymous wildcards.
|
||||
let result = run_json(fixture()).expect("fixture should execute");
|
||||
let cols: std::collections::HashSet<&str> = result.columns.iter().map(String::as_str).collect();
|
||||
for expected in ["a", "b", "c"] {
|
||||
assert!(cols.contains(expected), "missing column {expected}");
|
||||
}
|
||||
assert_eq!(result.columns.len(), 5, "expected 3 named + 2 wildcards");
|
||||
}
|
||||
2
external/geolog
vendored
2
external/geolog
vendored
@ -1 +1 @@
|
||||
Subproject commit 99d4006f4655d8a6815a9156fe4d9304515f356d
|
||||
Subproject commit 426d4c96d6031ccaf5e14c12c3dab496e3b4c365
|
||||
21
flake.nix
21
flake.nix
@ -1,5 +1,5 @@
|
||||
{
|
||||
description = "Storage engine playground: Rust workspace for FlowLog, DBSP, CRDT, and Geomerge experiments.";
|
||||
description = "Storage engine playground";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
@ -29,12 +29,13 @@
|
||||
|
||||
packages = [
|
||||
rustToolchain
|
||||
# Diagram regeneration in crates/geomerge-demo/docs/diagrams.
|
||||
pkgs.graphviz
|
||||
# Cargo helpers.
|
||||
pkgs.cargo-watch
|
||||
pkgs.cargo-nextest
|
||||
# Pre-commit hooks (see .pre-commit-config.yaml, Makefile setup-hooks).
|
||||
pkgs.haskell.compiler.ghc912
|
||||
pkgs.cabal-install
|
||||
pkgs.pkg-config
|
||||
pkgs.zlib
|
||||
pkgs.pre-commit
|
||||
pkgs.python3
|
||||
];
|
||||
@ -44,10 +45,14 @@
|
||||
};
|
||||
|
||||
shellHook = ''
|
||||
echo "storage-engine-playground dev shell"
|
||||
echo " rustc: $(rustc --version)"
|
||||
echo " cargo: $(cargo --version)"
|
||||
echo " dot: $(dot -V 2>&1)"
|
||||
# Banner goes to stderr so `nix develop --command` invocations
|
||||
# that pipe stdout (e.g. tools/exporter producing JSON) stay clean.
|
||||
>&2 echo "storage-engine-playground dev shell"
|
||||
>&2 echo " rustc: $(rustc --version)"
|
||||
>&2 echo " cargo: $(cargo --version)"
|
||||
>&2 echo " ghc: $(ghc --version)"
|
||||
>&2 echo " cabal: $(cabal --version | head -1)"
|
||||
>&2 echo " dot: $(dot -V 2>&1)"
|
||||
'';
|
||||
};
|
||||
|
||||
|
||||
24
tools/exporter/cabal.project
Normal file
24
tools/exporter/cabal.project
Normal file
@ -0,0 +1,24 @@
|
||||
-- cabal.project for the geolog -> Rust JSON exporter.
|
||||
--
|
||||
-- This file points at the geolog-lang library inside the external/geolog
|
||||
-- submodule, plus the sibling packages it depends on (data-partition,
|
||||
-- diagnostician, fnotation). It mirrors the submodule's own cabal.project
|
||||
-- so the exporter sees the same source set the submodule's tests build
|
||||
-- against.
|
||||
|
||||
packages:
|
||||
glog-exporter.cabal
|
||||
../../external/geolog/geolog-lang/geolog-lang.cabal
|
||||
../../external/geolog/data-partition/data-partition.cabal
|
||||
../../external/geolog/diagnostician/diagnostician.cabal
|
||||
../../external/geolog/fnotation/fnotation.cabal
|
||||
|
||||
-- geolog-lang's DB.Plan.Render module uses a patched diagrams-graphviz.
|
||||
-- Same pin as external/geolog/cabal.project.
|
||||
source-repository-package
|
||||
type: git
|
||||
location: https://github.com/georgefst/diagrams-graphviz.git
|
||||
tag: 993533c564861f9d0663d719eafd56efd95f59ba
|
||||
|
||||
jobs: $ncpus
|
||||
semaphore: true
|
||||
36
tools/exporter/glog-exporter.cabal
Normal file
36
tools/exporter/glog-exporter.cabal
Normal file
@ -0,0 +1,36 @@
|
||||
cabal-version: 3.4
|
||||
name: glog-exporter
|
||||
version: 0.1.0.0
|
||||
license: MIT OR Apache-2.0
|
||||
author: storage-engine-playground
|
||||
synopsis: Export geolog-lang join plans as JSON for the Rust runner.
|
||||
description:
|
||||
Builds a FlatTheory + facts + a list of QAtoms for a named scenario,
|
||||
runs Geolog.DB.Plan.planConjunction, and emits a JSON document that
|
||||
crates/glog-runner consumes. This allows the playground use query-ops and
|
||||
storage end-to-end with a real Yannakakis plan produced by the geolog
|
||||
frontend, not a hand-written fixture.
|
||||
|
||||
build-type: Simple
|
||||
|
||||
executable glog-export
|
||||
main-is: Main.hs
|
||||
hs-source-dirs: src
|
||||
default-language: GHC2024
|
||||
default-extensions:
|
||||
BlockArguments
|
||||
LambdaCase
|
||||
OverloadedRecordDot
|
||||
OverloadedStrings
|
||||
|
||||
ghc-options: -Wall
|
||||
|
||||
build-depends:
|
||||
, aeson >=2.2
|
||||
, aeson-pretty >=0.8
|
||||
, algebraic-graphs >=0.7
|
||||
, base
|
||||
, bytestring
|
||||
, containers
|
||||
, geolog-lang
|
||||
, text
|
||||
252
tools/exporter/src/Main.hs
Normal file
252
tools/exporter/src/Main.hs
Normal file
@ -0,0 +1,252 @@
|
||||
-- | Exports a geolog-lang join plan as JSON for the Rust runner in
|
||||
-- @crates/glog-runner@.
|
||||
--
|
||||
-- Invocation:
|
||||
--
|
||||
-- @
|
||||
-- cabal run glog-export -- <scenario> > plan.json
|
||||
-- @
|
||||
--
|
||||
-- Available scenarios: @three-atom-chain@.
|
||||
--
|
||||
-- The output shape is documented in @crates\/glog-runner\/src\/lib.rs@.
|
||||
-- This program is the canonical producer: any change to the IR should
|
||||
-- start here, with the Rust runner updated to match.
|
||||
module Main (main) where
|
||||
|
||||
import Algebra.Graph qualified as AG
|
||||
import Data.Aeson ((.=))
|
||||
import Data.Aeson qualified as Aeson
|
||||
import Data.Aeson.Encode.Pretty qualified as AesonPretty
|
||||
import Data.Aeson.Key qualified as Key
|
||||
import Data.ByteString.Lazy.Char8 qualified as LBS8
|
||||
import Data.List (sortOn)
|
||||
import Data.Map.Strict (Map)
|
||||
import Data.Map.Strict qualified as Map
|
||||
import Data.Set qualified as Set
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as T
|
||||
import Geolog.DB.InMemory
|
||||
import Geolog.DB.Plan
|
||||
import Geolog.IR qualified as IR
|
||||
import System.Environment (getArgs)
|
||||
import System.Exit (die)
|
||||
import System.IO (hPutStrLn, stderr)
|
||||
|
||||
-- * Scenario plumbing
|
||||
--
|
||||
-- A scenario fixes a schema, a set of ground facts, and a conjunction of
|
||||
-- query atoms. The exporter is intentionally code-driven (not @.glog@
|
||||
-- driven): @.glog@ files declare theories, not queries, so the query
|
||||
-- side has to live in Haskell either way.
|
||||
|
||||
data Scenario = Scenario
|
||||
{ scName :: String
|
||||
, scTheory :: IR.FlatTheory
|
||||
, scFacts :: [(IR.Path, [Val])]
|
||||
, scAtoms :: [QAtom]
|
||||
}
|
||||
|
||||
-- * three-atom-chain
|
||||
--
|
||||
-- Mirrors @DB.InMemoryTest@ "matches evalConjunction on three-atom chain".
|
||||
-- node = {e1, e2, e3}, edge = {(e1,e2,ee1), (e2,e3,ee2)}.
|
||||
-- Conjunction: node(a), edge(a, b, _), edge(b, c, _).
|
||||
|
||||
nodePath, edgePath :: IR.Path
|
||||
nodePath = ["node"]
|
||||
edgePath = ["edge"]
|
||||
|
||||
threeAtomChain :: Scenario
|
||||
threeAtomChain =
|
||||
Scenario
|
||||
{ scName = "three-atom-chain"
|
||||
, scTheory =
|
||||
IR.FlatTheory
|
||||
{ tables =
|
||||
Map.fromList
|
||||
[ (nodePath, IR.Table {columns = [IR.EntityType nodePath], primaryKey = Nothing})
|
||||
, (edgePath, IR.Table {columns = [IR.EntityType nodePath, IR.EntityType nodePath, IR.EntityType edgePath], primaryKey = Nothing})
|
||||
]
|
||||
, laws = Map.empty
|
||||
}
|
||||
, scFacts =
|
||||
[ (nodePath, [ValEntity nodePath 1])
|
||||
, (nodePath, [ValEntity nodePath 2])
|
||||
, (nodePath, [ValEntity nodePath 3])
|
||||
, (edgePath, [ValEntity nodePath 1, ValEntity nodePath 2, ValEntity edgePath 1])
|
||||
, (edgePath, [ValEntity nodePath 2, ValEntity nodePath 3, ValEntity edgePath 2])
|
||||
]
|
||||
, scAtoms =
|
||||
[ QAtom {qaTable = nodePath, qaRowId = Nothing, qaValues = Map.singleton 0 (QVar (Var "a"))}
|
||||
, QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "a")), (1, QVar (Var "b"))]}
|
||||
, QAtom {qaTable = edgePath, qaRowId = Nothing, qaValues = Map.fromList [(0, QVar (Var "b")), (1, QVar (Var "c"))]}
|
||||
]
|
||||
}
|
||||
|
||||
scenarios :: [Scenario]
|
||||
scenarios = [threeAtomChain]
|
||||
|
||||
-- * JSON encoding
|
||||
--
|
||||
-- The shape mirrors the IR in @crates/glog-runner/src/lib.rs@:
|
||||
--
|
||||
-- > {
|
||||
-- > "schema": {<name>: <arity>, ...},
|
||||
-- > "facts": {<name>: [[<value>, ...], ...], ...},
|
||||
-- > "query": {"root": <id>, "nodes": [{"id": <id>, "action": <action>}, ...]}
|
||||
-- > }
|
||||
|
||||
-- | Render a 'Geolog.IR.Path' (a list of 'FNotation.Names.Name') as a flat
|
||||
-- string for use as a relation name on the Rust side. Each 'Name' is
|
||||
-- already shown with @\/@ between its own init segments and last, so we
|
||||
-- reuse 'show' and join Names with @\/@ too.
|
||||
pathText :: IR.Path -> Text
|
||||
pathText = T.intercalate "/" . map (T.pack . show)
|
||||
|
||||
pathKey :: IR.Path -> Aeson.Key
|
||||
pathKey = Key.fromText . pathText
|
||||
|
||||
encodeValue :: Val -> Aeson.Value
|
||||
encodeValue =
|
||||
Aeson.object . pure . \case
|
||||
ValInt n -> "int" .= n
|
||||
ValText t -> "str" .= t
|
||||
ValEntity p n -> "str" .= (pathText p <> ":" <> T.pack (show n))
|
||||
|
||||
encodeTerm :: QVal -> Aeson.Value
|
||||
encodeTerm = \case
|
||||
QVar (Var name) -> Aeson.object ["var" .= name]
|
||||
QLit v -> Aeson.object ["lit" .= encodeValue v]
|
||||
|
||||
-- | Flatten an atom into one term per stored column, mirroring
|
||||
-- @Geolog.DB.InMemory.toFlatArgs@: @qaValues@ keys map to positions
|
||||
-- @0..n-2@, @qaRowId@ (if present) maps to position @n-1@, and any
|
||||
-- missing positions become wildcard variables with locally-unique names.
|
||||
flattenAtom :: Int -> Int -> QAtom -> [Aeson.Value]
|
||||
flattenAtom atomIdx arity qa =
|
||||
[ encodeTerm (Map.findWithDefault (wildcard atomIdx pos) pos merged)
|
||||
| pos <- [0 .. arity - 1]
|
||||
]
|
||||
where
|
||||
merged = case qa.qaRowId of
|
||||
Nothing -> qa.qaValues
|
||||
Just v -> Map.insert (arity - 1) v qa.qaValues
|
||||
wildcard a p = QVar (Var (T.pack ("_w" <> show a <> "_" <> show p)))
|
||||
|
||||
encodeAtom :: Map IR.Path IR.Table -> Int -> QAtom -> Aeson.Value
|
||||
encodeAtom tables atomIdx qa =
|
||||
Aeson.object
|
||||
[ "table" .= pathText qa.qaTable
|
||||
, "columns" .= flattenAtom atomIdx arity qa
|
||||
]
|
||||
where
|
||||
arity = case Map.lookup qa.qaTable tables of
|
||||
Just t -> length t.columns
|
||||
Nothing -> error ("encodeAtom: unknown table " <> show qa.qaTable)
|
||||
|
||||
-- | Stable atom indexing keyed by atom identity, so the wildcard names in
|
||||
-- @flattenAtom@ are deterministic across runs even if the planner's node
|
||||
-- ordering changes.
|
||||
atomIndex :: [QAtom] -> Map QAtom Int
|
||||
atomIndex atoms = Map.fromList (zip (Set.toList (Set.fromList atoms)) [0 ..])
|
||||
|
||||
encodeJoinOp :: JoinType -> Aeson.Value
|
||||
encodeJoinOp = \case
|
||||
LeftJoin -> "left"
|
||||
RightJoin -> "right"
|
||||
NaturalJoin -> "natural"
|
||||
|
||||
encodeNode :: Map IR.Path IR.Table -> Map QAtom Int -> PlanNode -> Aeson.Value
|
||||
encodeNode tables idx n =
|
||||
Aeson.object
|
||||
[ "id" .= n.graphId.unPlanNodeId
|
||||
, "action" .= case n.action of
|
||||
PlanEvalAtom qa ->
|
||||
let i = Map.findWithDefault 0 qa idx
|
||||
in Aeson.object ["scan" .= encodeAtom tables i qa]
|
||||
PlanJoin jt (PlanNodeId a) (PlanNodeId b) ->
|
||||
Aeson.object
|
||||
[ "join"
|
||||
.= Aeson.object
|
||||
[ "op" .= encodeJoinOp jt
|
||||
, "left" .= a
|
||||
, "right" .= b
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
-- | Render a 'PlanGraph' as the JSON the runner consumes. Empty graphs
|
||||
-- produce @{"root": 0, "nodes": []}@, which the runner treats as a
|
||||
-- well-formed but empty query.
|
||||
encodeQuery :: Map IR.Path IR.Table -> Map QAtom Int -> PlanGraph -> Aeson.Value
|
||||
encodeQuery tables idx (PlanGraph g)
|
||||
| null nodes =
|
||||
Aeson.object ["root" .= (0 :: Int), "nodes" .= ([] :: [Aeson.Value])]
|
||||
| otherwise =
|
||||
Aeson.object
|
||||
[ "root" .= rootId
|
||||
, "nodes" .= map (encodeNode tables idx) nodes
|
||||
]
|
||||
where
|
||||
nodes = sortOn (.graphId.unPlanNodeId) (AG.vertexList g)
|
||||
rootId = case graphRoot (PlanGraph g) of
|
||||
Just (PlanNodeId i) -> i
|
||||
-- Non-empty graph with no topological root means a cycle, which
|
||||
-- planConjunction never produces. Fall back to the last id rather
|
||||
-- than crashing so a bug here is still inspectable.
|
||||
Nothing -> (.graphId.unPlanNodeId) (last nodes)
|
||||
|
||||
encodePlan :: Scenario -> Aeson.Value
|
||||
encodePlan sc =
|
||||
Aeson.object
|
||||
[ "_scenario" .= sc.scName
|
||||
, "schema" .= Aeson.object
|
||||
[pathKey p .= length t.columns | (p, t) <- Map.toList sc.scTheory.tables]
|
||||
, "facts" .= Aeson.object
|
||||
[pathKey p .= map (map encodeValue) rows | (p, rows) <- groupedFacts sc.scFacts]
|
||||
, "query" .= encodeQuery sc.scTheory.tables (atomIndex sc.scAtoms) (planConjunction sc.scAtoms)
|
||||
]
|
||||
|
||||
-- | Group facts by table while preserving table-first-seen order and
|
||||
-- per-table insertion order.
|
||||
groupedFacts :: [(IR.Path, [Val])] -> [(IR.Path, [[Val]])]
|
||||
groupedFacts = go []
|
||||
where
|
||||
go acc [] = reverse [(p, reverse rs) | (p, rs) <- acc]
|
||||
go acc ((p, row) : rest) =
|
||||
let acc' = case break (\(q, _) -> q == p) acc of
|
||||
(before, (q, rs) : after) -> before ++ (q, row : rs) : after
|
||||
(before, []) -> before ++ [(p, [row])]
|
||||
in go acc' rest
|
||||
|
||||
-- * Self-check
|
||||
--
|
||||
-- Run the planner's @evalConjunctionPlanned@ against the scenario's DB
|
||||
-- to confirm the plan we're about to emit is well-formed and produces
|
||||
-- non-error output. Catches malformed scenarios before they hand a bad
|
||||
-- plan to the Rust runner.
|
||||
|
||||
selfCheck :: Scenario -> IO ()
|
||||
selfCheck sc = do
|
||||
let db = foldl (\d (p, row) -> insertRow p row d) (fromTheory sc.scTheory) sc.scFacts
|
||||
case evalConjunctionPlanned db sc.scAtoms of
|
||||
Left err -> die ("self-check failed for " <> sc.scName <> ": " <> show err)
|
||||
Right _ -> pure ()
|
||||
|
||||
-- * Entry point
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
args <- getArgs
|
||||
case args of
|
||||
[name] -> case lookup name [(s.scName, s) | s <- scenarios] of
|
||||
Just sc -> do
|
||||
selfCheck sc
|
||||
LBS8.putStrLn (AesonPretty.encodePretty (encodePlan sc))
|
||||
Nothing ->
|
||||
die ("unknown scenario: " <> name <> "\navailable: " <> unwords (map (.scName) scenarios))
|
||||
_ -> do
|
||||
hPutStrLn stderr "usage: glog-export <scenario>"
|
||||
hPutStrLn stderr ("scenarios: " <> unwords (map (.scName) scenarios))
|
||||
die ""
|
||||
Loading…
x
Reference in New Issue
Block a user