From 1c34368da6933d1bb1418c146e9a1bb0836c2429 Mon Sep 17 00:00:00 2001
From: Hassan Abedi
Date: Mon, 1 Jun 2026 13:22:32 +0200
Subject: [PATCH] Improve scaffolding for `query-ops` crate

---
 crates/query-ops/src/atom.rs     | 29 +++++++++++++++++++++++++
 crates/query-ops/src/join.rs     | 25 ++++++++++++++++++++++
 crates/query-ops/src/lib.rs      | 27 +++++++++++++++++-------
 crates/query-ops/src/relation.rs | 36 ++++++++++++++++++++++++++++++++
 crates/query-ops/src/table.rs    | 35 +++++++++++++++++++++++++++++++
 crates/query-ops/src/value.rs    |  7 +++++++
 6 files changed, 152 insertions(+), 7 deletions(-)
 create mode 100644 crates/query-ops/src/atom.rs
 create mode 100644 crates/query-ops/src/join.rs
 create mode 100644 crates/query-ops/src/relation.rs
 create mode 100644 crates/query-ops/src/table.rs
 create mode 100644 crates/query-ops/src/value.rs

diff --git a/crates/query-ops/src/atom.rs b/crates/query-ops/src/atom.rs
new file mode 100644
index 0000000..d9b31d9
--- /dev/null
+++ b/crates/query-ops/src/atom.rs
@@ -0,0 +1,29 @@
+//! Atom operator: scan a [`Table`] under an [`AtomPattern`] and return a
+//! binding [`Relation`].
+//!
+//! An atom pattern specifies, for each table column, either a variable to bind
+//! or a literal that the cell must equal. A variable appearing in more than one
+//! column forces those cells to be equal (so `Edge(X, X)` keeps only
+//! self-loops). The output relation has one column per distinct variable, in
+//! first-occurrence order.
+
+use crate::{relation::Relation, table::Table, value::Value};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Term {
+    Var(String),
+    Lit(Value),
+}
+
+#[derive(Debug, Clone)]
+pub struct AtomPattern {
+    pub columns: Vec<Term>,
+}
+
+#[must_use]
+pub fn scan_atom(_table: &Table, _pattern: &AtomPattern) -> Relation {
+    todo!(
+        "scan rows, filter by repeated-variable equality and literal equality, \
+         project to one column per distinct variable in first-occurrence order"
+    )
+}
diff --git a/crates/query-ops/src/join.rs b/crates/query-ops/src/join.rs
new file mode 100644
index 0000000..e75ccd1
--- /dev/null
+++ b/crates/query-ops/src/join.rs
@@ -0,0 +1,25 @@
+//! Semijoin and natural join over binding relations.
+//!
+//! Both operators join on the shared column names of their inputs (the
+//! "overlapping variables" in Datalog terms).
+//!
+//! - [`semijoin`] keeps rows of `left` whose shared-column values appear in
+//!   `right`. Output columns are `left.columns` unchanged.
+//! - [`natural_join`] keeps every pair `(l, r)` that agrees on shared columns,
+//!   emitting one row with the union of columns. Output column order is
+//!   `left.columns` followed by `right.columns` minus the shared ones.
+
+use crate::relation::Relation;
+
+#[must_use]
+pub fn semijoin(_left: &Relation, _right: &Relation) -> Relation {
+    todo!("hash `right` on shared columns, probe with `left`, keep matching left rows")
+}
+
+#[must_use]
+pub fn natural_join(_left: &Relation, _right: &Relation) -> Relation {
+    todo!(
+        "hash one side on shared columns, probe with the other, emit \
+         left ++ (right \\ shared) for every match"
+    )
+}
diff --git a/crates/query-ops/src/lib.rs b/crates/query-ops/src/lib.rs
index df690ca..f92d6b1 100644
--- a/crates/query-ops/src/lib.rs
+++ b/crates/query-ops/src/lib.rs
@@ -1,10 +1,23 @@
 //! Physical operators for a small query-plan executor.
 //!
-//! Targeted scope:
-//! - atom scans with repeated-variable filtering and variable-binding output
-//! - semijoins
-//! - natural joins
+//! Three operators are in scope:
 //!
-//! The operators are intended to be composed by hand into a small plan and
-//! evaluated against in-memory relations. Integration with an external query
-//! plan IR is out of scope for this crate.
+//! - [`atom::scan_atom`] scans a [`table::Table`] under an
+//!   [`atom::AtomPattern`], filtering for repeated-variable equality and
+//!   literal equality, and outputs a binding [`relation::Relation`].
+//! - [`join::semijoin`] keeps rows of one relation whose shared-column values
+//!   appear in another.
+//! - [`join::natural_join`] combines rows that agree on shared columns,
+//!   emitting the union of their columns.
+//!
+//! Operators compose by function application; a "query plan written by hand"
+//! is just an expression like
+//! `natural_join(&semijoin(&a, &b), &scan_atom(&t, &p))`.
+//!
+//! Integration with an external query-plan IR is out of scope.
+
+pub mod atom;
+pub mod join;
+pub mod relation;
+pub mod table;
+pub mod value;
diff --git a/crates/query-ops/src/relation.rs b/crates/query-ops/src/relation.rs
new file mode 100644
index 0000000..3e49152
--- /dev/null
+++ b/crates/query-ops/src/relation.rs
@@ -0,0 +1,36 @@
+//! Binding relations: rows over named (variable) columns.
+//!
+//! Every operator in this crate (after the initial atom scan) consumes and
+//! produces [`Relation`]s. Column names are variable names; a value at column
+//! `i` of a row is the value bound to variable `columns[i]` in that solution.
+
+use crate::value::Value;
+
+#[derive(Debug, Clone)]
+pub struct Relation {
+    pub columns: Vec<String>,
+    pub rows: Vec<Vec<Value>>,
+}
+
+impl Relation {
+    #[must_use]
+    pub fn new(columns: Vec<String>) -> Self {
+        Self {
+            columns,
+            rows: Vec::new(),
+        }
+    }
+
+    /// # Panics
+    /// Panics if `row.len() != self.columns.len()`.
+    pub fn push(&mut self, row: Vec<Value>) {
+        assert_eq!(
+            row.len(),
+            self.columns.len(),
+            "row arity mismatch: expected {}, got {}",
+            self.columns.len(),
+            row.len(),
+        );
+        self.rows.push(row);
+    }
+}
diff --git a/crates/query-ops/src/table.rs b/crates/query-ops/src/table.rs
new file mode 100644
index 0000000..de0f143
--- /dev/null
+++ b/crates/query-ops/src/table.rs
@@ -0,0 +1,35 @@
+//! Raw input relations with positional columns.
+//!
+//! Tables are the input to atom scans. They carry no column names: positions
+//! are matched against an [`AtomPattern`](crate::atom::AtomPattern).
+
+use crate::value::Value;
+
+#[derive(Debug, Clone)]
+pub struct Table {
+    pub arity: usize,
+    pub rows: Vec<Vec<Value>>,
+}
+
+impl Table {
+    #[must_use]
+    pub fn new(arity: usize) -> Self {
+        Self {
+            arity,
+            rows: Vec::new(),
+        }
+    }
+
+    /// # Panics
+    /// Panics if `row.len() != self.arity`.
+    pub fn push(&mut self, row: Vec<Value>) {
+        assert_eq!(
+            row.len(),
+            self.arity,
+            "row arity mismatch: expected {}, got {}",
+            self.arity,
+            row.len(),
+        );
+        self.rows.push(row);
+    }
+}
diff --git a/crates/query-ops/src/value.rs b/crates/query-ops/src/value.rs
new file mode 100644
index 0000000..db7aaa8
--- /dev/null
+++ b/crates/query-ops/src/value.rs
@@ -0,0 +1,7 @@
+//! Cell values shared by tables and binding relations.
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum Value {
+    Int(i64),
+    Str(String),
+}