Improve scaffolding for query-ops crate

This commit is contained in:
Hassan Abedi 2026-06-01 13:22:32 +02:00
parent 6212f13ee6
commit b1d38eff49
6 changed files with 152 additions and 7 deletions

View File

@ -0,0 +1,29 @@
//! Atom operator: scan a [`Table`] under an [`AtomPattern`] and return a
//! binding [`Relation`].
//!
//! An atom pattern specifies, for each table column, either a variable to bind
//! or a literal that the cell must equal. A variable appearing in more than one
//! column forces those cells to be equal (so `Edge(X, X)` keeps only
//! self-loops). The output relation has one column per distinct variable, in
//! first-occurrence order.
use crate::{relation::Relation, table::Table, value::Value};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Term {
Var(String),
Lit(Value),
}
#[derive(Debug, Clone)]
pub struct AtomPattern {
pub columns: Vec<Term>,
}
#[must_use]
pub fn scan_atom(_table: &Table, _pattern: &AtomPattern) -> Relation {
todo!(
"scan rows, filter by repeated-variable equality and literal equality, \
project to one column per distinct variable in first-occurrence order"
)
}

View File

@ -0,0 +1,25 @@
//! Semijoin and natural join over binding relations.
//!
//! Both operators join on the shared column names of their inputs (the
//! "overlapping variables" in Datalog terms).
//!
//! - [`semijoin`] keeps rows of `left` whose shared-column values appear in
//! `right`. Output columns are `left.columns` unchanged.
//! - [`natural_join`] keeps every pair `(l, r)` that agrees on shared columns,
//! emitting one row with the union of columns. Output column order is
//! `left.columns` followed by `right.columns` minus the shared ones.
use crate::relation::Relation;
#[must_use]
pub fn semijoin(_left: &Relation, _right: &Relation) -> Relation {
todo!("hash `right` on shared columns, probe with `left`, keep matching left rows")
}
#[must_use]
pub fn natural_join(_left: &Relation, _right: &Relation) -> Relation {
todo!(
"hash one side on shared columns, probe with the other, emit \
left ++ (right \\ shared) for every match"
)
}

View File

@ -1,10 +1,23 @@
//! Physical operators for a small query-plan executor.
//!
//! Three operators are in scope:
//!
//! - [`atom::scan_atom`] scans a [`table::Table`] under an
//!   [`atom::AtomPattern`], filtering for repeated-variable equality and
//!   literal equality, and outputs a binding [`relation::Relation`].
//! - [`join::semijoin`] keeps rows of one relation whose shared-column values
//!   appear in another.
//! - [`join::natural_join`] combines rows that agree on shared columns,
//!   emitting the union of their columns.
//!
//! Operators compose by function application; a "query plan written by hand"
//! is just an expression like
//! `natural_join(&semijoin(&a, &b), &scan_atom(&t, &p))`.
//!
//! Integration with an external query-plan IR is out of scope.
pub mod atom;
pub mod join;
pub mod relation;
pub mod table;
pub mod value;

View File

@ -0,0 +1,36 @@
//! Binding relations: rows over named (variable) columns.
//!
//! Every operator in this crate (after the initial atom scan) consumes and
//! produces [`Relation`]s. Column names are variable names; a value at column
//! `i` of a row is the value bound to variable `columns[i]` in that solution.
use crate::value::Value;
/// A binding relation: rows of values over named (variable) columns.
///
/// Every row is expected to hold exactly `columns.len()` values.
/// [`Relation::push`] enforces this; because `rows` is public, code that
/// writes to it directly is responsible for keeping the invariant.
///
/// Equality is structural: two relations are equal when they have the same
/// columns in the same order and the same rows in the same order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Relation {
    /// Variable names, one per column.
    pub columns: Vec<String>,
    /// Row data; cell `i` of a row is the value bound to `columns[i]`.
    pub rows: Vec<Vec<Value>>,
}

impl Relation {
    /// Creates an empty relation over `columns`.
    #[must_use]
    pub fn new(columns: Vec<String>) -> Self {
        Self {
            columns,
            rows: Vec::new(),
        }
    }

    /// Appends `row` to the relation.
    ///
    /// # Panics
    /// Panics if `row.len() != self.columns.len()`.
    pub fn push(&mut self, row: Vec<Value>) {
        assert_eq!(
            row.len(),
            self.columns.len(),
            "row arity mismatch: expected {}, got {}",
            self.columns.len(),
            row.len(),
        );
        self.rows.push(row);
    }
}

View File

@ -0,0 +1,35 @@
//! Raw input relations with positional columns.
//!
//! Tables are the input to atom scans. They carry no column names: positions
//! are matched against an [`AtomPattern`](crate::atom::AtomPattern).
use crate::value::Value;
/// A raw input relation whose columns are identified by position only.
///
/// Every row is expected to hold exactly `arity` cells. [`Table::push`]
/// enforces this; because `rows` is public, code that writes to it directly
/// is responsible for keeping the invariant.
///
/// Equality is structural: two tables are equal when they have the same arity
/// and the same rows in the same order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Table {
    /// Number of cells in every row.
    pub arity: usize,
    /// Row data, in insertion order.
    pub rows: Vec<Vec<Value>>,
}

impl Table {
    /// Creates an empty table whose rows will have `arity` cells.
    #[must_use]
    pub fn new(arity: usize) -> Self {
        Self {
            arity,
            rows: Vec::new(),
        }
    }

    /// Appends `row` to the table.
    ///
    /// # Panics
    /// Panics if `row.len() != self.arity`.
    pub fn push(&mut self, row: Vec<Value>) {
        assert_eq!(
            row.len(),
            self.arity,
            "row arity mismatch: expected {}, got {}",
            self.arity,
            row.len(),
        );
        self.rows.push(row);
    }
}

View File

@ -0,0 +1,7 @@
//! Cell values shared by tables and binding relations.
/// A single cell value.
///
/// Values compare and hash structurally, so they can be used directly as
/// hash-map or hash-set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Value {
    /// A 64-bit signed integer.
    Int(i64),
    /// An owned string.
    Str(String),
}