diff --git a/crates/query-ops/README.md b/crates/query-ops/README.md new file mode 100644 index 0000000..98dd99f --- /dev/null +++ b/crates/query-ops/README.md @@ -0,0 +1,65 @@ +## Query Ops + +Physical operators for a small query-plan executor: atom scan, semijoin, and natural join over a binding relation. +Operators compose by function application, so a query plan written by hand is just an expression. + +![Architecture Diagram](docs/diagrams/architecture.svg) + +### Public API + +| Item | Type | Description | +|--------------------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `scan_atom(&Table, &AtomPattern) -> Relation` | function | Scans the table under the pattern and returns a binding relation with one column per distinct variable in first-occurrence order. Literal positions and repeated variables filter rows during the scan. | +| `semijoin(&Relation, &Relation) -> Relation` | function | Returns the rows of `left` whose values on the columns shared with `right` also appear in `right`. The output column list is the same as `left.columns`. | +| `natural_join(&Relation, &Relation) -> Relation` | function | Returns every pair of `left` and `right` rows that agree on shared columns. Each output row holds the columns of `left` followed by the non-shared columns of `right`. | +| `Table` | struct | Holds positional input rows of fixed arity and carries no column names. Construct it with `Table::new(arity)` or `Table::from_rows(arity, rows)`. | +| `AtomPattern` | struct | Specifies, for each table column, either a variable to bind or a literal value to match. Its `columns` field is a `Vec<Term>` whose length must equal the table's arity. | +| `Term` | enum | Represents one position of an `AtomPattern`. 
A term is either `Var(String)` to bind the cell to a named variable, or `Lit(Value)` to require the cell to equal a given value. | +| `Relation` | struct | Holds rows over named columns and is the type produced by every operator. Construct it with `Relation::new(columns)` or `Relation::from_rows(columns, rows)`. Column names within a single relation must be unique. | +| `Value` | enum | Represents a single cell value stored in a `Table` or `Relation`. A value is either `Int(i64)` or `Str(String)`. | + +### Example + +`Q(X) :- edge(X, X), labeled(X).` (labeled self-loops): + +```rust +use query_ops::atom::{AtomPattern, Term, scan_atom}; +use query_ops::join::semijoin; +use query_ops::table::Table; +use query_ops::value::Value; + +fn main() { + let edge = Table::from_rows( + 2, + vec![ + vec![Value::Int(1), Value::Int(2)], + vec![Value::Int(3), Value::Int(3)], // self-loop on 3 + vec![Value::Int(2), Value::Int(2)], // self-loop on 2 + ], + ); + let labeled = Table::from_rows(1, vec![vec![Value::Int(2)]]); + + let self_loops = scan_atom( + &edge, + &AtomPattern { + columns: vec![Term::Var("X".to_string()), Term::Var("X".to_string())], + }, + ); + let labeled_x = scan_atom( + &labeled, + &AtomPattern { + columns: vec![Term::Var("X".to_string())], + }, + ); + let result = semijoin(&self_loops, &labeled_x); + + assert_eq!(result.columns, vec!["X".to_string()]); + assert_eq!(result.rows, vec![vec![Value::Int(2)]]); +} +``` + +### Test + +```sh +cargo test -p query-ops +``` diff --git a/crates/query-ops/docs/diagrams/architecture.dot b/crates/query-ops/docs/diagrams/architecture.dot new file mode 100644 index 0000000..62eb353 --- /dev/null +++ b/crates/query-ops/docs/diagrams/architecture.dot @@ -0,0 +1,131 @@ +digraph QueryOpsHandPlan { +fontname = "Helvetica,Arial,sans-serif" +layout = dot +rankdir = LR +ranksep = 0.9; +nodesep = 0.7; +splines = true; +compound = true; +bgcolor = "white" + +node [ +fontname = "Helvetica,Arial,sans-serif", +shape = box, +style = 
"filled,rounded", +color = "#555555", +fillcolor = "white", +penwidth = 1.5 +] +edge [ +fontname = "Helvetica,Arial,sans-serif", +color = "#333333", +fontsize = 9, +fontcolor = "#555555", +labeldistance = 2.0, +penwidth = 1.2 +] + +subgraph cluster_inputs { +label = "Inputs (positional tables)" +style = "dashed" +color = "#888888" +fontcolor = "#555555" +margin = 18 +edge_table [label = < + + + +
Table: edge
• arity 2
• rows: (src, dst)
>, fillcolor = "#E8F4FD", color = "#2196F3"] +labeled_table [label = < + + + +
Table: labeled
• arity 1
• rows: (node)
>, fillcolor = "#E8F4FD", color = "#2196F3"] +} + +subgraph cluster_atoms { +label = "Atom Scans (scan_atom: Table × AtomPattern → Relation)" +style = "dashed" +color = "#9C27B0" +fontcolor = "#7B1FA2" +margin = 14 +self_loops [label = < + + + + +
self_loops
pattern: [Var X, Var X]
filter: row[0] == row[1]
cols: [X]
>, fillcolor = "#F3E5F5", color = "#9C27B0"] +edge_xy [label = < + + + + +
edge_xy
pattern: [Var X, Var Y]
filter: none
cols: [X, Y]
>, fillcolor = "#F3E5F5", color = "#9C27B0"] +labeled_x [label = < + + + +
labeled_x
pattern: [Var X]
cols: [X]
>, fillcolor = "#F3E5F5", color = "#9C27B0"] +labeled_y [label = < + + + +
labeled_y
pattern: [Var Y]
cols: [Y]
>, fillcolor = "#F3E5F5", color = "#9C27B0"] +} + +subgraph cluster_joins { +label = "Joins (shared cols = matching column names)" +style = "dashed" +color = "#4CAF50" +fontcolor = "#388E3C" +margin = 14 +q1 [label = < + + + + +
Q1: semijoin
edge(X, X), labeled(X)
keep left rows whose [X] is in right
cols: [X]
>, fillcolor = "#E8F5E9", color = "#4CAF50"] +q2 [label = < + + + + +
Q2: natural_join
edge(X, Y), labeled(Y)
emit left ++ (right \ shared) per match
cols: [X, Y]
>, fillcolor = "#E8F5E9", color = "#4CAF50"] +} + +subgraph cluster_outputs { +label = "Outputs (binding relations)" +style = "dashed" +color = "#888888" +fontcolor = "#555555" +margin = 18 +q1_out [label = < + + + +
Q1 result
labeled self-loops
cols: [X]
>, fillcolor = "#ECEFF1", color = "#607D8B"] +q2_out [label = < + + + +
Q2 result
edges into labeled nodes
cols: [X, Y]
>, fillcolor = "#ECEFF1", color = "#607D8B"] +} + +// Atom scans consume tables +edge_table -> self_loops [color = "#2196F3"] +edge_table -> edge_xy [color = "#2196F3"] +labeled_table -> labeled_x [color = "#2196F3"] +labeled_table -> labeled_y [color = "#2196F3"] + +// Q1: edge(X, X), labeled(X) -> semijoin +self_loops -> q1 [label = "left", color = "#9C27B0"] +labeled_x -> q1 [label = "right", color = "#9C27B0"] + +// Q2: edge(X, Y), labeled(Y) -> natural_join +edge_xy -> q2 [label = "left", color = "#9C27B0"] +labeled_y -> q2 [label = "right", color = "#9C27B0"] + +// Final outputs +q1 -> q1_out [color = "#4CAF50"] +q2 -> q2_out [color = "#4CAF50"] +} diff --git a/crates/query-ops/docs/diagrams/architecture.svg b/crates/query-ops/docs/diagrams/architecture.svg new file mode 100644 index 0000000..1d3a2be --- /dev/null +++ b/crates/query-ops/docs/diagrams/architecture.svg @@ -0,0 +1,299 @@ + + + + + + + QueryOpsHandPlan + + + cluster_inputs + + Inputs (positional tables) + + + + cluster_atoms + + Atom Scans  (scan_atom: Table × AtomPattern → Relation) + + + + cluster_joins + + Joins  (shared cols = matching column names) + + + + cluster_outputs + + Outputs (binding relations) + + + + + edge_table + + Table: edge + + • + arity 2 + + • + rows: (src, dst) + + + + + self_loops + + self_loops + + + pattern: [Var X, Var X] + + + filter: row[0] == row[1] + + + cols: [X] + + + + + edge_table->self_loops + + + + + + edge_xy + + edge_xy + + + pattern: [Var X, Var Y] + + + filter: none + + + cols: [X, Y] + + + + + edge_table->edge_xy + + + + + + labeled_table + + Table: labeled + + • + arity 1 + + • + rows: (node) + + + + + labeled_x + + labeled_x + + + pattern: [Var X] + + + cols: [X] + + + + + labeled_table->labeled_x + + + + + + labeled_y + + labeled_y + + + pattern: [Var Y] + + + cols: [Y] + + + + + labeled_table->labeled_y + + + + + + q1 + + Q1: semijoin + + + edge(X, X), labeled(X) + + + keep left rows whose [X] is in right + + + cols: [X] + + + + + self_loops->q1 + + 
+ left + + + + + q2 + + Q2: natural_join + + + edge(X, Y), labeled(Y) + + + emit left ++ (right \ shared) per match + + + cols: [X, Y] + + + + + edge_xy->q2 + + + left + + + + + labeled_x->q1 + + + right + + + + + labeled_y->q2 + + + right + + + + + q1_out + + Q1 result + + labeled self-loops + + cols: [X] + + + + + q1->q1_out + + + + + + q2_out + + Q2 result + + + edges into labeled nodes + + + cols: [X, Y] + + + + + q2->q2_out + + + + + diff --git a/crates/query-ops/docs/diagrams/make_figures.sh b/crates/query-ops/docs/diagrams/make_figures.sh new file mode 100755 index 0000000..6d30150 --- /dev/null +++ b/crates/query-ops/docs/diagrams/make_figures.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# You need to have Graphviz installed to run this script +# On Debian-based OSes, you can install it using: sudo apt-get install graphviz + +# Directory containing .dot files. Defaults to the script's own directory so the +# script works regardless of the caller's working directory. +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +ASSET_DIR=${1:-"${SCRIPT_DIR}"} + +# Make figures from .dot files +for f in "${ASSET_DIR}"/*.dot; do + dot -Tsvg "$f" -o "${f%.dot}.svg" +done diff --git a/crates/query-ops/src/atom.rs b/crates/query-ops/src/atom.rs index d9b31d9..edfa9e9 100644 --- a/crates/query-ops/src/atom.rs +++ b/crates/query-ops/src/atom.rs @@ -7,6 +7,8 @@ //! self-loops). The output relation has one column per distinct variable, in //! first-occurrence order. +use std::collections::HashMap; + use crate::{relation::Relation, table::Table, value::Value}; #[derive(Debug, Clone, PartialEq, Eq)] @@ -20,10 +22,169 @@ pub struct AtomPattern { pub columns: Vec, } +/// # Panics +/// Panics if `pattern.columns.len() != table.arity`. 
#[must_use] -pub fn scan_atom(_table: &Table, _pattern: &AtomPattern) -> Relation { - todo!( - "scan rows, filter by repeated-variable equality and literal equality, \ - project to one column per distinct variable in first-occurrence order" - ) +pub fn scan_atom(table: &Table, pattern: &AtomPattern) -> Relation { + assert_eq!( + pattern.columns.len(), + table.arity, + "pattern arity mismatch: pattern has {}, table has {}", + pattern.columns.len(), + table.arity, + ); + + let mut output_vars: Vec = Vec::new(); + let mut output_positions: Vec = Vec::new(); + let mut equality_pairs: Vec<(usize, usize)> = Vec::new(); + let mut literal_checks: Vec<(usize, &Value)> = Vec::new(); + let mut first_position: HashMap<&str, usize> = HashMap::new(); + + for (i, term) in pattern.columns.iter().enumerate() { + match term { + Term::Var(name) => { + if let Some(&j) = first_position.get(name.as_str()) { + equality_pairs.push((j, i)); + } else { + first_position.insert(name.as_str(), i); + output_vars.push(name.clone()); + output_positions.push(i); + } + } + Term::Lit(value) => literal_checks.push((i, value)), + } + } + + let mut output = Relation::new(output_vars); + 'rows: for row in &table.rows { + for &(i, lit) in &literal_checks { + if &row[i] != lit { + continue 'rows; + } + } + for &(j, i) in &equality_pairs { + if row[i] != row[j] { + continue 'rows; + } + } + let projected: Vec = output_positions.iter().map(|&i| row[i].clone()).collect(); + output.push(projected); + } + output +} + +#[cfg(test)] +mod tests { + use super::*; + + fn var(name: &str) -> Term { + Term::Var(name.to_string()) + } + + fn lit(value: i64) -> Term { + Term::Lit(Value::Int(value)) + } + + fn int(value: i64) -> Value { + Value::Int(value) + } + + #[test] + fn repeated_variable_keeps_only_self_loops() { + let edge = Table::from_rows( + 2, + vec![ + vec![int(1), int(2)], + vec![int(2), int(2)], + vec![int(3), int(3)], + vec![int(1), int(1)], + ], + ); + let pattern = AtomPattern { + columns: vec![var("X"), 
var("X")], + }; + let result = scan_atom(&edge, &pattern); + assert_eq!(result.columns, vec!["X".to_string()]); + assert_eq!(result.rows, vec![vec![int(2)], vec![int(3)], vec![int(1)]]); + } + + #[test] + fn literal_filters_rows_to_match() { + let edge = Table::from_rows( + 2, + vec![ + vec![int(1), int(2)], + vec![int(2), int(3)], + vec![int(1), int(4)], + ], + ); + let pattern = AtomPattern { + columns: vec![lit(1), var("Y")], + }; + let result = scan_atom(&edge, &pattern); + assert_eq!(result.columns, vec!["Y".to_string()]); + assert_eq!(result.rows, vec![vec![int(2)], vec![int(4)]]); + } + + #[test] + fn distinct_variables_project_in_first_occurrence_order() { + let triples = Table::from_rows( + 3, + vec![vec![int(1), int(2), int(3)], vec![int(4), int(5), int(6)]], + ); + let pattern = AtomPattern { + columns: vec![var("A"), var("B"), var("C")], + }; + let result = scan_atom(&triples, &pattern); + assert_eq!( + result.columns, + vec!["A".to_string(), "B".to_string(), "C".to_string()], + ); + assert_eq!( + result.rows, + vec![vec![int(1), int(2), int(3)], vec![int(4), int(5), int(6)]], + ); + } + + #[test] + fn variable_repeated_three_times_requires_all_equal() { + let triples = Table::from_rows( + 3, + vec![ + vec![int(1), int(1), int(1)], + vec![int(1), int(1), int(2)], + vec![int(2), int(2), int(2)], + vec![int(1), int(2), int(1)], + ], + ); + let pattern = AtomPattern { + columns: vec![var("X"), var("X"), var("X")], + }; + let result = scan_atom(&triples, &pattern); + assert_eq!(result.columns, vec!["X".to_string()]); + assert_eq!(result.rows, vec![vec![int(1)], vec![int(2)]]); + } + + #[test] + fn literal_filter_repeated_var_and_projection_combine() { + // Pattern: [Lit(1), Var("X"), Lit(2), Var("X")]. + // Keep rows where col0 == 1, col2 == 2, and col1 == col3. + // Output is one column [X], bound to col1 (the first occurrence). 
+ let table = Table::from_rows( + 4, + vec![ + vec![int(1), int(7), int(2), int(7)], + vec![int(1), int(7), int(2), int(8)], + vec![int(0), int(7), int(2), int(7)], + vec![int(1), int(7), int(3), int(7)], + vec![int(1), int(9), int(2), int(9)], + ], + ); + let pattern = AtomPattern { + columns: vec![lit(1), var("X"), lit(2), var("X")], + }; + let result = scan_atom(&table, &pattern); + assert_eq!(result.columns, vec!["X".to_string()]); + assert_eq!(result.rows, vec![vec![int(7)], vec![int(9)]]); + } } diff --git a/crates/query-ops/src/join.rs b/crates/query-ops/src/join.rs index e75ccd1..384baae 100644 --- a/crates/query-ops/src/join.rs +++ b/crates/query-ops/src/join.rs @@ -9,17 +9,212 @@ //! emitting one row with the union of columns. Output column order is //! `left.columns` followed by `right.columns` minus the shared ones. -use crate::relation::Relation; +use std::collections::{HashMap, HashSet}; -#[must_use] -pub fn semijoin(_left: &Relation, _right: &Relation) -> Relation { - todo!("hash `right` on shared columns, probe with `left`, keep matching left rows") +use crate::{relation::Relation, value::Value}; + +fn shared_columns(left: &Relation, right: &Relation) -> Vec<(usize, usize)> { + left.columns + .iter() + .enumerate() + .filter_map(|(li, name)| { + right + .columns + .iter() + .position(|rname| rname == name) + .map(|ri| (li, ri)) + }) + .collect() +} + +fn project<'a>(row: &'a [Value], indices: impl IntoIterator) -> Vec { + indices.into_iter().map(|&i| row[i].clone()).collect() } #[must_use] -pub fn natural_join(_left: &Relation, _right: &Relation) -> Relation { - todo!( - "hash one side on shared columns, probe with the other, emit \ - left ++ (right \\ shared) for every match" - ) +pub fn semijoin(left: &Relation, right: &Relation) -> Relation { + let shared = shared_columns(left, right); + let left_keys: Vec = shared.iter().map(|&(li, _)| li).collect(); + let right_keys: Vec = shared.iter().map(|&(_, ri)| ri).collect(); + + let mut right_set: 
HashSet> = HashSet::new(); + for row in &right.rows { + right_set.insert(project(row, &right_keys)); + } + + let mut output = Relation::new(left.columns.clone()); + for row in &left.rows { + if right_set.contains(&project(row, &left_keys)) { + output.push(row.clone()); + } + } + output +} + +#[must_use] +pub fn natural_join(left: &Relation, right: &Relation) -> Relation { + let shared = shared_columns(left, right); + let left_keys: Vec = shared.iter().map(|&(li, _)| li).collect(); + let right_keys: Vec = shared.iter().map(|&(_, ri)| ri).collect(); + + let shared_right: HashSet = right_keys.iter().copied().collect(); + let right_only: Vec = (0..right.columns.len()) + .filter(|i| !shared_right.contains(i)) + .collect(); + + let mut output_columns = left.columns.clone(); + for &i in &right_only { + output_columns.push(right.columns[i].clone()); + } + + let mut right_index: HashMap, Vec<&Vec>> = HashMap::new(); + for row in &right.rows { + right_index + .entry(project(row, &right_keys)) + .or_default() + .push(row); + } + + let mut output = Relation::new(output_columns); + for left_row in &left.rows { + let key = project(left_row, &left_keys); + let Some(matches) = right_index.get(&key) else { + continue; + }; + for right_row in matches { + let mut joined = left_row.clone(); + for &i in &right_only { + joined.push(right_row[i].clone()); + } + output.push(joined); + } + } + output +} + +#[cfg(test)] +mod tests { + use super::*; + + fn col(name: &str) -> String { + name.to_string() + } + + fn int(value: i64) -> Value { + Value::Int(value) + } + + #[test] + fn semijoin_keeps_left_rows_matched_on_shared_column() { + let left = Relation::from_rows( + vec![col("X"), col("Y")], + vec![ + vec![int(1), int(10)], + vec![int(2), int(20)], + vec![int(3), int(30)], + ], + ); + let right = Relation::from_rows(vec![col("X")], vec![vec![int(1)], vec![int(3)]]); + let result = semijoin(&left, &right); + assert_eq!(result.columns, vec![col("X"), col("Y")]); + assert_eq!( + result.rows, 
+ vec![vec![int(1), int(10)], vec![int(3), int(30)]], + ); + } + + #[test] + fn semijoin_does_not_duplicate_left_rows_when_right_has_duplicates() { + let left = Relation::from_rows(vec![col("X")], vec![vec![int(1)], vec![int(2)]]); + let right = Relation::from_rows( + vec![col("X"), col("Y")], + vec![ + vec![int(1), int(100)], + vec![int(1), int(101)], + vec![int(2), int(200)], + ], + ); + let result = semijoin(&left, &right); + assert_eq!(result.columns, vec![col("X")]); + assert_eq!(result.rows, vec![vec![int(1)], vec![int(2)]]); + } + + #[test] + fn natural_join_emits_union_of_columns_on_match() { + let left = Relation::from_rows( + vec![col("X"), col("Y")], + vec![vec![int(1), int(10)], vec![int(2), int(20)]], + ); + let right = Relation::from_rows( + vec![col("Y"), col("Z")], + vec![ + vec![int(10), int(100)], + vec![int(20), int(200)], + vec![int(20), int(201)], + ], + ); + let result = natural_join(&left, &right); + assert_eq!(result.columns, vec![col("X"), col("Y"), col("Z")]); + assert_eq!( + result.rows, + vec![ + vec![int(1), int(10), int(100)], + vec![int(2), int(20), int(200)], + vec![int(2), int(20), int(201)], + ], + ); + } + + #[test] + fn natural_join_with_no_shared_columns_is_cartesian_product() { + let left = Relation::from_rows(vec![col("X")], vec![vec![int(1)], vec![int(2)]]); + let right = Relation::from_rows(vec![col("Y")], vec![vec![int(10)], vec![int(20)]]); + let result = natural_join(&left, &right); + assert_eq!(result.columns, vec![col("X"), col("Y")]); + assert_eq!( + result.rows, + vec![ + vec![int(1), int(10)], + vec![int(1), int(20)], + vec![int(2), int(10)], + vec![int(2), int(20)], + ], + ); + } + + #[test] + fn semijoin_returns_empty_when_either_side_is_empty() { + let nonempty = Relation::from_rows(vec![col("X")], vec![vec![int(1)]]); + let empty = Relation::from_rows(vec![col("X")], vec![]); + + let r1 = semijoin(&empty, &nonempty); + assert_eq!(r1.columns, vec![col("X")]); + assert!(r1.rows.is_empty()); + + let r2 = 
semijoin(&nonempty, &empty); + assert_eq!(r2.columns, vec![col("X")]); + assert!(r2.rows.is_empty()); + + let r3 = semijoin(&empty, &empty); + assert_eq!(r3.columns, vec![col("X")]); + assert!(r3.rows.is_empty()); + } + + #[test] + fn natural_join_returns_empty_when_either_side_is_empty() { + let nonempty = Relation::from_rows(vec![col("X")], vec![vec![int(1)]]); + let empty = Relation::from_rows(vec![col("X")], vec![]); + + let r1 = natural_join(&empty, &nonempty); + assert_eq!(r1.columns, vec![col("X")]); + assert!(r1.rows.is_empty()); + + let r2 = natural_join(&nonempty, &empty); + assert_eq!(r2.columns, vec![col("X")]); + assert!(r2.rows.is_empty()); + + let r3 = natural_join(&empty, &empty); + assert_eq!(r3.columns, vec![col("X")]); + assert!(r3.rows.is_empty()); + } } diff --git a/crates/query-ops/src/relation.rs b/crates/query-ops/src/relation.rs index 3e49152..e2b75ca 100644 --- a/crates/query-ops/src/relation.rs +++ b/crates/query-ops/src/relation.rs @@ -3,6 +3,12 @@ //! Every operator in this crate (after the initial atom scan) consumes and //! produces [`Relation`]s. Column names are variable names; a value at column //! `i` of a row is the value bound to variable `columns[i]` in that solution. +//! +//! Column names within a single relation must be unique. Constructors enforce +//! this invariant; downstream operators rely on it when matching shared columns +//! across two relations. + +use std::collections::HashSet; use crate::value::Value; @@ -12,15 +18,46 @@ pub struct Relation { pub rows: Vec>, } +fn assert_unique_columns(columns: &[String]) { + let mut seen: HashSet<&str> = HashSet::with_capacity(columns.len()); + for name in columns { + assert!( + seen.insert(name.as_str()), + "duplicate column name in relation: {name}", + ); + } +} + impl Relation { + /// # Panics + /// Panics if `columns` contains a duplicate name. 
#[must_use] pub fn new(columns: Vec) -> Self { + assert_unique_columns(&columns); Self { columns, rows: Vec::new(), } } + /// # Panics + /// Panics if `columns` contains a duplicate name, or if any row's length + /// differs from `columns.len()`. + #[must_use] + pub fn from_rows(columns: Vec, rows: Vec>) -> Self { + assert_unique_columns(&columns); + let arity = columns.len(); + for (i, row) in rows.iter().enumerate() { + assert_eq!( + row.len(), + arity, + "row {i} arity mismatch: expected {arity}, got {}", + row.len(), + ); + } + Self { columns, rows } + } + /// # Panics /// Panics if `row.len() != self.columns.len()`. pub fn push(&mut self, row: Vec) { @@ -34,3 +71,20 @@ impl Relation { self.rows.push(row); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[should_panic(expected = "duplicate column name")] + fn from_rows_rejects_duplicate_column_names() { + let _ = Relation::from_rows(vec!["X".to_string(), "X".to_string()], vec![]); + } + + #[test] + #[should_panic(expected = "duplicate column name")] + fn new_rejects_duplicate_column_names() { + let _ = Relation::new(vec!["X".to_string(), "X".to_string()]); + } +} diff --git a/crates/query-ops/src/table.rs b/crates/query-ops/src/table.rs index de0f143..60af911 100644 --- a/crates/query-ops/src/table.rs +++ b/crates/query-ops/src/table.rs @@ -20,6 +20,21 @@ impl Table { } } + /// # Panics + /// Panics if any row's length differs from `arity`. + #[must_use] + pub fn from_rows(arity: usize, rows: Vec>) -> Self { + for (i, row) in rows.iter().enumerate() { + assert_eq!( + row.len(), + arity, + "row {i} arity mismatch: expected {arity}, got {}", + row.len(), + ); + } + Self { arity, rows } + } + /// # Panics /// Panics if `row.len() != self.arity`. pub fn push(&mut self, row: Vec) { diff --git a/crates/query-ops/tests/hand_plan.rs b/crates/query-ops/tests/hand_plan.rs new file mode 100644 index 0000000..a23c1b1 --- /dev/null +++ b/crates/query-ops/tests/hand_plan.rs @@ -0,0 +1,77 @@ +//! 
Hand-written query plans composed from `scan_atom`, `semijoin`, and `natural_join`. +//! +//! Schema: +//! - `edge(src, dst)`: directed edges +//! - `labeled(node)`: a set of labeled nodes +//! +//! Two rules are executed against the same fixture: +//! - `Q1(X) :- edge(X, X), labeled(X).` (labeled self-loops) +//! - `Q2(X, Y) :- edge(X, Y), labeled(Y).` (edges whose destination is labeled) + +use query_ops::atom::{scan_atom, AtomPattern, Term}; +use query_ops::join::{natural_join, semijoin}; +use query_ops::table::Table; +use query_ops::value::Value; + +fn var(name: &str) -> Term { + Term::Var(name.to_string()) +} + +fn int(value: i64) -> Value { + Value::Int(value) +} + +#[test] +fn labeled_self_loops_and_edges_into_labeled_nodes() { + let edge = Table::from_rows( + 2, + vec![ + vec![int(1), int(2)], + vec![int(2), int(3)], + vec![int(3), int(3)], + vec![int(4), int(1)], + vec![int(2), int(2)], + ], + ); + let labeled = Table::from_rows(1, vec![vec![int(2)], vec![int(3)]]); + + let self_loops = scan_atom( + &edge, + &AtomPattern { + columns: vec![var("X"), var("X")], + }, + ); + let labeled_x = scan_atom( + &labeled, + &AtomPattern { + columns: vec![var("X")], + }, + ); + let q1 = semijoin(&self_loops, &labeled_x); + assert_eq!(q1.columns, vec!["X".to_string()]); + assert_eq!(q1.rows, vec![vec![int(3)], vec![int(2)]]); + + let edge_xy = scan_atom( + &edge, + &AtomPattern { + columns: vec![var("X"), var("Y")], + }, + ); + let labeled_y = scan_atom( + &labeled, + &AtomPattern { + columns: vec![var("Y")], + }, + ); + let q2 = natural_join(&edge_xy, &labeled_y); + assert_eq!(q2.columns, vec!["X".to_string(), "Y".to_string()]); + assert_eq!( + q2.rows, + vec![ + vec![int(1), int(2)], + vec![int(2), int(3)], + vec![int(3), int(3)], + vec![int(2), int(2)], + ], + ); +}