Add the early version of the query-ops implementation
This commit is contained in:
parent
6819e3f8b3
commit
228a88d999
65
crates/query-ops/README.md
Normal file
65
crates/query-ops/README.md
Normal file
@ -0,0 +1,65 @@
|
||||
## Query Ops
|
||||
|
||||
Physical operators for a small query-plan executor: atom scan, semijoin, and natural join over a binding relation.
|
||||
Operators compose by function application, so a query plan written by hand is just an expression.
|
||||
|
||||

|
||||
|
||||
### Public API
|
||||
|
||||
| Item | Type | Description |
|
||||
|--------------------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `scan_atom(&Table, &AtomPattern) -> Relation` | function | Scans the table under the pattern and returns a binding relation with one column per distinct variable in first-occurrence order. Literal positions and repeated variables filter rows during the scan. |
|
||||
| `semijoin(&Relation, &Relation) -> Relation` | function | Returns the rows of `left` whose values on the columns shared with `right` also appear in `right`. The output column list is the same as `left.columns`. |
|
||||
| `natural_join(&Relation, &Relation) -> Relation` | function | Returns every pair of `left` and `right` rows that agree on shared columns. Each output row holds the columns of `left` followed by the non-shared columns of `right`. |
|
||||
| `Table` | struct | Holds positional input rows of fixed arity and carries no column names. Construct it with `Table::new(arity)` or `Table::from_rows(arity, rows)`. |
|
||||
| `AtomPattern` | struct | Specifies, for each table column, either a variable to bind or a literal value to match. Its `columns` field is a `Vec<Term>` whose length must equal the table's arity; `scan_atom` panics otherwise. |
|
||||
| `Term` | enum | Represents one position of an `AtomPattern`. A term is either `Var(String)` to bind the cell to a named variable, or `Lit(Value)` to require the cell to equal a given value. |
|
||||
| `Relation` | struct | Holds rows over named columns and is the type produced by every operator. Construct it with `Relation::new(columns)` or `Relation::from_rows(columns, rows)`. Column names within a single relation must be unique. |
|
||||
| `Value` | enum | Represents a single cell value stored in a `Table` or `Relation`. A value is either `Int(i64)` or `Str(String)`. |
|
||||
|
||||
### Example
|
||||
|
||||
`Q(X) :- edge(X, X), labeled(X).` (labeled self-loops):
|
||||
|
||||
```rust
|
||||
use query_ops::atom::{AtomPattern, Term, scan_atom};
|
||||
use query_ops::join::semijoin;
|
||||
use query_ops::table::Table;
|
||||
use query_ops::value::Value;
|
||||
|
||||
fn main() {
|
||||
let edge = Table::from_rows(
|
||||
2,
|
||||
vec![
|
||||
vec![Value::Int(1), Value::Int(2)],
|
||||
vec![Value::Int(3), Value::Int(3)], // self-loop on 3
|
||||
vec![Value::Int(2), Value::Int(2)], // self-loop on 2
|
||||
],
|
||||
);
|
||||
let labeled = Table::from_rows(1, vec![vec![Value::Int(2)]]);
|
||||
|
||||
let self_loops = scan_atom(
|
||||
&edge,
|
||||
&AtomPattern {
|
||||
columns: vec![Term::Var("X".to_string()), Term::Var("X".to_string())],
|
||||
},
|
||||
);
|
||||
let labeled_x = scan_atom(
|
||||
&labeled,
|
||||
&AtomPattern {
|
||||
columns: vec![Term::Var("X".to_string())],
|
||||
},
|
||||
);
|
||||
let result = semijoin(&self_loops, &labeled_x);
|
||||
|
||||
assert_eq!(result.columns, vec!["X".to_string()]);
|
||||
assert_eq!(result.rows, vec![vec![Value::Int(2)]]);
|
||||
}
|
||||
```
|
||||
|
||||
### Test
|
||||
|
||||
```sh
|
||||
cargo test -p query-ops
|
||||
```
|
||||
131
crates/query-ops/docs/diagrams/architecture.dot
Normal file
131
crates/query-ops/docs/diagrams/architecture.dot
Normal file
@ -0,0 +1,131 @@
|
||||
digraph QueryOpsHandPlan {
|
||||
fontname = "Helvetica,Arial,sans-serif"
|
||||
layout = dot
|
||||
rankdir = LR
|
||||
ranksep = 0.9;
|
||||
nodesep = 0.7;
|
||||
splines = true;
|
||||
compound = true;
|
||||
bgcolor = "white"
|
||||
|
||||
node [
|
||||
fontname = "Helvetica,Arial,sans-serif",
|
||||
shape = box,
|
||||
style = "filled,rounded",
|
||||
color = "#555555",
|
||||
fillcolor = "white",
|
||||
penwidth = 1.5
|
||||
]
|
||||
edge [
|
||||
fontname = "Helvetica,Arial,sans-serif",
|
||||
color = "#333333",
|
||||
fontsize = 9,
|
||||
fontcolor = "#555555",
|
||||
labeldistance = 2.0,
|
||||
penwidth = 1.2
|
||||
]
|
||||
|
||||
subgraph cluster_inputs {
|
||||
label = "Inputs (positional tables)"
|
||||
style = "dashed"
|
||||
color = "#888888"
|
||||
fontcolor = "#555555"
|
||||
margin = 18
|
||||
edge_table [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>Table: edge</b></td></tr>
|
||||
<tr><td align="left" balign="left">• arity 2</td></tr>
|
||||
<tr><td align="left" balign="left">• rows: (src, dst)</td></tr>
|
||||
</table>>, fillcolor = "#E8F4FD", color = "#2196F3"]
|
||||
labeled_table [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>Table: labeled</b></td></tr>
|
||||
<tr><td align="left" balign="left">• arity 1</td></tr>
|
||||
<tr><td align="left" balign="left">• rows: (node)</td></tr>
|
||||
</table>>, fillcolor = "#E8F4FD", color = "#2196F3"]
|
||||
}
|
||||
|
||||
subgraph cluster_atoms {
|
||||
label = "Atom Scans (scan_atom: Table × AtomPattern → Relation)"
|
||||
style = "dashed"
|
||||
color = "#9C27B0"
|
||||
fontcolor = "#7B1FA2"
|
||||
margin = 14
|
||||
self_loops [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>self_loops</b></td></tr>
|
||||
<tr><td align="left" balign="left">pattern: [Var X, Var X]</td></tr>
|
||||
<tr><td align="left" balign="left">filter: row[0] == row[1]</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [X]</td></tr>
|
||||
</table>>, fillcolor = "#F3E5F5", color = "#9C27B0"]
|
||||
edge_xy [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>edge_xy</b></td></tr>
|
||||
<tr><td align="left" balign="left">pattern: [Var X, Var Y]</td></tr>
|
||||
<tr><td align="left" balign="left">filter: none</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [X, Y]</td></tr>
|
||||
</table>>, fillcolor = "#F3E5F5", color = "#9C27B0"]
|
||||
labeled_x [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>labeled_x</b></td></tr>
|
||||
<tr><td align="left" balign="left">pattern: [Var X]</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [X]</td></tr>
|
||||
</table>>, fillcolor = "#F3E5F5", color = "#9C27B0"]
|
||||
labeled_y [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>labeled_y</b></td></tr>
|
||||
<tr><td align="left" balign="left">pattern: [Var Y]</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [Y]</td></tr>
|
||||
</table>>, fillcolor = "#F3E5F5", color = "#9C27B0"]
|
||||
}
|
||||
|
||||
subgraph cluster_joins {
|
||||
label = "Joins (shared cols = matching column names)"
|
||||
style = "dashed"
|
||||
color = "#4CAF50"
|
||||
fontcolor = "#388E3C"
|
||||
margin = 14
|
||||
q1 [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>Q1: semijoin</b></td></tr>
|
||||
<tr><td align="left" balign="left">edge(X, X), labeled(X)</td></tr>
|
||||
<tr><td align="left" balign="left">keep left rows whose [X] is in right</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [X]</td></tr>
|
||||
</table>>, fillcolor = "#E8F5E9", color = "#4CAF50"]
|
||||
q2 [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>Q2: natural_join</b></td></tr>
|
||||
<tr><td align="left" balign="left">edge(X, Y), labeled(Y)</td></tr>
|
||||
<tr><td align="left" balign="left">emit left ++ (right \ shared) per match</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [X, Y]</td></tr>
|
||||
</table>>, fillcolor = "#E8F5E9", color = "#4CAF50"]
|
||||
}
|
||||
|
||||
subgraph cluster_outputs {
|
||||
label = "Outputs (binding relations)"
|
||||
style = "dashed"
|
||||
color = "#888888"
|
||||
fontcolor = "#555555"
|
||||
margin = 18
|
||||
q1_out [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>Q1 result</b></td></tr>
|
||||
<tr><td align="left" balign="left">labeled self-loops</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [X]</td></tr>
|
||||
</table>>, fillcolor = "#ECEFF1", color = "#607D8B"]
|
||||
q2_out [label = <<table border="0" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td align="center"><b>Q2 result</b></td></tr>
|
||||
<tr><td align="left" balign="left">edges into labeled nodes</td></tr>
|
||||
<tr><td align="left" balign="left">cols: [X, Y]</td></tr>
|
||||
</table>>, fillcolor = "#ECEFF1", color = "#607D8B"]
|
||||
}
|
||||
|
||||
// Atom scans consume tables
|
||||
edge_table -> self_loops [color = "#2196F3"]
|
||||
edge_table -> edge_xy [color = "#2196F3"]
|
||||
labeled_table -> labeled_x [color = "#2196F3"]
|
||||
labeled_table -> labeled_y [color = "#2196F3"]
|
||||
|
||||
// Q1: edge(X, X), labeled(X) -> semijoin
|
||||
self_loops -> q1 [label = "left", color = "#9C27B0"]
|
||||
labeled_x -> q1 [label = "right", color = "#9C27B0"]
|
||||
|
||||
// Q2: edge(X, Y), labeled(Y) -> natural_join
|
||||
edge_xy -> q2 [label = "left", color = "#9C27B0"]
|
||||
labeled_y -> q2 [label = "right", color = "#9C27B0"]
|
||||
|
||||
// Final outputs
|
||||
q1 -> q1_out [color = "#4CAF50"]
|
||||
q2 -> q2_out [color = "#4CAF50"]
|
||||
}
|
||||
299
crates/query-ops/docs/diagrams/architecture.svg
Normal file
299
crates/query-ops/docs/diagrams/architecture.svg
Normal file
@ -0,0 +1,299 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<!-- Generated by graphviz version 12.2.1 (0)
|
||||
-->
|
||||
<!-- Title: QueryOpsHandPlan Pages: 1 -->
|
||||
<svg width="1194pt" height="666pt"
|
||||
viewBox="0.00 0.00 1193.62 666.00" xmlns="http://www.w3.org/2000/svg">
|
||||
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 662)">
|
||||
<title>QueryOpsHandPlan</title>
|
||||
<polygon fill="white" stroke="none" points="-4,4 -4,-662 1189.62,-662 1189.62,4 -4,4"/>
|
||||
<g id="clust1" class="cluster">
|
||||
<title>cluster_inputs</title>
|
||||
<polygon fill="white" stroke="#888888" stroke-dasharray="5,2"
|
||||
points="6.12,-154 6.12,-457 168.38,-457 168.38,-154 6.12,-154"/>
|
||||
<text text-anchor="middle" x="87.25" y="-439.7" font-family="Helvetica,Arial,sans-serif" font-size="14.00"
|
||||
fill="#555555">Inputs (positional tables)
|
||||
</text>
|
||||
</g>
|
||||
<g id="clust2" class="cluster">
|
||||
<title>cluster_atoms</title>
|
||||
<polygon fill="white" stroke="#9c27b0" stroke-dasharray="5,2"
|
||||
points="203.38,-8 203.38,-650 579.38,-650 579.38,-8 203.38,-8"/>
|
||||
<text text-anchor="middle" x="391.38" y="-632.7" font-family="Helvetica,Arial,sans-serif" font-size="14.00"
|
||||
fill="#7b1fa2">Atom Scans  (scan_atom: Table × AtomPattern → Relation)
|
||||
</text>
|
||||
</g>
|
||||
<g id="clust3" class="cluster">
|
||||
<title>cluster_joins</title>
|
||||
<polygon fill="white" stroke="#4caf50" stroke-dasharray="5,2"
|
||||
points="635.38,-70 635.38,-424 932.62,-424 932.62,-70 635.38,-70"/>
|
||||
<text text-anchor="middle" x="784" y="-406.7" font-family="Helvetica,Arial,sans-serif" font-size="14.00"
|
||||
fill="#388e3c">Joins  (shared cols = matching column names)
|
||||
</text>
|
||||
</g>
|
||||
<g id="clust4" class="cluster">
|
||||
<title>cluster_outputs</title>
|
||||
<polygon fill="white" stroke="#888888" stroke-dasharray="5,2"
|
||||
points="967.62,-99 967.62,-402 1177.62,-402 1177.62,-99 967.62,-99"/>
|
||||
<text text-anchor="middle" x="1072.62" y="-384.7" font-family="Helvetica,Arial,sans-serif" font-size="14.00"
|
||||
fill="#555555">Outputs (binding relations)
|
||||
</text>
|
||||
</g>
|
||||
<!-- edge_table -->
|
||||
<g id="node1" class="node">
|
||||
<title>edge_table</title>
|
||||
<path fill="#e8f4fd" stroke="#2196f3" stroke-width="1.5"
|
||||
d="M135.5,-410.12C135.5,-410.12 38,-410.12 38,-410.12 32,-410.12 26,-404.12 26,-398.12 26,-398.12 26,-327.88 26,-327.88 26,-321.88 32,-315.88 38,-315.88 38,-315.88 135.5,-315.88 135.5,-315.88 141.5,-315.88 147.5,-321.88 147.5,-327.88 147.5,-327.88 147.5,-398.12 147.5,-398.12 147.5,-404.12 141.5,-410.12 135.5,-410.12"/>
|
||||
<text text-anchor="start" x="49.25" y="-389.82" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">Table: edge
|
||||
</text>
|
||||
<text text-anchor="start" x="38" y="-360.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">•
|
||||
arity 2
|
||||
</text>
|
||||
<text text-anchor="start" x="38" y="-331.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">•
|
||||
rows: (src, dst)
|
||||
</text>
|
||||
</g>
|
||||
<!-- self_loops -->
|
||||
<g id="node3" class="node">
|
||||
<title>self_loops</title>
|
||||
<path fill="#f3e5f5" stroke="#9c27b0" stroke-width="1.5"
|
||||
d="M456.12,-606.62C456.12,-606.62 325.62,-606.62 325.62,-606.62 319.62,-606.62 313.62,-600.62 313.62,-594.62 313.62,-594.62 313.62,-495.38 313.62,-495.38 313.62,-489.38 319.62,-483.38 325.62,-483.38 325.62,-483.38 456.12,-483.38 456.12,-483.38 462.12,-483.38 468.12,-489.38 468.12,-495.38 468.12,-495.38 468.12,-594.62 468.12,-594.62 468.12,-600.62 462.12,-606.62 456.12,-606.62"/>
|
||||
<text text-anchor="start" x="357.5" y="-586.33" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">self_loops
|
||||
</text>
|
||||
<text text-anchor="start" x="325.62" y="-557.08" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
pattern: [Var X, Var X]
|
||||
</text>
|
||||
<text text-anchor="start" x="325.62" y="-528.08" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
filter: row[0] == row[1]
|
||||
</text>
|
||||
<text text-anchor="start" x="325.62" y="-499.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
cols: [X]
|
||||
</text>
|
||||
</g>
|
||||
<!-- edge_table->self_loops -->
|
||||
<g id="edge1" class="edge">
|
||||
<title>edge_table->self_loops</title>
|
||||
<path fill="none" stroke="#2196f3" stroke-width="1.2"
|
||||
d="M139.07,-410.51C158.38,-427.04 181.08,-444.95 203.38,-459 234.4,-478.56 270.61,-496.29 302.82,-510.5"/>
|
||||
<polygon fill="#2196f3" stroke="#2196f3" stroke-width="1.2"
|
||||
points="301.35,-513.67 311.91,-514.45 304.14,-507.25 301.35,-513.67"/>
|
||||
</g>
|
||||
<!-- edge_xy -->
|
||||
<g id="node4" class="node">
|
||||
<title>edge_xy</title>
|
||||
<path fill="#f3e5f5" stroke="#9c27b0" stroke-width="1.5"
|
||||
d="M456.12,-433.62C456.12,-433.62 325.62,-433.62 325.62,-433.62 319.62,-433.62 313.62,-427.62 313.62,-421.62 313.62,-421.62 313.62,-322.38 313.62,-322.38 313.62,-316.38 319.62,-310.38 325.62,-310.38 325.62,-310.38 456.12,-310.38 456.12,-310.38 462.12,-310.38 468.12,-316.38 468.12,-322.38 468.12,-322.38 468.12,-421.62 468.12,-421.62 468.12,-427.62 462.12,-433.62 456.12,-433.62"/>
|
||||
<text text-anchor="start" x="363.5" y="-413.32" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">edge_xy
|
||||
</text>
|
||||
<text text-anchor="start" x="325.62" y="-384.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
pattern: [Var X, Var Y]
|
||||
</text>
|
||||
<text text-anchor="start" x="325.62" y="-355.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
filter: none
|
||||
</text>
|
||||
<text text-anchor="start" x="325.62" y="-326.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
cols: [X, Y]
|
||||
</text>
|
||||
</g>
|
||||
<!-- edge_table->edge_xy -->
|
||||
<g id="edge2" class="edge">
|
||||
<title>edge_table->edge_xy</title>
|
||||
<path fill="none" stroke="#2196f3" stroke-width="1.2"
|
||||
d="M147.8,-364.79C191.77,-366.1 252.15,-367.9 301.6,-369.37"/>
|
||||
<polygon fill="#2196f3" stroke="#2196f3" stroke-width="1.2"
|
||||
points="301.35,-372.86 311.45,-369.66 301.56,-365.87 301.35,-372.86"/>
|
||||
</g>
|
||||
<!-- labeled_table -->
|
||||
<g id="node2" class="node">
|
||||
<title>labeled_table</title>
|
||||
<path fill="#e8f4fd" stroke="#2196f3" stroke-width="1.5"
|
||||
d="M131.38,-266.12C131.38,-266.12 42.12,-266.12 42.12,-266.12 36.12,-266.12 30.12,-260.12 30.12,-254.12 30.12,-254.12 30.12,-183.88 30.12,-183.88 30.12,-177.88 36.12,-171.88 42.12,-171.88 42.12,-171.88 131.38,-171.88 131.38,-171.88 137.38,-171.88 143.38,-177.88 143.38,-183.88 143.38,-183.88 143.38,-254.12 143.38,-254.12 143.38,-260.12 137.38,-266.12 131.38,-266.12"/>
|
||||
<text text-anchor="start" x="42.12" y="-245.82" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">Table: labeled
|
||||
</text>
|
||||
<text text-anchor="start" x="42.12" y="-216.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">•
|
||||
arity 1
|
||||
</text>
|
||||
<text text-anchor="start" x="42.12" y="-187.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">•
|
||||
rows: (node)
|
||||
</text>
|
||||
</g>
|
||||
<!-- labeled_x -->
|
||||
<g id="node5" class="node">
|
||||
<title>labeled_x</title>
|
||||
<path fill="#f3e5f5" stroke="#9c27b0" stroke-width="1.5"
|
||||
d="M435.88,-260.12C435.88,-260.12 345.88,-260.12 345.88,-260.12 339.88,-260.12 333.88,-254.12 333.88,-248.12 333.88,-248.12 333.88,-177.88 333.88,-177.88 333.88,-171.88 339.88,-165.88 345.88,-165.88 345.88,-165.88 435.88,-165.88 435.88,-165.88 441.88,-165.88 447.88,-171.88 447.88,-177.88 447.88,-177.88 447.88,-248.12 447.88,-248.12 447.88,-254.12 441.88,-260.12 435.88,-260.12"/>
|
||||
<text text-anchor="start" x="360.12" y="-239.82" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">labeled_x
|
||||
</text>
|
||||
<text text-anchor="start" x="345.88" y="-210.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
pattern: [Var X]
|
||||
</text>
|
||||
<text text-anchor="start" x="345.88" y="-181.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
cols: [X]
|
||||
</text>
|
||||
</g>
|
||||
<!-- labeled_table->labeled_x -->
|
||||
<g id="edge3" class="edge">
|
||||
<title>labeled_table->labeled_x</title>
|
||||
<path fill="none" stroke="#2196f3" stroke-width="1.2"
|
||||
d="M143.87,-217.89C194.16,-216.89 268.06,-215.42 321.73,-214.35"/>
|
||||
<polygon fill="#2196f3" stroke="#2196f3" stroke-width="1.2"
|
||||
points="321.8,-217.85 331.73,-214.15 321.66,-210.85 321.8,-217.85"/>
|
||||
</g>
|
||||
<!-- labeled_y -->
|
||||
<g id="node6" class="node">
|
||||
<title>labeled_y</title>
|
||||
<path fill="#f3e5f5" stroke="#9c27b0" stroke-width="1.5"
|
||||
d="M435.88,-116.12C435.88,-116.12 345.88,-116.12 345.88,-116.12 339.88,-116.12 333.88,-110.12 333.88,-104.12 333.88,-104.12 333.88,-33.88 333.88,-33.88 333.88,-27.88 339.88,-21.88 345.88,-21.88 345.88,-21.88 435.88,-21.88 435.88,-21.88 441.88,-21.88 447.88,-27.88 447.88,-33.88 447.88,-33.88 447.88,-104.12 447.88,-104.12 447.88,-110.12 441.88,-116.12 435.88,-116.12"/>
|
||||
<text text-anchor="start" x="360.12" y="-95.83" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">labeled_y
|
||||
</text>
|
||||
<text text-anchor="start" x="345.88" y="-66.58" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
pattern: [Var Y]
|
||||
</text>
|
||||
<text text-anchor="start" x="345.88" y="-37.58" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
cols: [Y]
|
||||
</text>
|
||||
</g>
|
||||
<!-- labeled_table->labeled_y -->
|
||||
<g id="edge4" class="edge">
|
||||
<title>labeled_table->labeled_y</title>
|
||||
<path fill="none" stroke="#2196f3" stroke-width="1.2"
|
||||
d="M143.81,-177.48C162.25,-164.84 183.2,-151.53 203.38,-141 241.58,-121.06 286.59,-103.53 322.82,-90.76"/>
|
||||
<polygon fill="#2196f3" stroke="#2196f3" stroke-width="1.2"
|
||||
points="323.76,-94.14 332.05,-87.55 321.46,-87.53 323.76,-94.14"/>
|
||||
</g>
|
||||
<!-- q1 -->
|
||||
<g id="node7" class="node">
|
||||
<title>q1</title>
|
||||
<path fill="#e8f5e9" stroke="#4caf50" stroke-width="1.5"
|
||||
d="M885.88,-380.62C885.88,-380.62 681.12,-380.62 681.12,-380.62 675.12,-380.62 669.12,-374.62 669.12,-368.62 669.12,-368.62 669.12,-269.38 669.12,-269.38 669.12,-263.38 675.12,-257.38 681.12,-257.38 681.12,-257.38 885.88,-257.38 885.88,-257.38 891.88,-257.38 897.88,-263.38 897.88,-269.38 897.88,-269.38 897.88,-368.62 897.88,-368.62 897.88,-374.62 891.88,-380.62 885.88,-380.62"/>
|
||||
<text text-anchor="start" x="742.25" y="-360.32" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">Q1: semijoin
|
||||
</text>
|
||||
<text text-anchor="start" x="681.12" y="-331.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
edge(X, X), labeled(X)
|
||||
</text>
|
||||
<text text-anchor="start" x="681.12" y="-302.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
keep left rows whose [X] is in right
|
||||
</text>
|
||||
<text text-anchor="start" x="681.12" y="-273.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
cols: [X]
|
||||
</text>
|
||||
</g>
|
||||
<!-- self_loops->q1 -->
|
||||
<g id="edge5" class="edge">
|
||||
<title>self_loops->q1</title>
|
||||
<path fill="none" stroke="#9c27b0" stroke-width="1.2"
|
||||
d="M468.18,-513.59C502.81,-498.43 543.88,-479.17 579.38,-459 616.82,-437.72 656.18,-411.56 690.18,-387.64"/>
|
||||
<polygon fill="#9c27b0" stroke="#9c27b0" stroke-width="1.2"
|
||||
points="692,-390.64 698.14,-382.01 687.96,-384.93 692,-390.64"/>
|
||||
<text text-anchor="middle" x="607.38" y="-450.4" font-family="Helvetica,Arial,sans-serif" font-size="9.00"
|
||||
fill="#555555">left
|
||||
</text>
|
||||
</g>
|
||||
<!-- q2 -->
|
||||
<g id="node8" class="node">
|
||||
<title>q2</title>
|
||||
<path fill="#e8f5e9" stroke="#4caf50" stroke-width="1.5"
|
||||
d="M896,-207.62C896,-207.62 671,-207.62 671,-207.62 665,-207.62 659,-201.62 659,-195.62 659,-195.62 659,-96.38 659,-96.38 659,-90.38 665,-84.38 671,-84.38 671,-84.38 896,-84.38 896,-84.38 902,-84.38 908,-90.38 908,-96.38 908,-96.38 908,-195.62 908,-195.62 908,-201.62 902,-207.62 896,-207.62"/>
|
||||
<text text-anchor="start" x="731.75" y="-187.32" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">Q2: natural_join
|
||||
</text>
|
||||
<text text-anchor="start" x="671" y="-158.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
edge(X, Y), labeled(Y)
|
||||
</text>
|
||||
<text text-anchor="start" x="671" y="-129.07" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
emit left ++ (right \ shared) per match
|
||||
</text>
|
||||
<text text-anchor="start" x="671" y="-100.08" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
cols: [X, Y]
|
||||
</text>
|
||||
</g>
|
||||
<!-- edge_xy->q2 -->
|
||||
<g id="edge7" class="edge">
|
||||
<title>edge_xy->q2</title>
|
||||
<path fill="none" stroke="#9c27b0" stroke-width="1.2"
|
||||
d="M468.53,-363.61C517.02,-354.43 577.64,-335.21 616.38,-295 636.66,-273.94 617.27,-254.97 635.38,-232 639.88,-226.28 644.88,-220.87 650.24,-215.75"/>
|
||||
<polygon fill="#9c27b0" stroke="#9c27b0" stroke-width="1.2"
|
||||
points="652.47,-218.45 657.58,-209.17 647.79,-213.24 652.47,-218.45"/>
|
||||
<text text-anchor="middle" x="607.38" y="-312.83" font-family="Helvetica,Arial,sans-serif" font-size="9.00"
|
||||
fill="#555555">left
|
||||
</text>
|
||||
</g>
|
||||
<!-- labeled_x->q1 -->
|
||||
<g id="edge6" class="edge">
|
||||
<title>labeled_x->q1</title>
|
||||
<path fill="none" stroke="#9c27b0" stroke-width="1.2"
|
||||
d="M448.17,-228.1C493.59,-240.3 559.11,-257.94 616.38,-273.5 629.81,-277.15 643.85,-280.98 657.84,-284.8"/>
|
||||
<polygon fill="#9c27b0" stroke="#9c27b0" stroke-width="1.2"
|
||||
points="656.71,-288.12 667.27,-287.38 658.55,-281.36 656.71,-288.12"/>
|
||||
<text text-anchor="middle" x="607.38" y="-278.45" font-family="Helvetica,Arial,sans-serif" font-size="9.00"
|
||||
fill="#555555">right
|
||||
</text>
|
||||
</g>
|
||||
<!-- labeled_y->q2 -->
|
||||
<g id="edge8" class="edge">
|
||||
<title>labeled_y->q2</title>
|
||||
<path fill="none" stroke="#9c27b0" stroke-width="1.2"
|
||||
d="M448.26,-80.11C500.19,-90.35 579.3,-105.95 647.52,-119.39"/>
|
||||
<polygon fill="#9c27b0" stroke="#9c27b0" stroke-width="1.2"
|
||||
points="646.54,-122.77 657.03,-121.27 647.89,-115.9 646.54,-122.77"/>
|
||||
<text text-anchor="middle" x="607.38" y="-117.06" font-family="Helvetica,Arial,sans-serif" font-size="9.00"
|
||||
fill="#555555">right
|
||||
</text>
|
||||
</g>
|
||||
<!-- q1_out -->
|
||||
<g id="node9" class="node">
|
||||
<title>q1_out</title>
|
||||
<path fill="#eceff1" stroke="#607d8b" stroke-width="1.5"
|
||||
d="M1125.12,-355.12C1125.12,-355.12 1020.12,-355.12 1020.12,-355.12 1014.12,-355.12 1008.12,-349.12 1008.12,-343.12 1008.12,-343.12 1008.12,-272.88 1008.12,-272.88 1008.12,-266.88 1014.12,-260.88 1020.12,-260.88 1020.12,-260.88 1125.12,-260.88 1125.12,-260.88 1131.12,-260.88 1137.12,-266.88 1137.12,-272.88 1137.12,-272.88 1137.12,-343.12 1137.12,-343.12 1137.12,-349.12 1131.12,-355.12 1125.12,-355.12"/>
|
||||
<text text-anchor="start" x="1043" y="-334.82" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">Q1 result
|
||||
</text>
|
||||
<text text-anchor="start" x="1020.12" y="-305.57" font-family="Helvetica,Arial,sans-serif"
|
||||
font-size="14.00">labeled self-loops
|
||||
</text>
|
||||
<text text-anchor="start" x="1020.12" y="-276.57" font-family="Helvetica,Arial,sans-serif"
|
||||
font-size="14.00">cols: [X]
|
||||
</text>
|
||||
</g>
|
||||
<!-- q1->q1_out -->
|
||||
<g id="edge9" class="edge">
|
||||
<title>q1->q1_out</title>
|
||||
<path fill="none" stroke="#4caf50" stroke-width="1.2"
|
||||
d="M897.98,-314.65C930.91,-313.39 966.06,-312.04 996.06,-310.9"/>
|
||||
<polygon fill="#4caf50" stroke="#4caf50" stroke-width="1.2"
|
||||
points="996.04,-314.4 1005.9,-310.52 995.77,-307.4 996.04,-314.4"/>
|
||||
</g>
|
||||
<!-- q2_out -->
|
||||
<g id="node10" class="node">
|
||||
<title>q2_out</title>
|
||||
<path fill="#eceff1" stroke="#607d8b" stroke-width="1.5"
|
||||
d="M1147.62,-211.12C1147.62,-211.12 997.62,-211.12 997.62,-211.12 991.62,-211.12 985.62,-205.12 985.62,-199.12 985.62,-199.12 985.62,-128.88 985.62,-128.88 985.62,-122.88 991.62,-116.88 997.62,-116.88 997.62,-116.88 1147.62,-116.88 1147.62,-116.88 1153.62,-116.88 1159.62,-122.88 1159.62,-128.88 1159.62,-128.88 1159.62,-199.12 1159.62,-199.12 1159.62,-205.12 1153.62,-211.12 1147.62,-211.12"/>
|
||||
<text text-anchor="start" x="1043" y="-190.82" font-family="Helvetica,Arial,sans-serif" font-weight="bold"
|
||||
font-size="14.00">Q2 result
|
||||
</text>
|
||||
<text text-anchor="start" x="997.62" y="-161.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
edges into labeled nodes
|
||||
</text>
|
||||
<text text-anchor="start" x="997.62" y="-132.57" font-family="Helvetica,Arial,sans-serif" font-size="14.00">
|
||||
cols: [X, Y]
|
||||
</text>
|
||||
</g>
|
||||
<!-- q2->q2_out -->
|
||||
<g id="edge10" class="edge">
|
||||
<title>q2->q2_out</title>
|
||||
<path fill="none" stroke="#4caf50" stroke-width="1.2"
|
||||
d="M908.35,-153.76C930.25,-155.14 952.77,-156.55 973.83,-157.87"/>
|
||||
<polygon fill="#4caf50" stroke="#4caf50" stroke-width="1.2"
|
||||
points="973.35,-161.35 983.55,-158.48 973.79,-154.36 973.35,-161.35"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 19 KiB |
14
crates/query-ops/docs/diagrams/make_figures.sh
Executable file
14
crates/query-ops/docs/diagrams/make_figures.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash

# Renders every Graphviz .dot file in a directory to an .svg file next to it.
#
# Usage: make_figures.sh [ASSET_DIR]
#
# You need to have Graphviz installed to run this script
# On Debian-based OSes, you can install it using: sudo apt-get install graphviz

# Fail early with one clear message instead of a "command not found" per file.
if ! command -v dot >/dev/null 2>&1; then
  echo "error: Graphviz 'dot' was not found on PATH" >&2
  exit 1
fi

# Directory containing .dot files. Defaults to the script's own directory so the
# script works regardless of the caller's working directory.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ASSET_DIR=${1:-"${SCRIPT_DIR}"}

# With nullglob an unmatched pattern expands to nothing; without it the literal
# string "*.dot" would be handed to dot as if it were a file name.
shopt -s nullglob
dot_files=("${ASSET_DIR}"/*.dot)
shopt -u nullglob

if [ "${#dot_files[@]}" -eq 0 ]; then
  echo "error: no .dot files found in ${ASSET_DIR}" >&2
  exit 1
fi

# Make figures from .dot files. Keep rendering after a failure so one broken
# diagram does not block the others, but remember it for the exit status.
status=0
for f in "${dot_files[@]}"; do
  if ! dot -Tsvg "$f" -o "${f%.dot}.svg"; then
    echo "error: failed to render ${f}" >&2
    status=1
  fi
done

exit "${status}"
|
||||
@ -7,6 +7,8 @@
|
||||
//! self-loops). The output relation has one column per distinct variable, in
|
||||
//! first-occurrence order.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{relation::Relation, table::Table, value::Value};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@ -20,10 +22,169 @@ pub struct AtomPattern {
|
||||
pub columns: Vec<Term>,
|
||||
}
|
||||
|
||||
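/// Scans `table` under `pattern` and returns one binding row per matching table row,
/// with one column per distinct variable in first-occurrence order.
///
/// # Panics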
|
||||
/// Panics if `pattern.columns.len() != table.arity`.
|
||||
#[must_use]
|
||||
pub fn scan_atom(_table: &Table, _pattern: &AtomPattern) -> Relation {
|
||||
todo!(
|
||||
"scan rows, filter by repeated-variable equality and literal equality, \
|
||||
project to one column per distinct variable in first-occurrence order"
|
||||
)
|
||||
pub fn scan_atom(table: &Table, pattern: &AtomPattern) -> Relation {
|
||||
assert_eq!(
|
||||
pattern.columns.len(),
|
||||
table.arity,
|
||||
"pattern arity mismatch: pattern has {}, table has {}",
|
||||
pattern.columns.len(),
|
||||
table.arity,
|
||||
);
|
||||
|
||||
let mut output_vars: Vec<String> = Vec::new();
|
||||
let mut output_positions: Vec<usize> = Vec::new();
|
||||
let mut equality_pairs: Vec<(usize, usize)> = Vec::new();
|
||||
let mut literal_checks: Vec<(usize, &Value)> = Vec::new();
|
||||
let mut first_position: HashMap<&str, usize> = HashMap::new();
|
||||
|
||||
for (i, term) in pattern.columns.iter().enumerate() {
|
||||
match term {
|
||||
Term::Var(name) => {
|
||||
if let Some(&j) = first_position.get(name.as_str()) {
|
||||
equality_pairs.push((j, i));
|
||||
} else {
|
||||
first_position.insert(name.as_str(), i);
|
||||
output_vars.push(name.clone());
|
||||
output_positions.push(i);
|
||||
}
|
||||
}
|
||||
Term::Lit(value) => literal_checks.push((i, value)),
|
||||
}
|
||||
}
|
||||
|
||||
let mut output = Relation::new(output_vars);
|
||||
'rows: for row in &table.rows {
|
||||
for &(i, lit) in &literal_checks {
|
||||
if &row[i] != lit {
|
||||
continue 'rows;
|
||||
}
|
||||
}
|
||||
for &(j, i) in &equality_pairs {
|
||||
if row[i] != row[j] {
|
||||
continue 'rows;
|
||||
}
|
||||
}
|
||||
let projected: Vec<Value> = output_positions.iter().map(|&i| row[i].clone()).collect();
|
||||
output.push(projected);
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Pattern term that binds its column to the variable `name`.
    fn bind(name: &str) -> Term {
        Term::Var(String::from(name))
    }

    /// Pattern term that requires its column to equal the integer `n`.
    fn exact(n: i64) -> Term {
        Term::Lit(Value::Int(n))
    }

    /// Builds one row of integer cells.
    fn row(cells: &[i64]) -> Vec<Value> {
        cells.iter().copied().map(Value::Int).collect()
    }

    /// Owned column names, for comparison with the `columns` of a result.
    fn names(list: &[&str]) -> Vec<String> {
        list.iter().map(|name| (*name).to_string()).collect()
    }

    /// Scans `table` under the pattern made of `terms`.
    fn scan(table: &Table, terms: Vec<Term>) -> Relation {
        scan_atom(table, &AtomPattern { columns: terms })
    }

    #[test]
    fn repeated_variable_keeps_only_self_loops() {
        let edge = Table::from_rows(
            2,
            vec![row(&[1, 2]), row(&[2, 2]), row(&[3, 3]), row(&[1, 1])],
        );

        let result = scan(&edge, vec![bind("X"), bind("X")]);

        assert_eq!(result.columns, names(&["X"]));
        assert_eq!(result.rows, vec![row(&[2]), row(&[3]), row(&[1])]);
    }

    #[test]
    fn literal_filters_rows_to_match() {
        let edge = Table::from_rows(2, vec![row(&[1, 2]), row(&[2, 3]), row(&[1, 4])]);

        let result = scan(&edge, vec![exact(1), bind("Y")]);

        assert_eq!(result.columns, names(&["Y"]));
        assert_eq!(result.rows, vec![row(&[2]), row(&[4])]);
    }

    #[test]
    fn distinct_variables_project_in_first_occurrence_order() {
        let triples = Table::from_rows(3, vec![row(&[1, 2, 3]), row(&[4, 5, 6])]);

        let result = scan(&triples, vec![bind("A"), bind("B"), bind("C")]);

        assert_eq!(result.columns, names(&["A", "B", "C"]));
        assert_eq!(result.rows, vec![row(&[1, 2, 3]), row(&[4, 5, 6])]);
    }

    #[test]
    fn variable_repeated_three_times_requires_all_equal() {
        let triples = Table::from_rows(
            3,
            vec![
                row(&[1, 1, 1]),
                row(&[1, 1, 2]),
                row(&[2, 2, 2]),
                row(&[1, 2, 1]),
            ],
        );

        let result = scan(&triples, vec![bind("X"), bind("X"), bind("X")]);

        assert_eq!(result.columns, names(&["X"]));
        assert_eq!(result.rows, vec![row(&[1]), row(&[2])]);
    }

    #[test]
    fn literal_filter_repeated_var_and_projection_combine() {
        // The pattern is [Lit(1), Var("X"), Lit(2), Var("X")]: a row survives
        // only when col0 == 1, col2 == 2 and col1 == col3. The single output
        // column X takes its value from col1, the first occurrence of X.
        let table = Table::from_rows(
            4,
            vec![
                row(&[1, 7, 2, 7]),
                row(&[1, 7, 2, 8]),
                row(&[0, 7, 2, 7]),
                row(&[1, 7, 3, 7]),
                row(&[1, 9, 2, 9]),
            ],
        );

        let result = scan(&table, vec![exact(1), bind("X"), exact(2), bind("X")]);

        assert_eq!(result.columns, names(&["X"]));
        assert_eq!(result.rows, vec![row(&[7]), row(&[9])]);
    }
}
|
||||
|
||||
@ -9,17 +9,212 @@
|
||||
//! emitting one row with the union of columns. Output column order is
|
||||
//! `left.columns` followed by `right.columns` minus the shared ones.
|
||||
|
||||
use crate::relation::Relation;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
#[must_use]
|
||||
pub fn semijoin(_left: &Relation, _right: &Relation) -> Relation {
|
||||
todo!("hash `right` on shared columns, probe with `left`, keep matching left rows")
|
||||
use crate::{relation::Relation, value::Value};
|
||||
|
||||
/// Pairs up the columns that `left` and `right` have in common.
///
/// Each returned pair is `(index in left.columns, index in right.columns)`
/// for one column name present on both sides. Pairs are listed in the order
/// the names appear in `left.columns`; for every left name, the first
/// right-hand column with the same name is used.
fn shared_columns(left: &Relation, right: &Relation) -> Vec<(usize, usize)> {
    let mut pairs = Vec::new();
    for (left_idx, left_name) in left.columns.iter().enumerate() {
        let found = right
            .columns
            .iter()
            .position(|candidate| candidate == left_name);
        if let Some(right_idx) = found {
            pairs.push((left_idx, right_idx));
        }
    }
    pairs
}
|
||||
|
||||
/// Copies the cells of `row` found at `indices`, in the order the indices
/// are yielded.
///
/// # Panics
/// Panics if any index is out of bounds for `row`.
fn project<'a>(row: &'a [Value], indices: impl IntoIterator<Item = &'a usize>) -> Vec<Value> {
    let mut picked = Vec::new();
    for &index in indices {
        picked.push(row[index].clone());
    }
    picked
}
|
||||
|
||||
#[must_use]
|
||||
pub fn natural_join(_left: &Relation, _right: &Relation) -> Relation {
|
||||
todo!(
|
||||
"hash one side on shared columns, probe with the other, emit \
|
||||
left ++ (right \\ shared) for every match"
|
||||
)
|
||||
pub fn semijoin(left: &Relation, right: &Relation) -> Relation {
|
||||
let shared = shared_columns(left, right);
|
||||
let left_keys: Vec<usize> = shared.iter().map(|&(li, _)| li).collect();
|
||||
let right_keys: Vec<usize> = shared.iter().map(|&(_, ri)| ri).collect();
|
||||
|
||||
let mut right_set: HashSet<Vec<Value>> = HashSet::new();
|
||||
for row in &right.rows {
|
||||
right_set.insert(project(row, &right_keys));
|
||||
}
|
||||
|
||||
let mut output = Relation::new(left.columns.clone());
|
||||
for row in &left.rows {
|
||||
if right_set.contains(&project(row, &left_keys)) {
|
||||
output.push(row.clone());
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn natural_join(left: &Relation, right: &Relation) -> Relation {
|
||||
let shared = shared_columns(left, right);
|
||||
let left_keys: Vec<usize> = shared.iter().map(|&(li, _)| li).collect();
|
||||
let right_keys: Vec<usize> = shared.iter().map(|&(_, ri)| ri).collect();
|
||||
|
||||
let shared_right: HashSet<usize> = right_keys.iter().copied().collect();
|
||||
let right_only: Vec<usize> = (0..right.columns.len())
|
||||
.filter(|i| !shared_right.contains(i))
|
||||
.collect();
|
||||
|
||||
let mut output_columns = left.columns.clone();
|
||||
for &i in &right_only {
|
||||
output_columns.push(right.columns[i].clone());
|
||||
}
|
||||
|
||||
let mut right_index: HashMap<Vec<Value>, Vec<&Vec<Value>>> = HashMap::new();
|
||||
for row in &right.rows {
|
||||
right_index
|
||||
.entry(project(row, &right_keys))
|
||||
.or_default()
|
||||
.push(row);
|
||||
}
|
||||
|
||||
let mut output = Relation::new(output_columns);
|
||||
for left_row in &left.rows {
|
||||
let key = project(left_row, &left_keys);
|
||||
let Some(matches) = right_index.get(&key) else {
|
||||
continue;
|
||||
};
|
||||
for right_row in matches {
|
||||
let mut joined = left_row.clone();
|
||||
for &i in &right_only {
|
||||
joined.push(right_row[i].clone());
|
||||
}
|
||||
output.push(joined);
|
||||
}
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Owned column names built from string literals.
    fn names(list: &[&str]) -> Vec<String> {
        list.iter().map(|name| (*name).to_string()).collect()
    }

    /// Builds one row of integer cells.
    fn row(cells: &[i64]) -> Vec<Value> {
        cells.iter().copied().map(Value::Int).collect()
    }

    /// Builds a relation with the given column names and rows.
    fn rel(columns: &[&str], rows: Vec<Vec<Value>>) -> Relation {
        Relation::from_rows(names(columns), rows)
    }

    #[test]
    fn semijoin_keeps_left_rows_matched_on_shared_column() {
        let left = rel(
            &["X", "Y"],
            vec![row(&[1, 10]), row(&[2, 20]), row(&[3, 30])],
        );
        let right = rel(&["X"], vec![row(&[1]), row(&[3])]);

        let result = semijoin(&left, &right);

        assert_eq!(result.columns, names(&["X", "Y"]));
        assert_eq!(result.rows, vec![row(&[1, 10]), row(&[3, 30])]);
    }

    #[test]
    fn semijoin_does_not_duplicate_left_rows_when_right_has_duplicates() {
        let left = rel(&["X"], vec![row(&[1]), row(&[2])]);
        let right = rel(
            &["X", "Y"],
            vec![row(&[1, 100]), row(&[1, 101]), row(&[2, 200])],
        );

        let result = semijoin(&left, &right);

        assert_eq!(result.columns, names(&["X"]));
        assert_eq!(result.rows, vec![row(&[1]), row(&[2])]);
    }

    #[test]
    fn natural_join_emits_union_of_columns_on_match() {
        let left = rel(&["X", "Y"], vec![row(&[1, 10]), row(&[2, 20])]);
        let right = rel(
            &["Y", "Z"],
            vec![row(&[10, 100]), row(&[20, 200]), row(&[20, 201])],
        );

        let result = natural_join(&left, &right);

        assert_eq!(result.columns, names(&["X", "Y", "Z"]));
        assert_eq!(
            result.rows,
            vec![row(&[1, 10, 100]), row(&[2, 20, 200]), row(&[2, 20, 201])],
        );
    }

    #[test]
    fn natural_join_with_no_shared_columns_is_cartesian_product() {
        let left = rel(&["X"], vec![row(&[1]), row(&[2])]);
        let right = rel(&["Y"], vec![row(&[10]), row(&[20])]);

        let result = natural_join(&left, &right);

        assert_eq!(result.columns, names(&["X", "Y"]));
        assert_eq!(
            result.rows,
            vec![row(&[1, 10]), row(&[1, 20]), row(&[2, 10]), row(&[2, 20])],
        );
    }

    #[test]
    fn semijoin_returns_empty_when_either_side_is_empty() {
        let nonempty = rel(&["X"], vec![row(&[1])]);
        let empty = rel(&["X"], Vec::new());

        // Empty left, empty right, then both empty.
        for (left, right) in [(&empty, &nonempty), (&nonempty, &empty), (&empty, &empty)] {
            let result = semijoin(left, right);
            assert_eq!(result.columns, names(&["X"]));
            assert!(result.rows.is_empty());
        }
    }

    #[test]
    fn natural_join_returns_empty_when_either_side_is_empty() {
        let nonempty = rel(&["X"], vec![row(&[1])]);
        let empty = rel(&["X"], Vec::new());

        // Empty left, empty right, then both empty.
        for (left, right) in [(&empty, &nonempty), (&nonempty, &empty), (&empty, &empty)] {
            let result = natural_join(left, right);
            assert_eq!(result.columns, names(&["X"]));
            assert!(result.rows.is_empty());
        }
    }
}
|
||||
|
||||
@ -3,6 +3,12 @@
|
||||
//! Every operator in this crate (after the initial atom scan) consumes and
|
||||
//! produces [`Relation`]s. Column names are variable names; a value at column
|
||||
//! `i` of a row is the value bound to variable `columns[i]` in that solution.
|
||||
//!
|
||||
//! Column names within a single relation must be unique. Constructors enforce
|
||||
//! this invariant; downstream operators rely on it when matching shared columns
|
||||
//! across two relations.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use crate::value::Value;
|
||||
|
||||
@ -12,15 +18,46 @@ pub struct Relation {
|
||||
pub rows: Vec<Vec<Value>>,
|
||||
}
|
||||
|
||||
/// Checks that no name occurs twice in `columns`.
///
/// # Panics
/// Panics with a message naming the first repeated column when a duplicate
/// is found.
fn assert_unique_columns(columns: &[String]) {
    let mut taken: HashSet<&str> = HashSet::with_capacity(columns.len());
    // `insert` returns false for a name that is already in the set, so the
    // search stops at the first repeat.
    let repeated = columns
        .iter()
        .map(String::as_str)
        .find(|name| !taken.insert(*name));
    if let Some(name) = repeated {
        panic!("duplicate column name in relation: {name}");
    }
}
|
||||
|
||||
impl Relation {
|
||||
/// # Panics
|
||||
/// Panics if `columns` contains a duplicate name.
|
||||
#[must_use]
|
||||
pub fn new(columns: Vec<String>) -> Self {
|
||||
assert_unique_columns(&columns);
|
||||
Self {
|
||||
columns,
|
||||
rows: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
/// Panics if `columns` contains a duplicate name, or if any row's length
|
||||
/// differs from `columns.len()`.
|
||||
#[must_use]
|
||||
pub fn from_rows(columns: Vec<String>, rows: Vec<Vec<Value>>) -> Self {
|
||||
assert_unique_columns(&columns);
|
||||
let arity = columns.len();
|
||||
for (i, row) in rows.iter().enumerate() {
|
||||
assert_eq!(
|
||||
row.len(),
|
||||
arity,
|
||||
"row {i} arity mismatch: expected {arity}, got {}",
|
||||
row.len(),
|
||||
);
|
||||
}
|
||||
Self { columns, rows }
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
/// Panics if `row.len() != self.columns.len()`.
|
||||
pub fn push(&mut self, row: Vec<Value>) {
|
||||
@ -34,3 +71,20 @@ impl Relation {
|
||||
self.rows.push(row);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A column list that names `X` twice, which both constructors must reject.
    fn repeated_x() -> Vec<String> {
        vec![String::from("X"), String::from("X")]
    }

    #[test]
    #[should_panic(expected = "duplicate column name")]
    fn from_rows_rejects_duplicate_column_names() {
        let _ = Relation::from_rows(repeated_x(), Vec::new());
    }

    #[test]
    #[should_panic(expected = "duplicate column name")]
    fn new_rejects_duplicate_column_names() {
        let _ = Relation::new(repeated_x());
    }
}
|
||||
|
||||
@ -20,6 +20,21 @@ impl Table {
|
||||
}
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
/// Panics if any row's length differs from `arity`.
|
||||
#[must_use]
|
||||
pub fn from_rows(arity: usize, rows: Vec<Vec<Value>>) -> Self {
|
||||
for (i, row) in rows.iter().enumerate() {
|
||||
assert_eq!(
|
||||
row.len(),
|
||||
arity,
|
||||
"row {i} arity mismatch: expected {arity}, got {}",
|
||||
row.len(),
|
||||
);
|
||||
}
|
||||
Self { arity, rows }
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
/// Panics if `row.len() != self.arity`.
|
||||
pub fn push(&mut self, row: Vec<Value>) {
|
||||
|
||||
77
crates/query-ops/tests/hand_plan.rs
Normal file
77
crates/query-ops/tests/hand_plan.rs
Normal file
@ -0,0 +1,77 @@
|
||||
//! Hand-written query plans composed from `scan_atom`, `semijoin`, and `natural_join`.
|
||||
//!
|
||||
//! Schema:
|
||||
//! - `edge(src, dst)`: directed edges
|
||||
//! - `labeled(node)`: a set of labeled nodes
|
||||
//!
|
||||
//! Two rules are executed against the same fixture:
|
||||
//! - `Q1(X) :- edge(X, X), labeled(X).` (labeled self-loops)
|
||||
//! - `Q2(X, Y) :- edge(X, Y), labeled(Y).` (edges whose destination is labeled)
|
||||
|
||||
use query_ops::atom::{scan_atom, AtomPattern, Term};
|
||||
use query_ops::join::{natural_join, semijoin};
|
||||
use query_ops::table::Table;
|
||||
use query_ops::value::Value;
|
||||
|
||||
fn var(name: &str) -> Term {
|
||||
Term::Var(name.to_string())
|
||||
}
|
||||
|
||||
fn int(value: i64) -> Value {
|
||||
Value::Int(value)
|
||||
}
|
||||
|
||||
#[test]
fn labeled_self_loops_and_edges_into_labeled_nodes() {
    // Fixture shared by both rules: `edge(src, dst)` and `labeled(node)`.
    let edge_rows: Vec<Vec<Value>> = [(1, 2), (2, 3), (3, 3), (4, 1), (2, 2)]
        .iter()
        .map(|&(src, dst)| vec![int(src), int(dst)])
        .collect();
    let edge = Table::from_rows(2, edge_rows);
    let labeled_rows: Vec<Vec<Value>> = [2, 3].iter().map(|&node| vec![int(node)]).collect();
    let labeled = Table::from_rows(1, labeled_rows);

    // Scans `table` under a pattern made only of the named variables.
    let scan = |table: &Table, vars: &[&str]| {
        let columns: Vec<Term> = vars.iter().map(|name| var(name)).collect();
        scan_atom(table, &AtomPattern { columns })
    };

    // Q1(X) :- edge(X, X), labeled(X).
    let self_loops = scan(&edge, &["X", "X"]);
    let labeled_x = scan(&labeled, &["X"]);
    let q1 = semijoin(&self_loops, &labeled_x);
    assert_eq!(q1.columns, vec![String::from("X")]);
    assert_eq!(q1.rows, vec![vec![int(3)], vec![int(2)]]);

    // Q2(X, Y) :- edge(X, Y), labeled(Y).
    let edge_xy = scan(&edge, &["X", "Y"]);
    let labeled_y = scan(&labeled, &["Y"]);
    let q2 = natural_join(&edge_xy, &labeled_y);
    assert_eq!(q2.columns, vec![String::from("X"), String::from("Y")]);
    let expected_q2: Vec<Vec<Value>> = [(1, 2), (2, 3), (3, 3), (2, 2)]
        .iter()
        .map(|&(src, dst)| vec![int(src), int(dst)])
        .collect();
    assert_eq!(q2.rows, expected_q2);
}
|
||||
Loading…
x
Reference in New Issue
Block a user