Add oblivious chase, broader SQL operators, LIMIT, and integer literals

This commit is contained in:
Hassan Abedi 2026-04-10 15:22:30 +02:00
parent be8e1388bc
commit 52cb492bce
16 changed files with 583 additions and 41 deletions

2
.gitignore vendored
View File

@ -77,9 +77,9 @@ tarpaulin-report.html
Cargo.lock Cargo.lock
# Misc # Misc
*.proptest-regressions
.DS_Store .DS_Store
.benchmarks .benchmarks
.env .env
.claude/ .claude/
*.proptest-regressions
.codex .codex

View File

@ -71,7 +71,7 @@ Quick examples:
- The chase engine should remain largely stateless; pass execution state explicitly. - The chase engine should remain largely stateless; pass execution state explicitly.
- New chase variants should be composable with existing infrastructure. - New chase variants should be composable with existing infrastructure.
- Existential variables generate labeled nulls (`Term::Null`). - Existential variables generate labeled nulls (`Term::Null`).
- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY` over predicate-backed tables, equality predicates combined with `AND`, comma-join style multi-table queries, table aliases, and ordering by output-column names. - The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY-LIMIT` over predicate-backed tables; equality and inequality predicates combined with `AND` and `OR`; comma-join style multi-table queries; table aliases; ordering by output-column names; integer and string literals.
- Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`. - Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`.
- Single-table SQL queries may use the table name as a qualifier when no alias is present. - Single-table SQL queries may use the table name as a qualifier when no alias is present.
- Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented. - Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented.

View File

@ -14,7 +14,7 @@ execution boundaries.
- Provenance-oriented explanations for derived answers - Provenance-oriented explanations for derived answers
- Script, REPL, and local web UI for experimentation - Script, REPL, and local web UI for experimentation
- Relational schema, catalog, logical-plan, and execution scaffolding - Relational schema, catalog, logical-plan, and execution scaffolding
- A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY` queries over predicate-backed tables - A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY-LIMIT` queries over predicate-backed tables
### Architecture ### Architecture
@ -111,7 +111,7 @@ The repository now has a narrow SQL pipeline with:
- relational schemas, rows, and values - relational schemas, rows, and values
- SQL parsing for a small subset - SQL parsing for a small subset
- logical planning - logical planning
- execution for filtering, ordering, and basic multi-table joins - execution for filtering, ordering, limiting, and basic multi-table joins
Currently supported examples: Currently supported examples:
@ -119,9 +119,12 @@ Currently supported examples:
SELECT * FROM Parent SELECT * FROM Parent
SELECT c0 FROM Parent SELECT c0 FROM Parent
SELECT c0 FROM Parent WHERE c1 = 'bob' SELECT c0 FROM Parent WHERE c1 = 'bob'
SELECT c0 FROM Parent WHERE c1 != 'bob'
SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice' SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice'
SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'carol'
SELECT c0 FROM Parent ORDER BY c0 DESC SELECT c0 FROM Parent ORDER BY c0 DESC
SELECT c0 AS parent_name, 'seed' AS label FROM Parent SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1
SELECT c0 AS parent_name, 'seed' AS label, 42 AS answer FROM Parent
SELECT Parent.parent, Ancestor.child SELECT Parent.parent, Ancestor.child
FROM Parent, Ancestor FROM Parent, Ancestor
WHERE Parent.child = Ancestor.parent WHERE Parent.child = Ancestor.parent
@ -172,8 +175,10 @@ Current limits:
- joins currently use comma-separated tables plus `WHERE` filtering - joins currently use comma-separated tables plus `WHERE` filtering
- multi-table queries require qualified column names such as `Parent.child` - multi-table queries require qualified column names such as `Parent.child`
- table aliases are supported via `FROM Parent AS p` - table aliases are supported via `FROM Parent AS p`
- `WHERE` supports equality predicates combined with `AND` - `WHERE` supports `=`, `!=`/`<>`, `AND`, and `OR` (with standard precedence)
- `ORDER BY` supports output-column ordering with `ASC`/`DESC` - `ORDER BY` supports output-column ordering with `ASC`/`DESC`
- `LIMIT` restricts the number of output rows
- literals include strings, non-negative integers, and `NULL`
- no aggregates - no aggregates
- projection aliases only via `AS` - projection aliases only via `AS`
@ -183,6 +188,7 @@ Runnable SQL examples:
- `examples/scripts/sql_join.ech` - `examples/scripts/sql_join.ech`
- `examples/scripts/sql_self_join.ech` - `examples/scripts/sql_self_join.ech`
- `examples/scripts/sql_order_by.ech` - `examples/scripts/sql_order_by.ech`
- `examples/scripts/sql_filter_ops.ech`
### Development ### Development

View File

@ -28,10 +28,13 @@ This document tracks the current state and next steps for the repository.
- [x] Minimal SQL AST and parser - [x] Minimal SQL AST and parser
- [x] Logical plan scaffolding - [x] Logical plan scaffolding
- [x] Logical-plan execution for the first SQL slice - [x] Logical-plan execution for the first SQL slice
- [x] `SELECT-FROM-WHERE-ORDER BY` support with positional or named columns - [x] `SELECT-FROM-WHERE-ORDER BY-LIMIT` support with positional or named columns
- [x] Basic multi-table SQL joins via qualified-column filtering - [x] Basic multi-table SQL joins via qualified-column filtering
- [x] Table aliases for self-joins and qualified references - [x] Table aliases for self-joins and qualified references
- [x] Basic `ORDER BY` support over output columns - [x] Basic `ORDER BY` support over output columns
- [x] `!=`/`<>` inequality and `OR` disjunction in `WHERE` clauses
- [x] `LIMIT` clause for restricting output row count
- [x] Integer literal and `DataType::Integer` support
### Near-Term Cleanup ### Near-Term Cleanup
@ -72,7 +75,7 @@ This document tracks the current state and next steps for the repository.
- [x] Restricted chase - [x] Restricted chase
- [x] Standard chase - [x] Standard chase
- [ ] Oblivious chase - [x] Oblivious chase
- [ ] Skolem chase - [ ] Skolem chase
- [ ] Core chase - [ ] Core chase
- [ ] Negative constraints - [ ] Negative constraints

View File

@ -0,0 +1,20 @@
# Demonstrate inequality, OR, LIMIT, and integer literals in the SQL frontend.
fact Employee(alice, 30, engineering).
fact Employee(bob, 25, sales).
fact Employee(carol, 35, engineering).
fact Employee(dave, 28, marketing).
schema Employee(name, age, dept).
# Inequality: exclude engineering.
sql SELECT name FROM Employee WHERE dept != 'engineering';
# OR: engineering or marketing.
sql SELECT name, dept FROM Employee WHERE dept = 'engineering' OR dept = 'marketing';
# LIMIT: first two rows in name order.
sql SELECT name FROM Employee ORDER BY name ASC LIMIT 2;
# Integer literal in projection.
sql SELECT name, 1 AS active FROM Employee WHERE dept = 'sales';

View File

@ -64,6 +64,12 @@ pub enum ChaseVariant {
/// rule with the same frontier variable bindings. This is the default. /// rule with the same frontier variable bindings. This is the default.
#[default] #[default]
Restricted, Restricted,
/// Oblivious chase: fires every matching rule application without checking
/// head satisfaction or tracking triggers. Terminates only when no body
/// match produces a genuinely new fact. For rules with existential
/// variables this variant will typically not terminate (it will hit the
/// step limit) because each application generates fresh nulls.
Oblivious,
} }
/// Configuration for the chase algorithm. /// Configuration for the chase algorithm.
@ -112,6 +118,26 @@ pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
chase_with_config(instance, rules, config) chase_with_config(instance, rules, config)
} }
/// Chase `instance` under `rules` using the oblivious variant.
///
/// This is a convenience wrapper: it builds a [`ChaseConfig`] whose `variant`
/// is [`ChaseVariant::Oblivious`] (every other setting keeps its default) and
/// delegates to [`chase_with_config`].
///
/// In the oblivious variant every body match fires its rule, with no check
/// for an already-satisfied head and no record of previously applied
/// triggers; the run stops once a full round adds no new facts.
///
/// Rules without existential variables (plain Datalog) reach the same
/// fixpoint as the restricted and standard variants. Rules with existential
/// variables typically never reach a fixpoint, because each application
/// mints fresh labeled nulls; such runs end at the configured step limit.
pub fn oblivious_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
    chase_with_config(
        instance,
        rules,
        ChaseConfig {
            variant: ChaseVariant::Oblivious,
            ..Default::default()
        },
    )
}
/// Run the chase with custom configuration. /// Run the chase with custom configuration.
pub fn chase_with_config( pub fn chase_with_config(
mut instance: Instance, mut instance: Instance,
@ -137,6 +163,7 @@ pub fn chase_with_config(
ChaseVariant::Restricted => { ChaseVariant::Restricted => {
restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers) restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers)
} }
ChaseVariant::Oblivious => oblivious_chase_step(&instance, rules, &mut null_gen),
}; };
if new_facts.is_empty() { if new_facts.is_empty() {
@ -231,6 +258,32 @@ fn restricted_chase_step(
new_facts new_facts
} }
/// Run one round of the oblivious chase over `instance`.
///
/// For every rule, each body match found by `find_matches` is fired through
/// `apply_rule_head` (drawing any fresh nulls from `null_gen`). No trigger
/// bookkeeping and no head-satisfaction check is performed. Only derived
/// facts that `instance` does not already contain are returned, in rule
/// order and then match order; the returned list itself is not deduplicated.
fn oblivious_chase_step(
    instance: &Instance,
    rules: &[Rule],
    null_gen: &mut NullGenerator,
) -> Vec<Atom> {
    let mut fresh = Vec::new();

    for rule in rules {
        for binding in find_matches(instance, &rule.body) {
            // Keep only the head atoms that are not yet part of the instance.
            fresh.extend(
                apply_rule_head(rule, &binding, null_gen)
                    .into_iter()
                    .filter(|candidate| !instance.contains(candidate)),
            );
        }
    }

    fresh
}
/// A trigger for EGD applications, tracking which EGD was applied with which body bindings. /// A trigger for EGD applications, tracking which EGD was applied with which body bindings.
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EgdTrigger { struct EgdTrigger {
@ -329,6 +382,7 @@ pub fn chase_full(
ChaseVariant::Restricted => { ChaseVariant::Restricted => {
restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers) restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers)
} }
ChaseVariant::Oblivious => oblivious_chase_step(&instance, tgds, &mut null_gen),
}; };
let tgd_changes = !new_facts.is_empty(); let tgd_changes = !new_facts.is_empty();
@ -796,4 +850,117 @@ mod tests {
assert!(result.error.is_none()); assert!(result.error.is_none());
assert_eq!(result.instance.facts_for_predicate("B").len(), 1); assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
} }
// Oblivious chase tests
#[test]
fn test_oblivious_chase_datalog_rules() {
    // Two-link family chain: alice -> bob -> carol.
    let seed_facts = vec![
        Atom::new(
            "Parent",
            vec![Term::constant("alice"), Term::constant("bob")],
        ),
        Atom::new(
            "Parent",
            vec![Term::constant("bob"), Term::constant("carol")],
        ),
    ];
    let instance = seed_facts.into_iter().collect::<Instance>();

    // Base case: every parent is an ancestor.
    let base_rule = RuleBuilder::new()
        .when("Parent", vec![Term::var("X"), Term::var("Y")])
        .then("Ancestor", vec![Term::var("X"), Term::var("Y")])
        .build();

    // Inductive case: extend an ancestor link by one parent link.
    let step_rule = RuleBuilder::new()
        .when("Ancestor", vec![Term::var("X"), Term::var("Y")])
        .when("Parent", vec![Term::var("Y"), Term::var("Z")])
        .then("Ancestor", vec![Term::var("X"), Term::var("Z")])
        .build();

    let rules = [base_rule, step_rule];
    let outcome = oblivious_chase(instance, &rules);

    // Purely Datalog rules, so a fixpoint must be reached.
    assert!(outcome.terminated);
    // alice->bob, bob->carol, and the derived alice->carol.
    assert_eq!(outcome.instance.facts_for_predicate("Ancestor").len(), 3);
}
#[test]
fn test_oblivious_chase_matches_restricted_for_datalog() {
    // Three-edge chain: a -> b -> c -> d.
    let instance: Instance = vec![
        Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
        Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]),
        Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]),
    ]
    .into_iter()
    .collect();

    // Base case: every edge is a path.
    let rule1 = RuleBuilder::new()
        .when("Edge", vec![Term::var("X"), Term::var("Y")])
        .then("Path", vec![Term::var("X"), Term::var("Y")])
        .build();

    // Inductive case: extend a path by one edge.
    let rule2 = RuleBuilder::new()
        .when("Path", vec![Term::var("X"), Term::var("Y")])
        .when("Edge", vec![Term::var("Y"), Term::var("Z")])
        .then("Path", vec![Term::var("X"), Term::var("Z")])
        .build();

    let rules = vec![rule1, rule2];
    let oblivious_result = oblivious_chase(instance.clone(), &rules);
    let restricted_result = chase(instance, &rules);

    assert!(oblivious_result.terminated);
    assert!(restricted_result.terminated);

    let oblivious_paths = oblivious_result.instance.facts_for_predicate("Path");
    let restricted_paths = restricted_result.instance.facts_for_predicate("Path");

    // Pin the absolute size of the transitive closure (ab, bc, cd, ac, bd, ad)
    // so the comparison below cannot pass vacuously when both variants are
    // wrong in the same way (for example, both deriving nothing).
    assert_eq!(oblivious_paths.len(), 6);
    assert_eq!(oblivious_paths.len(), restricted_paths.len());
}
#[test]
fn test_oblivious_chase_does_not_terminate_with_existentials() {
    let seed = vec![Atom::new("Person", vec![Term::constant("alice")])];
    let instance = seed.into_iter().collect::<Instance>();

    // `Y` appears only in the head, so it is existentially quantified.
    let ssn_rule = RuleBuilder::new()
        .when("Person", vec![Term::var("X")])
        .then("HasSSN", vec![Term::var("X"), Term::var("Y")])
        .build();

    // Cap the run so the expected non-termination stays cheap.
    let capped = ChaseConfig {
        max_steps: 10,
        variant: ChaseVariant::Oblivious,
    };
    let outcome = chase_with_config(instance, &[ssn_rule], capped);

    // Every round fires the rule again with a fresh null, so the run must
    // stop at the step limit instead of at a fixpoint.
    assert!(!outcome.terminated);
    let ssn_count = outcome.instance.facts_for_predicate("HasSSN").len();
    assert!(ssn_count > 1);
}
#[test]
fn test_oblivious_chase_via_config() {
    let seed = vec![Atom::new("A", vec![Term::constant("x")])];
    let instance = seed.into_iter().collect::<Instance>();

    // Single Datalog rule copying every `A` fact into `B`.
    let copy_rule = RuleBuilder::new()
        .when("A", vec![Term::var("X")])
        .then("B", vec![Term::var("X")])
        .build();

    // Select the oblivious variant through the config rather than the
    // dedicated entry point; all other settings stay at their defaults.
    let outcome = chase_with_config(
        instance,
        &[copy_rule],
        ChaseConfig {
            variant: ChaseVariant::Oblivious,
            ..Default::default()
        },
    );

    assert!(outcome.terminated);
    assert_eq!(outcome.instance.facts_for_predicate("B").len(), 1);
}
} }

View File

@ -13,7 +13,7 @@ mod engine;
pub use atom::Atom; pub use atom::Atom;
pub use engine::{ pub use engine::{
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
chase_with_egds, standard_chase, chase_with_egds, oblivious_chase, standard_chase,
}; };
pub use inference::{Derivation, MaterializedState, find_matches, materialize}; pub use inference::{Derivation, MaterializedState, find_matches, materialize};
pub use instance::{Instance, InstanceError}; pub use instance::{Instance, InstanceError};

View File

@ -101,6 +101,11 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys)); rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys));
Ok(ResultSet::new(schema.clone(), rows)) Ok(ResultSet::new(schema.clone(), rows))
} }
LogicalPlan::Limit { input, count } => {
let result = execute(input, instance)?;
let rows = result.rows().iter().take(*count).cloned().collect();
Ok(ResultSet::new(result.schema().clone(), rows))
}
} }
} }
@ -113,9 +118,16 @@ fn eval_predicate(
LogicalExpr::Eq(left, right) => Ok(eval_expr(left, row, schema)? LogicalExpr::Eq(left, right) => Ok(eval_expr(left, row, schema)?
.sql_eq(&eval_expr(right, row, schema)?) .sql_eq(&eval_expr(right, row, schema)?)
.unwrap_or(false)), .unwrap_or(false)),
LogicalExpr::Ne(left, right) => Ok(eval_expr(left, row, schema)?
.sql_eq(&eval_expr(right, row, schema)?)
.map(|eq| !eq)
.unwrap_or(false)),
LogicalExpr::And(left, right) => { LogicalExpr::And(left, right) => {
Ok(eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?) Ok(eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?)
} }
LogicalExpr::Or(left, right) => {
Ok(eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?)
}
_ => Ok(false), _ => Ok(false),
} }
} }
@ -138,9 +150,19 @@ fn eval_expr(
let right = eval_expr(right, row, schema)?; let right = eval_expr(right, row, schema)?;
Ok(Value::Boolean(left.sql_eq(&right).unwrap_or(false))) Ok(Value::Boolean(left.sql_eq(&right).unwrap_or(false)))
} }
LogicalExpr::Ne(left, right) => {
let left = eval_expr(left, row, schema)?;
let right = eval_expr(right, row, schema)?;
Ok(Value::Boolean(
left.sql_eq(&right).map(|eq| !eq).unwrap_or(false),
))
}
LogicalExpr::And(left, right) => Ok(Value::Boolean( LogicalExpr::And(left, right) => Ok(Value::Boolean(
eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?, eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?,
)), )),
LogicalExpr::Or(left, right) => Ok(Value::Boolean(
eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?,
)),
} }
} }
@ -188,7 +210,11 @@ fn compare_values(left: &Value, right: &Value) -> Ordering {
(Value::Null, _) => Ordering::Greater, (Value::Null, _) => Ordering::Greater,
(_, Value::Null) => Ordering::Less, (_, Value::Null) => Ordering::Less,
(Value::Text(left), Value::Text(right)) => left.cmp(right), (Value::Text(left), Value::Text(right)) => left.cmp(right),
(Value::Integer(left), Value::Integer(right)) => left.cmp(right),
(Value::Boolean(left), Value::Boolean(right)) => left.cmp(right), (Value::Boolean(left), Value::Boolean(right)) => left.cmp(right),
// Cross-type ordering: Integer < Text < Boolean
(Value::Integer(_), _) => Ordering::Less,
(_, Value::Integer(_)) => Ordering::Greater,
(Value::Text(_), Value::Boolean(_)) => Ordering::Less, (Value::Text(_), Value::Boolean(_)) => Ordering::Less,
(Value::Boolean(_), Value::Text(_)) => Ordering::Greater, (Value::Boolean(_), Value::Text(_)) => Ordering::Greater,
} }

View File

@ -18,5 +18,5 @@ pub mod sql;
// Lower-level reasoning and provenance APIs remain under `query_engine::chase`. // Lower-level reasoning and provenance APIs remain under `query_engine::chase`.
pub use chase::{ pub use chase::{
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, Rule, RuleBuilder, Term, Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, Rule, RuleBuilder, Term,
chase, chase_with_config, standard_chase, chase, chase_with_config, oblivious_chase, standard_chase,
}; };

View File

@ -18,8 +18,12 @@ pub enum LogicalExpr {
Literal(Value), Literal(Value),
/// Equality. /// Equality.
Eq(Box<LogicalExpr>, Box<LogicalExpr>), Eq(Box<LogicalExpr>, Box<LogicalExpr>),
/// Inequality.
Ne(Box<LogicalExpr>, Box<LogicalExpr>),
/// Boolean conjunction. /// Boolean conjunction.
And(Box<LogicalExpr>, Box<LogicalExpr>), And(Box<LogicalExpr>, Box<LogicalExpr>),
/// Boolean disjunction.
Or(Box<LogicalExpr>, Box<LogicalExpr>),
} }
/// A named output expression in a projection. /// A named output expression in a projection.
@ -68,6 +72,11 @@ pub enum LogicalPlan {
expressions: Vec<NamedExpr>, expressions: Vec<NamedExpr>,
schema: Schema, schema: Schema,
}, },
/// Limit the number of output rows.
Limit {
input: Box<LogicalPlan>,
count: usize,
},
} }
impl LogicalPlan { impl LogicalPlan {
@ -79,6 +88,7 @@ impl LogicalPlan {
Self::Filter { input, .. } => input.output_schema(), Self::Filter { input, .. } => input.output_schema(),
Self::Sort { schema, .. } => schema, Self::Sort { schema, .. } => schema,
Self::Project { schema, .. } => schema, Self::Project { schema, .. } => schema,
Self::Limit { input, .. } => input.output_schema(),
} }
} }
} }

View File

@ -80,11 +80,7 @@ pub fn plan_select(
}; };
} }
if is_wildcard_projection(&select.projection) { if !is_wildcard_projection(&select.projection) {
let output_schema = plan.output_schema().clone();
return maybe_apply_sort(plan, output_schema, &select.order_by, &select.from);
}
let mut expressions = Vec::new(); let mut expressions = Vec::new();
let mut fields = Vec::new(); let mut fields = Vec::new();
for (index, item) in select.projection.iter().enumerate() { for (index, item) in select.projection.iter().enumerate() {
@ -94,7 +90,8 @@ pub fn plan_select(
let output_name = alias let output_name = alias
.clone() .clone()
.unwrap_or_else(|| default_projection_name(expr, index + 1)); .unwrap_or_else(|| default_projection_name(expr, index + 1));
let (data_type, nullable) = projection_metadata(expr, &input_schema, &select.from)?; let (data_type, nullable) =
projection_metadata(expr, &input_schema, &select.from)?;
expressions.push(NamedExpr { expressions.push(NamedExpr {
name: output_name.clone(), name: output_name.clone(),
expr: planned_expr, expr: planned_expr,
@ -105,14 +102,24 @@ pub fn plan_select(
} }
} }
let plan = LogicalPlan::Project { plan = LogicalPlan::Project {
input: Box::new(plan), input: Box::new(plan),
expressions, expressions,
schema: Schema::new(fields), schema: Schema::new(fields),
}; };
}
let output_schema = plan.output_schema().clone(); let output_schema = plan.output_schema().clone();
maybe_apply_sort(plan, output_schema, &select.order_by, &select.from) plan = maybe_apply_sort(plan, output_schema, &select.order_by, &select.from)?;
if let Some(count) = select.limit {
plan = LogicalPlan::Limit {
input: Box::new(plan),
count,
};
}
Ok(plan)
} }
fn is_wildcard_projection(items: &[SelectItem]) -> bool { fn is_wildcard_projection(items: &[SelectItem]) -> bool {
@ -182,10 +189,18 @@ fn plan_expr(
Box::new(plan_expr(left, schema, tables)?), Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?), Box::new(plan_expr(right, schema, tables)?),
)), )),
BinaryOp::Ne => Ok(LogicalExpr::Ne(
Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?),
)),
BinaryOp::And => Ok(LogicalExpr::And( BinaryOp::And => Ok(LogicalExpr::And(
Box::new(plan_expr(left, schema, tables)?), Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?), Box::new(plan_expr(right, schema, tables)?),
)), )),
BinaryOp::Or => Ok(LogicalExpr::Or(
Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?),
)),
}, },
} }
} }
@ -226,6 +241,7 @@ fn maybe_apply_sort(
fn plan_literal(literal: &Literal) -> Value { fn plan_literal(literal: &Literal) -> Value {
match literal { match literal {
Literal::String(value) => Value::text(value.clone()), Literal::String(value) => Value::text(value.clone()),
Literal::Integer(n) => Value::Integer(*n),
Literal::Null => Value::Null, Literal::Null => Value::Null,
} }
} }
@ -245,6 +261,7 @@ fn projection_metadata(
Ok((field.data_type().clone(), field.nullable())) Ok((field.data_type().clone(), field.nullable()))
} }
Expr::Literal(Literal::String(_)) => Ok((DataType::Text, false)), Expr::Literal(Literal::String(_)) => Ok((DataType::Text, false)),
Expr::Literal(Literal::Integer(_)) => Ok((DataType::Integer, false)),
Expr::Literal(Literal::Null) => Ok((DataType::Text, true)), Expr::Literal(Literal::Null) => Ok((DataType::Text, true)),
Expr::Binary { .. } => Ok((DataType::Boolean, true)), Expr::Binary { .. } => Ok((DataType::Boolean, true)),
} }
@ -550,6 +567,7 @@ mod tests {
}], }],
selection: None, selection: None,
order_by: Vec::new(), order_by: Vec::new(),
limit: None,
}; };
let error = plan_select(&malformed, &catalog).unwrap_err(); let error = plan_select(&malformed, &catalog).unwrap_err();
assert_eq!( assert_eq!(

View File

@ -5,6 +5,8 @@ use std::fmt;
pub enum DataType { pub enum DataType {
/// UTF-8 text values. /// UTF-8 text values.
Text, Text,
/// 64-bit signed integer values.
Integer,
/// Boolean values. /// Boolean values.
Boolean, Boolean,
} }

View File

@ -5,6 +5,8 @@ use std::fmt;
pub enum Value { pub enum Value {
/// Textual data. /// Textual data.
Text(String), Text(String),
/// Integer data.
Integer(i64),
/// Boolean data. /// Boolean data.
Boolean(bool), Boolean(bool),
/// SQL-style null. /// SQL-style null.
@ -29,8 +31,9 @@ impl Value {
match (self, other) { match (self, other) {
(Self::Null, _) | (_, Self::Null) => None, (Self::Null, _) | (_, Self::Null) => None,
(Self::Text(left), Self::Text(right)) => Some(left == right), (Self::Text(left), Self::Text(right)) => Some(left == right),
(Self::Integer(left), Self::Integer(right)) => Some(left == right),
(Self::Boolean(left), Self::Boolean(right)) => Some(left == right), (Self::Boolean(left), Self::Boolean(right)) => Some(left == right),
(Self::Text(_), Self::Boolean(_)) | (Self::Boolean(_), Self::Text(_)) => Some(false), _ => Some(false),
} }
} }
} }
@ -39,6 +42,7 @@ impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Self::Text(value) => write!(f, "{}", value), Self::Text(value) => write!(f, "{}", value),
Self::Integer(value) => write!(f, "{}", value),
Self::Boolean(value) => write!(f, "{}", value), Self::Boolean(value) => write!(f, "{}", value),
Self::Null => write!(f, "NULL"), Self::Null => write!(f, "NULL"),
} }

View File

@ -1,4 +1,4 @@
/// A parsed `SELECT-FROM-WHERE-ORDER BY` statement in the current SQL subset. /// A parsed `SELECT-FROM-WHERE-ORDER BY-LIMIT` statement in the current SQL subset.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Select { pub struct Select {
/// Output expressions requested by the query. /// Output expressions requested by the query.
@ -9,6 +9,8 @@ pub struct Select {
pub selection: Option<Expr>, pub selection: Option<Expr>,
/// Optional output ordering. /// Optional output ordering.
pub order_by: Vec<OrderByItem>, pub order_by: Vec<OrderByItem>,
/// Optional row limit.
pub limit: Option<usize>,
} }
/// One source entry in a `FROM` list. /// One source entry in a `FROM` list.
@ -58,6 +60,8 @@ pub enum Expr {
pub enum Literal { pub enum Literal {
/// A string literal. /// A string literal.
String(String), String(String),
/// An integer literal.
Integer(i64),
/// The `NULL` literal. /// The `NULL` literal.
Null, Null,
} }
@ -67,8 +71,12 @@ pub enum Literal {
pub enum BinaryOp { pub enum BinaryOp {
/// Equality. /// Equality.
Eq, Eq,
/// Inequality.
Ne,
/// Boolean conjunction. /// Boolean conjunction.
And, And,
/// Boolean disjunction.
Or,
} }
/// Sort direction for `ORDER BY`. /// Sort direction for `ORDER BY`.

View File

@ -43,16 +43,20 @@ enum Token {
Where, Where,
As, As,
And, And,
Or,
Order, Order,
By, By,
Asc, Asc,
Desc, Desc,
Null, Null,
Limit,
Identifier(String), Identifier(String),
String(String), String(String),
Integer(usize),
Star, Star,
Comma, Comma,
Eq, Eq,
Ne,
} }
/// Parse a `SELECT-FROM-WHERE-ORDER BY` query in the current SQL subset. /// Parse a `SELECT-FROM-WHERE-ORDER BY-LIMIT` query in the current SQL subset.
@ -91,6 +95,13 @@ impl Parser {
Vec::new() Vec::new()
}; };
let limit = if self.peek() == Some(&Token::Limit) {
self.index += 1;
Some(self.expect_integer()?)
} else {
None
};
if let Some(token) = self.peek() { if let Some(token) = self.peek() {
return Err(ParseError::UnexpectedToken(render_token(token))); return Err(ParseError::UnexpectedToken(render_token(token)));
} }
@ -100,6 +111,7 @@ impl Parser {
from, from,
selection, selection,
order_by, order_by,
limit,
}) })
} }
@ -166,6 +178,22 @@ impl Parser {
} }
fn parse_expr(&mut self) -> Result<Expr, ParseError> { fn parse_expr(&mut self) -> Result<Expr, ParseError> {
let mut expr = self.parse_and()?;
while self.peek() == Some(&Token::Or) {
self.index += 1;
let right = self.parse_and()?;
expr = Expr::Binary {
left: Box::new(expr),
op: BinaryOp::Or,
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_and(&mut self) -> Result<Expr, ParseError> {
let mut expr = self.parse_equality()?; let mut expr = self.parse_equality()?;
while self.peek() == Some(&Token::And) { while self.peek() == Some(&Token::And) {
@ -220,6 +248,14 @@ impl Parser {
right: Box::new(right), right: Box::new(right),
}) })
} }
Token::Ne => {
let right = self.parse_operand()?;
Ok(Expr::Binary {
left: Box::new(left),
op: BinaryOp::Ne,
right: Box::new(right),
})
}
other => Err(ParseError::UnexpectedToken(render_token(&other))), other => Err(ParseError::UnexpectedToken(render_token(&other))),
} }
} }
@ -228,6 +264,7 @@ impl Parser {
match self.next().ok_or(ParseError::UnexpectedEnd)? { match self.next().ok_or(ParseError::UnexpectedEnd)? {
Token::Identifier(name) => Ok(Expr::Identifier(name)), Token::Identifier(name) => Ok(Expr::Identifier(name)),
Token::String(value) => Ok(Expr::Literal(Literal::String(value))), Token::String(value) => Ok(Expr::Literal(Literal::String(value))),
Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n as i64))),
Token::Null => Ok(Expr::Literal(Literal::Null)), Token::Null => Ok(Expr::Literal(Literal::Null)),
other => Err(ParseError::UnexpectedToken(render_token(&other))), other => Err(ParseError::UnexpectedToken(render_token(&other))),
} }
@ -249,6 +286,13 @@ impl Parser {
} }
} }
/// Consume the next token and return its value if it is an integer.
///
/// Fails with `ParseError::UnexpectedEnd` when no tokens remain, and with
/// `ParseError::UnexpectedToken` (carrying the rendered token) when the next
/// token is anything other than `Token::Integer`.
fn expect_integer(&mut self) -> Result<usize, ParseError> {
    let token = self.next().ok_or(ParseError::UnexpectedEnd)?;
    if let Token::Integer(count) = token {
        return Ok(count);
    }
    Err(ParseError::UnexpectedToken(render_token(&token)))
}
fn peek(&self) -> Option<&Token> { fn peek(&self) -> Option<&Token> {
self.tokens.get(self.index) self.tokens.get(self.index)
} }
@ -281,11 +325,33 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
chars.next(); chars.next();
tokens.push(Token::Comma); tokens.push(Token::Comma);
} }
'!' => {
chars.next();
if chars.peek() == Some(&'=') {
chars.next();
tokens.push(Token::Ne);
} else {
return Err(ParseError::UnexpectedToken("!".to_string()));
}
}
'<' => {
chars.next();
if chars.peek() == Some(&'>') {
chars.next();
tokens.push(Token::Ne);
} else {
return Err(ParseError::UnexpectedToken("<".to_string()));
}
}
'=' => { '=' => {
chars.next(); chars.next();
tokens.push(Token::Eq); tokens.push(Token::Eq);
} }
'\'' => tokens.push(Token::String(parse_string(&mut chars)?)), '\'' => tokens.push(Token::String(parse_string(&mut chars)?)),
ch if ch.is_ascii_digit() => {
let number = parse_integer(&mut chars);
tokens.push(Token::Integer(number));
}
ch if is_identifier_start(ch) => { ch if is_identifier_start(ch) => {
let ident = parse_identifier(&mut chars); let ident = parse_identifier(&mut chars);
let token = match ident.to_ascii_uppercase().as_str() { let token = match ident.to_ascii_uppercase().as_str() {
@ -294,11 +360,13 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
"WHERE" => Token::Where, "WHERE" => Token::Where,
"AS" => Token::As, "AS" => Token::As,
"AND" => Token::And, "AND" => Token::And,
"OR" => Token::Or,
"ORDER" => Token::Order, "ORDER" => Token::Order,
"BY" => Token::By, "BY" => Token::By,
"ASC" => Token::Asc, "ASC" => Token::Asc,
"DESC" => Token::Desc, "DESC" => Token::Desc,
"NULL" => Token::Null, "NULL" => Token::Null,
"LIMIT" => Token::Limit,
_ => Token::Identifier(ident), _ => Token::Identifier(ident),
}; };
tokens.push(token); tokens.push(token);
@ -351,8 +419,24 @@ where
ident ident
} }
/// Consume a run of ASCII digits from `chars` and return their decimal value.
///
/// Reading stops at the first character that is not an ASCII digit; that
/// character is left unconsumed. If the next character is not a digit at all,
/// nothing is consumed and `0` is returned.
///
/// Digit runs too large for `usize` saturate at `usize::MAX`. Plain `*`/`+`
/// would panic in debug builds and silently wrap in release builds, and the
/// input here comes straight from user-supplied SQL text.
fn parse_integer<I>(chars: &mut std::iter::Peekable<I>) -> usize
where
    I: Iterator<Item = char>,
{
    let mut value: usize = 0;
    // `to_digit(10)` is `Some` only for '0'..='9', so the loop ends on the
    // first non-digit (or at end of input) without consuming it.
    while let Some(digit) = chars.peek().and_then(|ch| ch.to_digit(10)) {
        // `digit` is in 0..=9, so widening to `usize` is lossless.
        value = value.saturating_mul(10).saturating_add(digit as usize);
        chars.next();
    }
    value
}
fn is_identifier_start(ch: char) -> bool { fn is_identifier_start(ch: char) -> bool {
ch.is_ascii_alphanumeric() || ch == '_' ch.is_ascii_alphabetic() || ch == '_'
} }
fn is_identifier_part(ch: char) -> bool { fn is_identifier_part(ch: char) -> bool {
@ -366,16 +450,20 @@ fn render_token(token: &Token) -> String {
Token::Where => "WHERE".to_string(), Token::Where => "WHERE".to_string(),
Token::As => "AS".to_string(), Token::As => "AS".to_string(),
Token::And => "AND".to_string(), Token::And => "AND".to_string(),
Token::Or => "OR".to_string(),
Token::Order => "ORDER".to_string(), Token::Order => "ORDER".to_string(),
Token::By => "BY".to_string(), Token::By => "BY".to_string(),
Token::Asc => "ASC".to_string(), Token::Asc => "ASC".to_string(),
Token::Desc => "DESC".to_string(), Token::Desc => "DESC".to_string(),
Token::Null => "NULL".to_string(), Token::Null => "NULL".to_string(),
Token::Limit => "LIMIT".to_string(),
Token::Identifier(name) => name.clone(), Token::Identifier(name) => name.clone(),
Token::Integer(n) => n.to_string(),
Token::String(value) => format!("'{}'", value), Token::String(value) => format!("'{}'", value),
Token::Star => "*".to_string(), Token::Star => "*".to_string(),
Token::Comma => ",".to_string(), Token::Comma => ",".to_string(),
Token::Eq => "=".to_string(), Token::Eq => "=".to_string(),
Token::Ne => "!=".to_string(),
} }
} }
@ -536,4 +624,115 @@ mod tests {
let error = parse_select("SELECT *, c0 FROM Parent").unwrap_err(); let error = parse_select("SELECT *, c0 FROM Parent").unwrap_err();
assert_eq!(error, ParseError::MixedWildcardProjection); assert_eq!(error, ParseError::MixedWildcardProjection);
} }
#[test]
fn parses_not_equal_with_bang_eq() {
    // `!=` in a WHERE clause must produce an inequality comparison node.
    let select = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap();
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier(String::from("c1"))),
        op: BinaryOp::Ne,
        right: Box::new(Expr::Literal(Literal::String(String::from("bob")))),
    };
    assert_eq!(select.selection, Some(expected));
}
#[test]
fn parses_not_equal_with_diamond() {
    // `<>` is the alternative spelling of inequality and must yield the same
    // `BinaryOp::Ne` node as `!=`.
    let select = parse_select("SELECT c0 FROM Parent WHERE c1 <> 'bob'").unwrap();
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier(String::from("c1"))),
        op: BinaryOp::Ne,
        right: Box::new(Expr::Literal(Literal::String(String::from("bob")))),
    };
    assert_eq!(select.selection, Some(expected));
}
#[test]
fn parses_or_expression() {
    // Two equality comparisons joined by OR become one OR node whose
    // operands are the individual comparisons, in source order.
    let select =
        parse_select("SELECT c0 FROM Parent WHERE c0 = 'alice' OR c0 = 'bob'").unwrap();

    // Builds the comparison `c0 = '<name>'`.
    let c0_equals = |name: &str| Expr::Binary {
        left: Box::new(Expr::Identifier(String::from("c0"))),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::String(String::from(name)))),
    };
    let expected = Expr::Binary {
        left: Box::new(c0_equals("alice")),
        op: BinaryOp::Or,
        right: Box::new(c0_equals("bob")),
    };
    assert_eq!(select.selection, Some(expected));
}
#[test]
fn parses_integer_literal_in_expression() {
    // An unquoted digit run on the right-hand side is an integer literal,
    // not a string or an identifier.
    let select = parse_select("SELECT c0 FROM Parent WHERE c0 = 42").unwrap();
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier(String::from("c0"))),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(42))),
    };
    assert_eq!(select.selection, Some(expected));
}
#[test]
fn parses_limit_clause() {
    // A trailing `LIMIT n` is recorded on the parsed statement.
    let parsed = parse_select("SELECT c0 FROM Parent LIMIT 5").unwrap();
    let limit = parsed.limit;
    assert_eq!(limit, Some(5));
}
#[test]
fn parses_order_by_with_limit() {
    // ORDER BY followed by LIMIT: both clauses must survive parsing.
    let parsed = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC LIMIT 1").unwrap();
    let ordering_terms = parsed.order_by.len();
    assert_eq!(ordering_terms, 1);
    assert_eq!(parsed.limit, Some(1));
}
#[test]
fn parses_or_with_and_precedence() {
    // AND binds tighter than OR, so
    //     c0 = '1' OR c1 = '2' AND c0 = '3'
    // must group as
    //     c0 = '1' OR (c1 = '2' AND c0 = '3')
    let select =
        parse_select("SELECT c0 FROM Parent WHERE c0 = '1' OR c1 = '2' AND c0 = '3'").unwrap();

    // Builds the comparison `<column> = '<value>'`.
    let equals = |column: &str, value: &str| Expr::Binary {
        left: Box::new(Expr::Identifier(String::from(column))),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::String(String::from(value)))),
    };
    let conjunction = Expr::Binary {
        left: Box::new(equals("c1", "2")),
        op: BinaryOp::And,
        right: Box::new(equals("c0", "3")),
    };
    let expected = Expr::Binary {
        left: Box::new(equals("c0", "1")),
        op: BinaryOp::Or,
        right: Box::new(conjunction),
    };
    assert_eq!(select.selection, Some(expected));
}
} }

View File

@ -250,3 +250,82 @@ fn select_order_by_desc_sorts_rows() {
assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob"); assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob");
assert_eq!(format!("{}", result.rows()[1].values()[0]), "alice"); assert_eq!(format!("{}", result.rows()[1].values()[0]), "alice");
} }
#[test]
fn select_integer_literal_in_projection() {
    // Projecting a bare integer with an alias adds an Integer-typed column
    // named after the alias, carrying the literal's value.
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();
    let select = parse_select("SELECT c0, 42 AS answer FROM Parent").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    // Schema: two columns, the second being the aliased literal.
    let schema = result.schema();
    assert_eq!(schema.len(), 2);
    assert_eq!(schema.fields()[1].name(), "answer");
    let integer_type = query_engine::relational::DataType::Integer;
    assert_eq!(schema.fields()[1].data_type(), &integer_type);

    // Rows: both rows are returned, and the first carries the literal.
    let rows = result.rows();
    assert_eq!(rows.len(), 2);
    assert_eq!(format!("{}", rows[0].values()[1]), "42");
}
#[test]
fn select_limit_restricts_row_count() {
    // With ascending order and LIMIT 1, only the smallest c0 value remains.
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();
    let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    let rows = result.rows();
    assert_eq!(rows.len(), 1);
    let first_value = format!("{}", rows[0].values()[0]);
    assert_eq!(first_value, "alice");
}
#[test]
fn select_where_or_matches_either_condition() {
    // Three Parent facts; the OR filter should keep the two whose second
    // column is 'bob' or 'dave'.
    let instance: Instance = [("alice", "bob"), ("bob", "carol"), ("carol", "dave")]
        .iter()
        .map(|&(parent, child)| {
            Atom::new(
                "Parent",
                vec![Term::constant(parent), Term::constant(child)],
            )
        })
        .collect();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();
    let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'dave'").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();
    assert_eq!(result.rows().len(), 2);

    // No ORDER BY in the query, so sort before comparing.
    let mut parents: Vec<String> = result
        .rows()
        .iter()
        .map(|row| format!("{}", row.values()[0]))
        .collect();
    parents.sort();
    assert_eq!(parents, vec!["alice".to_string(), "carol".to_string()]);
}
#[test]
fn select_where_not_equal_excludes_matching_rows() {
    // Filtering on `c1 != 'bob'` should leave exactly one row, whose c0 is 'bob'.
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();
    let select = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    let rows = result.rows();
    assert_eq!(rows.len(), 1);
    let surviving_parent = format!("{}", rows[0].values()[0]);
    assert_eq!(surviving_parent, "bob");
}