From 52cb492bce4286bb25878545183d1a7cbf96f079 Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Fri, 10 Apr 2026 15:22:30 +0200 Subject: [PATCH] Add oblivious chase, broader SQL operators, LIMIT, and integer literals --- .gitignore | 2 +- AGENTS.md | 2 +- README.md | 14 +- ROADMAP.md | 7 +- examples/scripts/sql_filter_ops.ech | 20 +++ src/chase/engine.rs | 167 +++++++++++++++++++++++ src/chase/mod.rs | 2 +- src/execution/mod.rs | 26 ++++ src/lib.rs | 2 +- src/planner/logical.rs | 10 ++ src/planner/sql.rs | 74 ++++++---- src/relational/schema.rs | 2 + src/relational/value.rs | 6 +- src/sql/ast.rs | 10 +- src/sql/parser.rs | 201 +++++++++++++++++++++++++++- tests/sql_pipeline_tests.rs | 79 +++++++++++ 16 files changed, 583 insertions(+), 41 deletions(-) create mode 100644 examples/scripts/sql_filter_ops.ech diff --git a/.gitignore b/.gitignore index 7f61c77..04b2bed 100644 --- a/.gitignore +++ b/.gitignore @@ -77,9 +77,9 @@ tarpaulin-report.html Cargo.lock # Misc +*.proptest-regressions .DS_Store .benchmarks .env .claude/ -*.proptest-regressions .codex diff --git a/AGENTS.md b/AGENTS.md index daac1f4..968c8e4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -71,7 +71,7 @@ Quick examples: - The chase engine should remain largely stateless; pass execution state explicitly. - New chase variants should be composable with existing infrastructure. - Existential variables generate labeled nulls (`Term::Null`). -- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY` over predicate-backed tables, equality predicates combined with `AND`, comma-join style multi-table queries, table aliases, and ordering by output-column names. +- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY-LIMIT` over predicate-backed tables; equality and inequality predicates combined with `AND` and `OR`; comma-join style multi-table queries; table aliases; ordering by output-column names; integer and string literals. - Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`. - Single-table SQL queries may use the table name as a qualifier when no alias is present. - Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented. diff --git a/README.md b/README.md index 026aa8f..ddd2b2d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ execution boundaries. - Provenance-oriented explanations for derived answers - Script, REPL, and local web UI for experimentation - Relational schema, catalog, logical-plan, and execution scaffolding -- A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY` queries over predicate-backed tables +- A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY-LIMIT` queries over predicate-backed tables ### Architecture @@ -111,7 +111,7 @@ The repository now has a narrow SQL pipeline with: - relational schemas, rows, and values - SQL parsing for a small subset - logical planning -- execution for filtering, ordering, and basic multi-table joins +- execution for filtering, ordering, limiting, and basic multi-table joins Currently supported examples: @@ -119,9 +119,12 @@ Currently supported examples: SELECT * FROM Parent SELECT c0 FROM Parent SELECT c0 FROM Parent WHERE c1 = 'bob' +SELECT c0 FROM Parent WHERE c1 != 'bob' SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice' +SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'carol' SELECT c0 FROM Parent ORDER BY c0 DESC -SELECT c0 AS parent_name, 'seed' AS label FROM Parent +SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1 +SELECT c0 AS parent_name, 'seed' AS label, 42 AS answer FROM Parent SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent @@ -172,8 +175,10 @@ Current limits: - joins currently use comma-separated tables plus `WHERE` filtering - multi-table queries require qualified column names such as `Parent.child` - table aliases are supported via `FROM Parent AS p` -- `WHERE` supports equality predicates combined with `AND` +- `WHERE` supports `=`, `!=`/`<>`, `AND`, and `OR` (with standard precedence) - `ORDER BY` supports output-column ordering with `ASC`/`DESC` +- `LIMIT` restricts the number of output rows +- literals include strings, integers, and `NULL` - no aggregates - projection aliases only via `AS` @@ -183,6 +188,7 @@ Runnable SQL examples: - `examples/scripts/sql_join.ech` - `examples/scripts/sql_self_join.ech` - `examples/scripts/sql_order_by.ech` +- `examples/scripts/sql_filter_ops.ech` ### Development diff --git a/ROADMAP.md b/ROADMAP.md index d9ccc6e..f8761b6 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -28,10 +28,13 @@ This document tracks the current state and next steps for the repository. - [x] Minimal SQL AST and parser - [x] Logical plan scaffolding - [x] Logical-plan execution for the first SQL slice -- [x] `SELECT-FROM-WHERE-ORDER BY` support with positional or named columns +- [x] `SELECT-FROM-WHERE-ORDER BY-LIMIT` support with positional or named columns - [x] Basic multi-table SQL joins via qualified-column filtering - [x] Table aliases for self-joins and qualified references - [x] Basic `ORDER BY` support over output columns +- [x] `!=`/`<>` inequality and `OR` disjunction in `WHERE` clauses +- [x] `LIMIT` clause for restricting output row count +- [x] Integer literal and `DataType::Integer` support ### Near-Term Cleanup @@ -72,7 +75,7 @@ This document tracks the current state and next steps for the repository. - [x] Restricted chase - [x] Standard chase -- [ ] Oblivious chase +- [x] Oblivious chase - [ ] Skolem chase - [ ] Core chase - [ ] Negative constraints diff --git a/examples/scripts/sql_filter_ops.ech b/examples/scripts/sql_filter_ops.ech new file mode 100644 index 0000000..11a03f3 --- /dev/null +++ b/examples/scripts/sql_filter_ops.ech @@ -0,0 +1,20 @@ +# Demonstrate inequality, OR, LIMIT, and integer literals in the SQL frontend. + +fact Employee(alice, 30, engineering). +fact Employee(bob, 25, sales). +fact Employee(carol, 35, engineering). +fact Employee(dave, 28, marketing). + +schema Employee(name, age, dept). + +# Inequality: exclude engineering. +sql SELECT name FROM Employee WHERE dept != 'engineering'; + +# OR: engineering or marketing. +sql SELECT name, dept FROM Employee WHERE dept = 'engineering' OR dept = 'marketing'; + +# LIMIT: first two rows in name order. +sql SELECT name FROM Employee ORDER BY name ASC LIMIT 2; + +# Integer literal in projection. +sql SELECT name, 1 AS active FROM Employee WHERE dept = 'sales'; diff --git a/src/chase/engine.rs b/src/chase/engine.rs index d0f29c2..88cd2f8 100644 --- a/src/chase/engine.rs +++ b/src/chase/engine.rs @@ -64,6 +64,12 @@ pub enum ChaseVariant { /// rule with the same frontier variable bindings. This is the default. #[default] Restricted, + /// Oblivious chase: fires every matching rule application without checking + /// head satisfaction or tracking triggers. Terminates only when no body + /// match produces a genuinely new fact. For rules with existential + /// variables this variant will typically not terminate (it will hit the + /// step limit) because each application generates fresh nulls. + Oblivious, } /// Configuration for the chase algorithm. @@ -112,6 +118,26 @@ pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult { chase_with_config(instance, rules, config) } +/// Run the oblivious chase algorithm. +/// +/// The oblivious chase fires every rule application whose body matches, +/// without checking whether the head is already satisfied and without +/// tracking previously applied triggers. It terminates only when a full +/// round produces no new facts. +/// +/// For Datalog rules (no existential variables) the oblivious chase +/// reaches the same fixpoint as the restricted and standard variants. +/// For rules with existential variables it will typically not terminate +/// because each application generates fresh labeled nulls; in that case +/// it will run until the step limit. +pub fn oblivious_chase(instance: Instance, rules: &[Rule]) -> ChaseResult { + let config = ChaseConfig { + variant: ChaseVariant::Oblivious, + ..Default::default() + }; + chase_with_config(instance, rules, config) +} + /// Run the chase with custom configuration. pub fn chase_with_config( mut instance: Instance, @@ -137,6 +163,7 @@ pub fn chase_with_config( ChaseVariant::Restricted => { restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers) } + ChaseVariant::Oblivious => oblivious_chase_step(&instance, rules, &mut null_gen), }; if new_facts.is_empty() { @@ -231,6 +258,32 @@ fn restricted_chase_step( new_facts } +/// Perform a single oblivious chase step: fire all matching rule applications +/// without checking head satisfaction or tracking triggers. +fn oblivious_chase_step( + instance: &Instance, + rules: &[Rule], + null_gen: &mut NullGenerator, +) -> Vec { + let mut new_facts = Vec::new(); + + for rule in rules { + let matches = find_matches(instance, &rule.body); + + for subst in matches { + let derived = apply_rule_head(rule, &subst, null_gen); + + for fact in derived { + if !instance.contains(&fact) { + new_facts.push(fact); + } + } + } + } + + new_facts +} + /// A trigger for EGD applications, tracking which EGD was applied with which body bindings. #[derive(Debug, Clone, PartialEq, Eq, Hash)] struct EgdTrigger { @@ -329,6 +382,7 @@ pub fn chase_full( ChaseVariant::Restricted => { restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers) } + ChaseVariant::Oblivious => oblivious_chase_step(&instance, tgds, &mut null_gen), }; let tgd_changes = !new_facts.is_empty(); @@ -796,4 +850,117 @@ mod tests { assert!(result.error.is_none()); assert_eq!(result.instance.facts_for_predicate("B").len(), 1); } + + // Oblivious chase tests + + #[test] + fn test_oblivious_chase_datalog_rules() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + ] + .into_iter() + .collect(); + + let rule1 = RuleBuilder::new() + .when("Parent", vec![Term::var("X"), Term::var("Y")]) + .then("Ancestor", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let rule2 = RuleBuilder::new() + .when("Ancestor", vec![Term::var("X"), Term::var("Y")]) + .when("Parent", vec![Term::var("Y"), Term::var("Z")]) + .then("Ancestor", vec![Term::var("X"), Term::var("Z")]) + .build(); + + let result = oblivious_chase(instance, &[rule1, rule2]); + + assert!(result.terminated); + let ancestors = result.instance.facts_for_predicate("Ancestor"); + assert_eq!(ancestors.len(), 3); + } + + #[test] + fn test_oblivious_chase_matches_restricted_for_datalog() { + let instance: Instance = vec![ + Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]), + Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]), + Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]), + ] + .into_iter() + .collect(); + + let rule1 = RuleBuilder::new() + .when("Edge", vec![Term::var("X"), Term::var("Y")]) + .then("Path", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let rule2 = RuleBuilder::new() + .when("Path", vec![Term::var("X"), Term::var("Y")]) + .when("Edge", vec![Term::var("Y"), Term::var("Z")]) + .then("Path", vec![Term::var("X"), Term::var("Z")]) + .build(); + + let rules = vec![rule1, rule2]; + + let oblivious_result = oblivious_chase(instance.clone(), &rules); + let restricted_result = chase(instance, &rules); + + assert!(oblivious_result.terminated); + assert!(restricted_result.terminated); + + let oblivious_paths = oblivious_result.instance.facts_for_predicate("Path"); + let restricted_paths = restricted_result.instance.facts_for_predicate("Path"); + assert_eq!(oblivious_paths.len(), restricted_paths.len()); + } + + #[test] + fn test_oblivious_chase_does_not_terminate_with_existentials() { + let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])] + .into_iter() + .collect(); + + let rule = RuleBuilder::new() + .when("Person", vec![Term::var("X")]) + .then("HasSSN", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let config = ChaseConfig { + max_steps: 10, + variant: ChaseVariant::Oblivious, + }; + let result = chase_with_config(instance, &[rule], config); + + // The oblivious chase generates a fresh null each round, so it + // should hit the step limit rather than reaching a fixpoint. + assert!(!result.terminated); + assert!(result.instance.facts_for_predicate("HasSSN").len() > 1); + } + + #[test] + fn test_oblivious_chase_via_config() { + let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])] + .into_iter() + .collect(); + + let rule = RuleBuilder::new() + .when("A", vec![Term::var("X")]) + .then("B", vec![Term::var("X")]) + .build(); + + let config = ChaseConfig { + variant: ChaseVariant::Oblivious, + ..Default::default() + }; + let result = chase_with_config(instance, &[rule], config); + + assert!(result.terminated); + assert_eq!(result.instance.facts_for_predicate("B").len(), 1); + } } diff --git a/src/chase/mod.rs b/src/chase/mod.rs index 8e04578..48ad544 100644 --- a/src/chase/mod.rs +++ b/src/chase/mod.rs @@ -13,7 +13,7 @@ mod engine; pub use atom::Atom; pub use engine::{ ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config, - chase_with_egds, standard_chase, + chase_with_egds, oblivious_chase, standard_chase, }; pub use inference::{Derivation, MaterializedState, find_matches, materialize}; pub use instance::{Instance, InstanceError}; diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 8fe09ca..df3a55d 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -101,6 +101,11 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result { + let result = execute(input, instance)?; + let rows = result.rows().iter().take(*count).cloned().collect(); + Ok(ResultSet::new(result.schema().clone(), rows)) + } } } @@ -113,9 +118,16 @@ fn eval_predicate( LogicalExpr::Eq(left, right) => Ok(eval_expr(left, row, schema)? .sql_eq(&eval_expr(right, row, schema)?) .unwrap_or(false)), + LogicalExpr::Ne(left, right) => Ok(eval_expr(left, row, schema)? + .sql_eq(&eval_expr(right, row, schema)?) + .map(|eq| !eq) + .unwrap_or(false)), LogicalExpr::And(left, right) => { Ok(eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?) } + LogicalExpr::Or(left, right) => { + Ok(eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?) + } _ => Ok(false), } } @@ -138,9 +150,19 @@ fn eval_expr( let right = eval_expr(right, row, schema)?; Ok(Value::Boolean(left.sql_eq(&right).unwrap_or(false))) } + LogicalExpr::Ne(left, right) => { + let left = eval_expr(left, row, schema)?; + let right = eval_expr(right, row, schema)?; + Ok(Value::Boolean( + left.sql_eq(&right).map(|eq| !eq).unwrap_or(false), + )) + } LogicalExpr::And(left, right) => Ok(Value::Boolean( eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?, )), + LogicalExpr::Or(left, right) => Ok(Value::Boolean( + eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?, + )), } } @@ -188,7 +210,11 @@ fn compare_values(left: &Value, right: &Value) -> Ordering { (Value::Null, _) => Ordering::Greater, (_, Value::Null) => Ordering::Less, (Value::Text(left), Value::Text(right)) => left.cmp(right), + (Value::Integer(left), Value::Integer(right)) => left.cmp(right), (Value::Boolean(left), Value::Boolean(right)) => left.cmp(right), + // Cross-type ordering: Integer < Text < Boolean + (Value::Integer(_), _) => Ordering::Less, + (_, Value::Integer(_)) => Ordering::Greater, (Value::Text(_), Value::Boolean(_)) => Ordering::Less, (Value::Boolean(_), Value::Text(_)) => Ordering::Greater, } diff --git a/src/lib.rs b/src/lib.rs index 1f2ec94..56096f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,5 +18,5 @@ pub mod sql; // Lower-level reasoning and provenance APIs remain under `query_engine::chase`. pub use chase::{ Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, Rule, RuleBuilder, Term, - chase, chase_with_config, standard_chase, + chase, chase_with_config, oblivious_chase, standard_chase, }; diff --git a/src/planner/logical.rs b/src/planner/logical.rs index aca23e2..44dabd3 100644 --- a/src/planner/logical.rs +++ b/src/planner/logical.rs @@ -18,8 +18,12 @@ pub enum LogicalExpr { Literal(Value), /// Equality. Eq(Box, Box), + /// Inequality. + Ne(Box, Box), /// Boolean conjunction. And(Box, Box), + /// Boolean disjunction. + Or(Box, Box), } /// A named output expression in a projection. @@ -68,6 +72,11 @@ pub enum LogicalPlan { expressions: Vec, schema: Schema, }, + /// Limit the number of output rows. + Limit { + input: Box, + count: usize, + }, } impl LogicalPlan { @@ -79,6 +88,7 @@ impl LogicalPlan { Self::Filter { input, .. } => input.output_schema(), Self::Sort { schema, .. } => schema, Self::Project { schema, .. } => schema, + Self::Limit { input, .. } => input.output_schema(), } } } diff --git a/src/planner/sql.rs b/src/planner/sql.rs index 8a30e55..d24401e 100644 --- a/src/planner/sql.rs +++ b/src/planner/sql.rs @@ -80,39 +80,46 @@ pub fn plan_select( }; } - if is_wildcard_projection(&select.projection) { - let output_schema = plan.output_schema().clone(); - return maybe_apply_sort(plan, output_schema, &select.order_by, &select.from); - } - - let mut expressions = Vec::new(); - let mut fields = Vec::new(); - for (index, item) in select.projection.iter().enumerate() { - match item { - SelectItem::Expr { expr, alias } => { - let planned_expr = plan_expr(expr, &input_schema, &select.from)?; - let output_name = alias - .clone() - .unwrap_or_else(|| default_projection_name(expr, index + 1)); - let (data_type, nullable) = projection_metadata(expr, &input_schema, &select.from)?; - expressions.push(NamedExpr { - name: output_name.clone(), - expr: planned_expr, - }); - fields.push(Field::new(output_name, data_type, nullable)); + if !is_wildcard_projection(&select.projection) { + let mut expressions = Vec::new(); + let mut fields = Vec::new(); + for (index, item) in select.projection.iter().enumerate() { + match item { + SelectItem::Expr { expr, alias } => { + let planned_expr = plan_expr(expr, &input_schema, &select.from)?; + let output_name = alias + .clone() + .unwrap_or_else(|| default_projection_name(expr, index + 1)); + let (data_type, nullable) = + projection_metadata(expr, &input_schema, &select.from)?; + expressions.push(NamedExpr { + name: output_name.clone(), + expr: planned_expr, + }); + fields.push(Field::new(output_name, data_type, nullable)); + } + SelectItem::Wildcard => return Err(PlannerError::MixedWildcardProjection), } - SelectItem::Wildcard => return Err(PlannerError::MixedWildcardProjection), } - } - let plan = LogicalPlan::Project { - input: Box::new(plan), - expressions, - schema: Schema::new(fields), - }; + plan = LogicalPlan::Project { + input: Box::new(plan), + expressions, + schema: Schema::new(fields), + }; + } let output_schema = plan.output_schema().clone(); - maybe_apply_sort(plan, output_schema, &select.order_by, &select.from) + plan = maybe_apply_sort(plan, output_schema, &select.order_by, &select.from)?; + + if let Some(count) = select.limit { + plan = LogicalPlan::Limit { + input: Box::new(plan), + count, + }; + } + + Ok(plan) } fn is_wildcard_projection(items: &[SelectItem]) -> bool { @@ -182,10 +189,18 @@ fn plan_expr( Box::new(plan_expr(left, schema, tables)?), Box::new(plan_expr(right, schema, tables)?), )), + BinaryOp::Ne => Ok(LogicalExpr::Ne( + Box::new(plan_expr(left, schema, tables)?), + Box::new(plan_expr(right, schema, tables)?), + )), BinaryOp::And => Ok(LogicalExpr::And( Box::new(plan_expr(left, schema, tables)?), Box::new(plan_expr(right, schema, tables)?), )), + BinaryOp::Or => Ok(LogicalExpr::Or( + Box::new(plan_expr(left, schema, tables)?), + Box::new(plan_expr(right, schema, tables)?), + )), }, } } @@ -226,6 +241,7 @@ fn maybe_apply_sort( fn plan_literal(literal: &Literal) -> Value { match literal { Literal::String(value) => Value::text(value.clone()), + Literal::Integer(n) => Value::Integer(*n), Literal::Null => Value::Null, } } @@ -245,6 +261,7 @@ fn projection_metadata( Ok((field.data_type().clone(), field.nullable())) } Expr::Literal(Literal::String(_)) => Ok((DataType::Text, false)), + Expr::Literal(Literal::Integer(_)) => Ok((DataType::Integer, false)), Expr::Literal(Literal::Null) => Ok((DataType::Text, true)), Expr::Binary { .. } => Ok((DataType::Boolean, true)), } @@ -550,6 +567,7 @@ mod tests { }], selection: None, order_by: Vec::new(), + limit: None, }; let error = plan_select(&malformed, &catalog).unwrap_err(); assert_eq!( diff --git a/src/relational/schema.rs b/src/relational/schema.rs index 7f83c98..2b4168e 100644 --- a/src/relational/schema.rs +++ b/src/relational/schema.rs @@ -5,6 +5,8 @@ use std::fmt; pub enum DataType { /// UTF-8 text values. Text, + /// 64-bit signed integer values. + Integer, /// Boolean values. Boolean, } diff --git a/src/relational/value.rs b/src/relational/value.rs index a750d50..e26a567 100644 --- a/src/relational/value.rs +++ b/src/relational/value.rs @@ -5,6 +5,8 @@ use std::fmt; pub enum Value { /// Textual data. Text(String), + /// Integer data. + Integer(i64), /// Boolean data. Boolean(bool), /// SQL-style null. @@ -29,8 +31,9 @@ impl Value { match (self, other) { (Self::Null, _) | (_, Self::Null) => None, (Self::Text(left), Self::Text(right)) => Some(left == right), + (Self::Integer(left), Self::Integer(right)) => Some(left == right), (Self::Boolean(left), Self::Boolean(right)) => Some(left == right), - (Self::Text(_), Self::Boolean(_)) | (Self::Boolean(_), Self::Text(_)) => Some(false), + _ => Some(false), } } } @@ -39,6 +42,7 @@ impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Text(value) => write!(f, "{}", value), + Self::Integer(value) => write!(f, "{}", value), Self::Boolean(value) => write!(f, "{}", value), Self::Null => write!(f, "NULL"), } diff --git a/src/sql/ast.rs b/src/sql/ast.rs index 7bbae35..05a27bc 100644 --- a/src/sql/ast.rs +++ b/src/sql/ast.rs @@ -1,4 +1,4 @@ -/// A parsed `SELECT-FROM-WHERE-ORDER BY` statement in the current SQL subset. +/// A parsed `SELECT-FROM-WHERE-ORDER BY-LIMIT` statement in the current SQL subset. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Select { /// Output expressions requested by the query. @@ -9,6 +9,8 @@ pub struct Select { pub selection: Option, /// Optional output ordering. pub order_by: Vec, + /// Optional row limit. + pub limit: Option, } /// One source entry in a `FROM` list. @@ -58,6 +60,8 @@ pub enum Expr { pub enum Literal { /// A string literal. String(String), + /// An integer literal. + Integer(i64), /// The `NULL` literal. Null, } @@ -67,8 +71,12 @@ pub enum Literal { pub enum BinaryOp { /// Equality. Eq, + /// Inequality. + Ne, /// Boolean conjunction. And, + /// Boolean disjunction. + Or, } /// Sort direction for `ORDER BY`. diff --git a/src/sql/parser.rs b/src/sql/parser.rs index d6add09..eb7b89a 100644 --- a/src/sql/parser.rs +++ b/src/sql/parser.rs @@ -43,16 +43,20 @@ enum Token { Where, As, And, + Or, Order, By, Asc, Desc, Null, + Limit, Identifier(String), String(String), + Integer(usize), Star, Comma, Eq, + Ne, } /// Parse a `SELECT-FROM-WHERE-ORDER BY` query in the current SQL subset. @@ -91,6 +95,13 @@ impl Parser { Vec::new() }; + let limit = if self.peek() == Some(&Token::Limit) { + self.index += 1; + Some(self.expect_integer()?) + } else { + None + }; + if let Some(token) = self.peek() { return Err(ParseError::UnexpectedToken(render_token(token))); } @@ -100,6 +111,7 @@ impl Parser { from, selection, order_by, + limit, }) } @@ -166,6 +178,22 @@ impl Parser { } fn parse_expr(&mut self) -> Result { + let mut expr = self.parse_and()?; + + while self.peek() == Some(&Token::Or) { + self.index += 1; + let right = self.parse_and()?; + expr = Expr::Binary { + left: Box::new(expr), + op: BinaryOp::Or, + right: Box::new(right), + }; + } + + Ok(expr) + } + + fn parse_and(&mut self) -> Result { let mut expr = self.parse_equality()?; while self.peek() == Some(&Token::And) { @@ -220,6 +248,14 @@ impl Parser { right: Box::new(right), }) } + Token::Ne => { + let right = self.parse_operand()?; + Ok(Expr::Binary { + left: Box::new(left), + op: BinaryOp::Ne, + right: Box::new(right), + }) + } other => Err(ParseError::UnexpectedToken(render_token(&other))), } } @@ -228,6 +264,7 @@ impl Parser { match self.next().ok_or(ParseError::UnexpectedEnd)? { Token::Identifier(name) => Ok(Expr::Identifier(name)), Token::String(value) => Ok(Expr::Literal(Literal::String(value))), + Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n as i64))), Token::Null => Ok(Expr::Literal(Literal::Null)), other => Err(ParseError::UnexpectedToken(render_token(&other))), } @@ -249,6 +286,13 @@ impl Parser { } } + fn expect_integer(&mut self) -> Result { + match self.next().ok_or(ParseError::UnexpectedEnd)? { + Token::Integer(n) => Ok(n), + other => Err(ParseError::UnexpectedToken(render_token(&other))), + } + } + fn peek(&self) -> Option<&Token> { self.tokens.get(self.index) } @@ -281,11 +325,33 @@ fn tokenize(input: &str) -> Result, ParseError> { chars.next(); tokens.push(Token::Comma); } + '!' => { + chars.next(); + if chars.peek() == Some(&'=') { + chars.next(); + tokens.push(Token::Ne); + } else { + return Err(ParseError::UnexpectedToken("!".to_string())); + } + } + '<' => { + chars.next(); + if chars.peek() == Some(&'>') { + chars.next(); + tokens.push(Token::Ne); + } else { + return Err(ParseError::UnexpectedToken("<".to_string())); + } + } '=' => { chars.next(); tokens.push(Token::Eq); } '\'' => tokens.push(Token::String(parse_string(&mut chars)?)), + ch if ch.is_ascii_digit() => { + let number = parse_integer(&mut chars); + tokens.push(Token::Integer(number)); + } ch if is_identifier_start(ch) => { let ident = parse_identifier(&mut chars); let token = match ident.to_ascii_uppercase().as_str() { @@ -294,11 +360,13 @@ fn tokenize(input: &str) -> Result, ParseError> { "WHERE" => Token::Where, "AS" => Token::As, "AND" => Token::And, + "OR" => Token::Or, "ORDER" => Token::Order, "BY" => Token::By, "ASC" => Token::Asc, "DESC" => Token::Desc, "NULL" => Token::Null, + "LIMIT" => Token::Limit, _ => Token::Identifier(ident), }; tokens.push(token); @@ -351,8 +419,24 @@ where ident } +fn parse_integer(chars: &mut std::iter::Peekable) -> usize +where + I: Iterator, +{ + let mut value: usize = 0; + while let Some(ch) = chars.peek().copied() { + if ch.is_ascii_digit() { + value = value * 10 + (ch as usize - '0' as usize); + chars.next(); + } else { + break; + } + } + value +} + fn is_identifier_start(ch: char) -> bool { - ch.is_ascii_alphanumeric() || ch == '_' + ch.is_ascii_alphabetic() || ch == '_' } fn is_identifier_part(ch: char) -> bool { @@ -366,16 +450,20 @@ fn render_token(token: &Token) -> String { Token::Where => "WHERE".to_string(), Token::As => "AS".to_string(), Token::And => "AND".to_string(), + Token::Or => "OR".to_string(), Token::Order => "ORDER".to_string(), Token::By => "BY".to_string(), Token::Asc => "ASC".to_string(), Token::Desc => "DESC".to_string(), Token::Null => "NULL".to_string(), + Token::Limit => "LIMIT".to_string(), Token::Identifier(name) => name.clone(), + Token::Integer(n) => n.to_string(), Token::String(value) => format!("'{}'", value), Token::Star => "*".to_string(), Token::Comma => ",".to_string(), Token::Eq => "=".to_string(), + Token::Ne => "!=".to_string(), } } @@ -536,4 +624,115 @@ mod tests { let error = parse_select("SELECT *, c0 FROM Parent").unwrap_err(); assert_eq!(error, ParseError::MixedWildcardProjection); } + + #[test] + fn parses_not_equal_with_bang_eq() { + let select = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap(); + + assert_eq!( + select.selection, + Some(Expr::Binary { + left: Box::new(Expr::Identifier("c1".to_string())), + op: BinaryOp::Ne, + right: Box::new(Expr::Literal(Literal::String("bob".to_string()))), + }) + ); + } + + #[test] + fn parses_not_equal_with_diamond() { + let select = parse_select("SELECT c0 FROM Parent WHERE c1 <> 'bob'").unwrap(); + + assert_eq!( + select.selection, + Some(Expr::Binary { + left: Box::new(Expr::Identifier("c1".to_string())), + op: BinaryOp::Ne, + right: Box::new(Expr::Literal(Literal::String("bob".to_string()))), + }) + ); + } + + #[test] + fn parses_or_expression() { + let select = + parse_select("SELECT c0 FROM Parent WHERE c0 = 'alice' OR c0 = 'bob'").unwrap(); + + assert_eq!( + select.selection, + Some(Expr::Binary { + left: Box::new(Expr::Binary { + left: Box::new(Expr::Identifier("c0".to_string())), + op: BinaryOp::Eq, + right: Box::new(Expr::Literal(Literal::String("alice".to_string()))), + }), + op: BinaryOp::Or, + right: Box::new(Expr::Binary { + left: Box::new(Expr::Identifier("c0".to_string())), + op: BinaryOp::Eq, + right: Box::new(Expr::Literal(Literal::String("bob".to_string()))), + }), + }) + ); + } + + #[test] + fn parses_integer_literal_in_expression() { + let select = parse_select("SELECT c0 FROM Parent WHERE c0 = 42").unwrap(); + + assert_eq!( + select.selection, + Some(Expr::Binary { + left: Box::new(Expr::Identifier("c0".to_string())), + op: BinaryOp::Eq, + right: Box::new(Expr::Literal(Literal::Integer(42))), + }) + ); + } + + #[test] + fn parses_limit_clause() { + let select = parse_select("SELECT c0 FROM Parent LIMIT 5").unwrap(); + assert_eq!(select.limit, Some(5)); + } + + #[test] + fn parses_order_by_with_limit() { + let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC LIMIT 1").unwrap(); + assert_eq!(select.order_by.len(), 1); + assert_eq!(select.limit, Some(1)); + } + + #[test] + fn parses_or_with_and_precedence() { + // AND binds tighter than OR: a = '1' OR b = '2' AND c = '3' + // should parse as: a = '1' OR (b = '2' AND c = '3') + let select = + parse_select("SELECT c0 FROM Parent WHERE c0 = '1' OR c1 = '2' AND c0 = '3'").unwrap(); + + assert_eq!( + select.selection, + Some(Expr::Binary { + left: Box::new(Expr::Binary { + left: Box::new(Expr::Identifier("c0".to_string())), + op: BinaryOp::Eq, + right: Box::new(Expr::Literal(Literal::String("1".to_string()))), + }), + op: BinaryOp::Or, + right: Box::new(Expr::Binary { + left: Box::new(Expr::Binary { + left: Box::new(Expr::Identifier("c1".to_string())), + op: BinaryOp::Eq, + right: Box::new(Expr::Literal(Literal::String("2".to_string()))), + }), + op: BinaryOp::And, + right: Box::new(Expr::Binary { + left: Box::new(Expr::Identifier("c0".to_string())), + op: BinaryOp::Eq, + right: Box::new(Expr::Literal(Literal::String("3".to_string()))), + }), + }), + }) + ); + } } diff --git a/tests/sql_pipeline_tests.rs b/tests/sql_pipeline_tests.rs index 645fe99..9134585 100644 --- a/tests/sql_pipeline_tests.rs +++ b/tests/sql_pipeline_tests.rs @@ -250,3 +250,82 @@ fn select_order_by_desc_sorts_rows() { assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob"); assert_eq!(format!("{}", result.rows()[1].values()[0]), "alice"); } + +#[test] +fn select_integer_literal_in_projection() { + let instance = parent_instance(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = parse_select("SELECT c0, 42 AS answer FROM Parent").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &instance).unwrap(); + + assert_eq!(result.schema().len(), 2); + assert_eq!(result.schema().fields()[1].name(), "answer"); + assert_eq!( + result.schema().fields()[1].data_type(), + &query_engine::relational::DataType::Integer + ); + assert_eq!(result.rows().len(), 2); + assert_eq!(format!("{}", result.rows()[0].values()[1]), "42"); +} + +#[test] +fn select_limit_restricts_row_count() { + let instance = parent_instance(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &instance).unwrap(); + + assert_eq!(result.rows().len(), 1); + assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice"); +} + +#[test] +fn select_where_or_matches_either_condition() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + Atom::new( + "Parent", + vec![Term::constant("carol"), Term::constant("dave")], + ), + ] + .into_iter() + .collect(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'dave'").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &instance).unwrap(); + + assert_eq!(result.rows().len(), 2); + let mut values = result + .rows() + .iter() + .map(|row| format!("{}", row.values()[0])) + .collect::>(); + values.sort(); + assert_eq!(values, vec!["alice".to_string(), "carol".to_string()]); +} + +#[test] +fn select_where_not_equal_excludes_matching_rows() { + let instance = parent_instance(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &instance).unwrap(); + + assert_eq!(result.rows().len(), 1); + assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob"); +}