Add oblivious chase, broader SQL operators, LIMIT, and integer literals

This commit is contained in:
Hassan Abedi 2026-04-10 15:22:30 +02:00
parent be8e1388bc
commit 52cb492bce
16 changed files with 583 additions and 41 deletions

2
.gitignore vendored
View File

@ -77,9 +77,9 @@ tarpaulin-report.html
Cargo.lock
# Misc
*.proptest-regressions
.DS_Store
.benchmarks
.env
.claude/
*.proptest-regressions
.codex

View File

@ -71,7 +71,7 @@ Quick examples:
- The chase engine should remain largely stateless; pass execution state explicitly.
- New chase variants should be composable with existing infrastructure.
- Existential variables generate labeled nulls (`Term::Null`).
- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY` over predicate-backed tables, equality predicates combined with `AND`, comma-join style multi-table queries, table aliases, and ordering by output-column names.
- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY-LIMIT` over predicate-backed tables; equality and inequality predicates combined with `AND` and `OR`; comma-join style multi-table queries; table aliases; ordering by output-column names; integer and string literals.
- Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`.
- Single-table SQL queries may use the table name as a qualifier when no alias is present.
- Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented.

View File

@ -14,7 +14,7 @@ execution boundaries.
- Provenance-oriented explanations for derived answers
- Script, REPL, and local web UI for experimentation
- Relational schema, catalog, logical-plan, and execution scaffolding
- A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY` queries over predicate-backed tables
- A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY-LIMIT` queries over predicate-backed tables
### Architecture
@ -111,7 +111,7 @@ The repository now has a narrow SQL pipeline with:
- relational schemas, rows, and values
- SQL parsing for a small subset
- logical planning
- execution for filtering, ordering, and basic multi-table joins
- execution for filtering, ordering, limiting, and basic multi-table joins
Currently supported examples:
@ -119,9 +119,12 @@ Currently supported examples:
SELECT * FROM Parent
SELECT c0 FROM Parent
SELECT c0 FROM Parent WHERE c1 = 'bob'
SELECT c0 FROM Parent WHERE c1 != 'bob'
SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice'
SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'carol'
SELECT c0 FROM Parent ORDER BY c0 DESC
SELECT c0 AS parent_name, 'seed' AS label FROM Parent
SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1
SELECT c0 AS parent_name, 'seed' AS label, 42 AS answer FROM Parent
SELECT Parent.parent, Ancestor.child
FROM Parent, Ancestor
WHERE Parent.child = Ancestor.parent
@ -172,8 +175,10 @@ Current limits:
- joins currently use comma-separated tables plus `WHERE` filtering
- multi-table queries require qualified column names such as `Parent.child`
- table aliases are supported via `FROM Parent AS p`
- `WHERE` supports equality predicates combined with `AND`
- `WHERE` supports `=`, `!=`/`<>`, `AND`, and `OR` (with standard precedence)
- `ORDER BY` supports output-column ordering with `ASC`/`DESC`
- `LIMIT` restricts the number of output rows
- literals include strings, integers, and `NULL`
- no aggregates
- projection aliases only via `AS`
@ -183,6 +188,7 @@ Runnable SQL examples:
- `examples/scripts/sql_join.ech`
- `examples/scripts/sql_self_join.ech`
- `examples/scripts/sql_order_by.ech`
- `examples/scripts/sql_filter_ops.ech`
### Development

View File

@ -28,10 +28,13 @@ This document tracks the current state and next steps for the repository.
- [x] Minimal SQL AST and parser
- [x] Logical plan scaffolding
- [x] Logical-plan execution for the first SQL slice
- [x] `SELECT-FROM-WHERE-ORDER BY` support with positional or named columns
- [x] `SELECT-FROM-WHERE-ORDER BY-LIMIT` support with positional or named columns
- [x] Basic multi-table SQL joins via qualified-column filtering
- [x] Table aliases for self-joins and qualified references
- [x] Basic `ORDER BY` support over output columns
- [x] `!=`/`<>` inequality and `OR` disjunction in `WHERE` clauses
- [x] `LIMIT` clause for restricting output row count
- [x] Integer literal and `DataType::Integer` support
### Near-Term Cleanup
@ -72,7 +75,7 @@ This document tracks the current state and next steps for the repository.
- [x] Restricted chase
- [x] Standard chase
- [ ] Oblivious chase
- [x] Oblivious chase
- [ ] Skolem chase
- [ ] Core chase
- [ ] Negative constraints

View File

@ -0,0 +1,20 @@
# Demonstrate inequality, OR, LIMIT, and integer literals in the SQL frontend.
fact Employee(alice, 30, engineering).
fact Employee(bob, 25, sales).
fact Employee(carol, 35, engineering).
fact Employee(dave, 28, marketing).
schema Employee(name, age, dept).
# Inequality: exclude engineering.
sql SELECT name FROM Employee WHERE dept != 'engineering';
# OR: engineering or marketing.
sql SELECT name, dept FROM Employee WHERE dept = 'engineering' OR dept = 'marketing';
# LIMIT: first two rows in name order.
sql SELECT name FROM Employee ORDER BY name ASC LIMIT 2;
# Integer literal in projection.
sql SELECT name, 1 AS active FROM Employee WHERE dept = 'sales';

View File

@ -64,6 +64,12 @@ pub enum ChaseVariant {
/// rule with the same frontier variable bindings. This is the default.
#[default]
Restricted,
/// Oblivious chase: fires every matching rule application without checking
/// head satisfaction or tracking triggers. Terminates only when no body
/// match produces a genuinely new fact. For rules with existential
/// variables this variant will typically not terminate (it will hit the
/// step limit) because each application generates fresh nulls.
Oblivious,
}
/// Configuration for the chase algorithm.
@ -112,6 +118,26 @@ pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
chase_with_config(instance, rules, config)
}
/// Run the chase using [`ChaseVariant::Oblivious`].
///
/// This is a convenience wrapper: it builds a [`ChaseConfig`] whose
/// `variant` is `Oblivious`, leaves every other setting at its
/// `Default` value, and delegates to [`chase_with_config`].
///
/// In this variant every rule application whose body matches is fired;
/// head satisfaction is not checked and previously applied triggers are
/// not tracked. The run stops once a full round yields no new facts.
///
/// For Datalog rules (no existential variables) this reaches the same
/// fixpoint as the restricted and standard variants. For rules with
/// existential variables each application produces fresh labeled nulls,
/// so the run will typically continue until the step limit is hit.
pub fn oblivious_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
    chase_with_config(
        instance,
        rules,
        ChaseConfig {
            variant: ChaseVariant::Oblivious,
            ..Default::default()
        },
    )
}
/// Run the chase with custom configuration.
pub fn chase_with_config(
mut instance: Instance,
@ -137,6 +163,7 @@ pub fn chase_with_config(
ChaseVariant::Restricted => {
restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers)
}
ChaseVariant::Oblivious => oblivious_chase_step(&instance, rules, &mut null_gen),
};
if new_facts.is_empty() {
@ -231,6 +258,32 @@ fn restricted_chase_step(
new_facts
}
/// Execute one round of the oblivious chase.
///
/// Every body match of every rule is fired against `instance`; the head is
/// instantiated through `apply_rule_head` (drawing fresh nulls from
/// `null_gen`) and each resulting fact that `instance` does not already
/// contain is collected. Head satisfaction is not checked and no trigger
/// history is kept.
///
/// The returned vector is only filtered against `instance`, so a fact
/// derived by several matches within the same round appears once per match.
///
/// NOTE(review): because no trigger set is kept, the same body match is
/// re-fired on every round. The oblivious chase as usually defined in the
/// literature applies each trigger once; confirm that the re-firing
/// behavior here is intended.
fn oblivious_chase_step(
    instance: &Instance,
    rules: &[Rule],
    null_gen: &mut NullGenerator,
) -> Vec<Atom> {
    let mut fresh = Vec::new();

    for rule in rules {
        for binding in find_matches(instance, &rule.body) {
            // Keep only head facts that are not already present.
            fresh.extend(
                apply_rule_head(rule, &binding, null_gen)
                    .into_iter()
                    .filter(|candidate| !instance.contains(candidate)),
            );
        }
    }

    fresh
}
/// A trigger for EGD applications, tracking which EGD was applied with which body bindings.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EgdTrigger {
@ -329,6 +382,7 @@ pub fn chase_full(
ChaseVariant::Restricted => {
restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers)
}
ChaseVariant::Oblivious => oblivious_chase_step(&instance, tgds, &mut null_gen),
};
let tgd_changes = !new_facts.is_empty();
@ -796,4 +850,117 @@ mod tests {
assert!(result.error.is_none());
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
}
// Oblivious chase tests
#[test]
fn test_oblivious_chase_datalog_rules() {
    // Two-link parent chain: alice -> bob -> carol.
    let seed_facts = vec![
        Atom::new(
            "Parent",
            vec![Term::constant("alice"), Term::constant("bob")],
        ),
        Atom::new(
            "Parent",
            vec![Term::constant("bob"), Term::constant("carol")],
        ),
    ];
    let instance: Instance = seed_facts.into_iter().collect();

    // Base case: every parent is an ancestor.
    let base_rule = RuleBuilder::new()
        .when("Parent", vec![Term::var("X"), Term::var("Y")])
        .then("Ancestor", vec![Term::var("X"), Term::var("Y")])
        .build();
    // Inductive case: extend an ancestor chain by one parent link.
    let step_rule = RuleBuilder::new()
        .when("Ancestor", vec![Term::var("X"), Term::var("Y")])
        .when("Parent", vec![Term::var("Y"), Term::var("Z")])
        .then("Ancestor", vec![Term::var("X"), Term::var("Z")])
        .build();

    let result = oblivious_chase(instance, &[base_rule, step_rule]);

    assert!(result.terminated);
    // Two direct pairs plus the transitive alice -> carol pair.
    let ancestor_count = result.instance.facts_for_predicate("Ancestor").len();
    assert_eq!(ancestor_count, 3);
}
#[test]
fn test_oblivious_chase_matches_restricted_for_datalog() {
    // Linear edge chain a -> b -> c -> d.
    let edges = vec![
        Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
        Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]),
        Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]),
    ];
    let instance: Instance = edges.into_iter().collect();

    // Transitive closure of Edge, expressed as two Datalog rules.
    let rules = vec![
        RuleBuilder::new()
            .when("Edge", vec![Term::var("X"), Term::var("Y")])
            .then("Path", vec![Term::var("X"), Term::var("Y")])
            .build(),
        RuleBuilder::new()
            .when("Path", vec![Term::var("X"), Term::var("Y")])
            .when("Edge", vec![Term::var("Y"), Term::var("Z")])
            .then("Path", vec![Term::var("X"), Term::var("Z")])
            .build(),
    ];

    let oblivious_result = oblivious_chase(instance.clone(), &rules);
    let restricted_result = chase(instance, &rules);

    assert!(oblivious_result.terminated);
    assert!(restricted_result.terminated);

    // Both variants must derive the same number of Path facts.
    let oblivious_count = oblivious_result.instance.facts_for_predicate("Path").len();
    let restricted_count = restricted_result.instance.facts_for_predicate("Path").len();
    assert_eq!(oblivious_count, restricted_count);
}
#[test]
fn test_oblivious_chase_does_not_terminate_with_existentials() {
    let people = vec![Atom::new("Person", vec![Term::constant("alice")])];
    let instance: Instance = people.into_iter().collect();

    // `Y` occurs only in the head, so it is an existential variable.
    let ssn_rule = RuleBuilder::new()
        .when("Person", vec![Term::var("X")])
        .then("HasSSN", vec![Term::var("X"), Term::var("Y")])
        .build();

    // Keep the step budget small so the test finishes quickly.
    let config = ChaseConfig {
        variant: ChaseVariant::Oblivious,
        max_steps: 10,
    };

    let result = chase_with_config(instance, &[ssn_rule], config);

    // Each round invents a fresh null for the same body match, so the run
    // is expected to exhaust the step budget instead of reaching a fixpoint.
    assert!(!result.terminated);
    let ssn_count = result.instance.facts_for_predicate("HasSSN").len();
    assert!(ssn_count > 1);
}
#[test]
fn test_oblivious_chase_via_config() {
    let seed = vec![Atom::new("A", vec![Term::constant("x")])];
    let instance: Instance = seed.into_iter().collect();

    // Plain copy rule: A(X) -> B(X).
    let copy_rule = RuleBuilder::new()
        .when("A", vec![Term::var("X")])
        .then("B", vec![Term::var("X")])
        .build();

    // Select the oblivious variant explicitly through the config struct.
    let result = chase_with_config(
        instance,
        &[copy_rule],
        ChaseConfig {
            variant: ChaseVariant::Oblivious,
            ..Default::default()
        },
    );

    assert!(result.terminated);
    let derived_count = result.instance.facts_for_predicate("B").len();
    assert_eq!(derived_count, 1);
}
}

View File

@ -13,7 +13,7 @@ mod engine;
pub use atom::Atom;
pub use engine::{
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
chase_with_egds, standard_chase,
chase_with_egds, oblivious_chase, standard_chase,
};
pub use inference::{Derivation, MaterializedState, find_matches, materialize};
pub use instance::{Instance, InstanceError};

View File

@ -101,6 +101,11 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys));
Ok(ResultSet::new(schema.clone(), rows))
}
LogicalPlan::Limit { input, count } => {
let result = execute(input, instance)?;
let rows = result.rows().iter().take(*count).cloned().collect();
Ok(ResultSet::new(result.schema().clone(), rows))
}
}
}
@ -113,9 +118,16 @@ fn eval_predicate(
LogicalExpr::Eq(left, right) => Ok(eval_expr(left, row, schema)?
.sql_eq(&eval_expr(right, row, schema)?)
.unwrap_or(false)),
LogicalExpr::Ne(left, right) => Ok(eval_expr(left, row, schema)?
.sql_eq(&eval_expr(right, row, schema)?)
.map(|eq| !eq)
.unwrap_or(false)),
LogicalExpr::And(left, right) => {
Ok(eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?)
}
LogicalExpr::Or(left, right) => {
Ok(eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?)
}
_ => Ok(false),
}
}
@ -138,9 +150,19 @@ fn eval_expr(
let right = eval_expr(right, row, schema)?;
Ok(Value::Boolean(left.sql_eq(&right).unwrap_or(false)))
}
LogicalExpr::Ne(left, right) => {
let left = eval_expr(left, row, schema)?;
let right = eval_expr(right, row, schema)?;
Ok(Value::Boolean(
left.sql_eq(&right).map(|eq| !eq).unwrap_or(false),
))
}
LogicalExpr::And(left, right) => Ok(Value::Boolean(
eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?,
)),
LogicalExpr::Or(left, right) => Ok(Value::Boolean(
eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?,
)),
}
}
@ -188,7 +210,11 @@ fn compare_values(left: &Value, right: &Value) -> Ordering {
(Value::Null, _) => Ordering::Greater,
(_, Value::Null) => Ordering::Less,
(Value::Text(left), Value::Text(right)) => left.cmp(right),
(Value::Integer(left), Value::Integer(right)) => left.cmp(right),
(Value::Boolean(left), Value::Boolean(right)) => left.cmp(right),
// Cross-type ordering: Integer < Text < Boolean
(Value::Integer(_), _) => Ordering::Less,
(_, Value::Integer(_)) => Ordering::Greater,
(Value::Text(_), Value::Boolean(_)) => Ordering::Less,
(Value::Boolean(_), Value::Text(_)) => Ordering::Greater,
}

View File

@ -18,5 +18,5 @@ pub mod sql;
// Lower-level reasoning and provenance APIs remain under `query_engine::chase`.
pub use chase::{
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, Rule, RuleBuilder, Term,
chase, chase_with_config, standard_chase,
chase, chase_with_config, oblivious_chase, standard_chase,
};

View File

@ -18,8 +18,12 @@ pub enum LogicalExpr {
Literal(Value),
/// Equality.
Eq(Box<LogicalExpr>, Box<LogicalExpr>),
/// Inequality.
Ne(Box<LogicalExpr>, Box<LogicalExpr>),
/// Boolean conjunction.
And(Box<LogicalExpr>, Box<LogicalExpr>),
/// Boolean disjunction.
Or(Box<LogicalExpr>, Box<LogicalExpr>),
}
/// A named output expression in a projection.
@ -68,6 +72,11 @@ pub enum LogicalPlan {
expressions: Vec<NamedExpr>,
schema: Schema,
},
/// Limit the number of output rows.
Limit {
input: Box<LogicalPlan>,
count: usize,
},
}
impl LogicalPlan {
@ -79,6 +88,7 @@ impl LogicalPlan {
Self::Filter { input, .. } => input.output_schema(),
Self::Sort { schema, .. } => schema,
Self::Project { schema, .. } => schema,
Self::Limit { input, .. } => input.output_schema(),
}
}
}

View File

@ -80,11 +80,7 @@ pub fn plan_select(
};
}
if is_wildcard_projection(&select.projection) {
let output_schema = plan.output_schema().clone();
return maybe_apply_sort(plan, output_schema, &select.order_by, &select.from);
}
if !is_wildcard_projection(&select.projection) {
let mut expressions = Vec::new();
let mut fields = Vec::new();
for (index, item) in select.projection.iter().enumerate() {
@ -94,7 +90,8 @@ pub fn plan_select(
let output_name = alias
.clone()
.unwrap_or_else(|| default_projection_name(expr, index + 1));
let (data_type, nullable) = projection_metadata(expr, &input_schema, &select.from)?;
let (data_type, nullable) =
projection_metadata(expr, &input_schema, &select.from)?;
expressions.push(NamedExpr {
name: output_name.clone(),
expr: planned_expr,
@ -105,14 +102,24 @@ pub fn plan_select(
}
}
let plan = LogicalPlan::Project {
plan = LogicalPlan::Project {
input: Box::new(plan),
expressions,
schema: Schema::new(fields),
};
}
let output_schema = plan.output_schema().clone();
maybe_apply_sort(plan, output_schema, &select.order_by, &select.from)
plan = maybe_apply_sort(plan, output_schema, &select.order_by, &select.from)?;
if let Some(count) = select.limit {
plan = LogicalPlan::Limit {
input: Box::new(plan),
count,
};
}
Ok(plan)
}
fn is_wildcard_projection(items: &[SelectItem]) -> bool {
@ -182,10 +189,18 @@ fn plan_expr(
Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?),
)),
BinaryOp::Ne => Ok(LogicalExpr::Ne(
Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?),
)),
BinaryOp::And => Ok(LogicalExpr::And(
Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?),
)),
BinaryOp::Or => Ok(LogicalExpr::Or(
Box::new(plan_expr(left, schema, tables)?),
Box::new(plan_expr(right, schema, tables)?),
)),
},
}
}
@ -226,6 +241,7 @@ fn maybe_apply_sort(
fn plan_literal(literal: &Literal) -> Value {
match literal {
Literal::String(value) => Value::text(value.clone()),
Literal::Integer(n) => Value::Integer(*n),
Literal::Null => Value::Null,
}
}
@ -245,6 +261,7 @@ fn projection_metadata(
Ok((field.data_type().clone(), field.nullable()))
}
Expr::Literal(Literal::String(_)) => Ok((DataType::Text, false)),
Expr::Literal(Literal::Integer(_)) => Ok((DataType::Integer, false)),
Expr::Literal(Literal::Null) => Ok((DataType::Text, true)),
Expr::Binary { .. } => Ok((DataType::Boolean, true)),
}
@ -550,6 +567,7 @@ mod tests {
}],
selection: None,
order_by: Vec::new(),
limit: None,
};
let error = plan_select(&malformed, &catalog).unwrap_err();
assert_eq!(

View File

@ -5,6 +5,8 @@ use std::fmt;
pub enum DataType {
/// UTF-8 text values.
Text,
/// 64-bit signed integer values.
Integer,
/// Boolean values.
Boolean,
}

View File

@ -5,6 +5,8 @@ use std::fmt;
pub enum Value {
/// Textual data.
Text(String),
/// Integer data.
Integer(i64),
/// Boolean data.
Boolean(bool),
/// SQL-style null.
@ -29,8 +31,9 @@ impl Value {
match (self, other) {
(Self::Null, _) | (_, Self::Null) => None,
(Self::Text(left), Self::Text(right)) => Some(left == right),
(Self::Integer(left), Self::Integer(right)) => Some(left == right),
(Self::Boolean(left), Self::Boolean(right)) => Some(left == right),
(Self::Text(_), Self::Boolean(_)) | (Self::Boolean(_), Self::Text(_)) => Some(false),
_ => Some(false),
}
}
}
@ -39,6 +42,7 @@ impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Text(value) => write!(f, "{}", value),
Self::Integer(value) => write!(f, "{}", value),
Self::Boolean(value) => write!(f, "{}", value),
Self::Null => write!(f, "NULL"),
}

View File

@ -1,4 +1,4 @@
/// A parsed `SELECT-FROM-WHERE-ORDER BY` statement in the current SQL subset.
/// A parsed `SELECT-FROM-WHERE-ORDER BY-LIMIT` statement in the current SQL subset.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Select {
/// Output expressions requested by the query.
@ -9,6 +9,8 @@ pub struct Select {
pub selection: Option<Expr>,
/// Optional output ordering.
pub order_by: Vec<OrderByItem>,
/// Optional row limit.
pub limit: Option<usize>,
}
/// One source entry in a `FROM` list.
@ -58,6 +60,8 @@ pub enum Expr {
pub enum Literal {
/// A string literal.
String(String),
/// An integer literal.
Integer(i64),
/// The `NULL` literal.
Null,
}
@ -67,8 +71,12 @@ pub enum Literal {
pub enum BinaryOp {
/// Equality.
Eq,
/// Inequality.
Ne,
/// Boolean conjunction.
And,
/// Boolean disjunction.
Or,
}
/// Sort direction for `ORDER BY`.

View File

@ -43,16 +43,20 @@ enum Token {
Where,
As,
And,
Or,
Order,
By,
Asc,
Desc,
Null,
Limit,
Identifier(String),
String(String),
Integer(usize),
Star,
Comma,
Eq,
Ne,
}
/// Parse a `SELECT-FROM-WHERE-ORDER BY-LIMIT` query in the current SQL subset.
@ -91,6 +95,13 @@ impl Parser {
Vec::new()
};
let limit = if self.peek() == Some(&Token::Limit) {
self.index += 1;
Some(self.expect_integer()?)
} else {
None
};
if let Some(token) = self.peek() {
return Err(ParseError::UnexpectedToken(render_token(token)));
}
@ -100,6 +111,7 @@ impl Parser {
from,
selection,
order_by,
limit,
})
}
@ -166,6 +178,22 @@ impl Parser {
}
/// Parse a boolean expression: one or more `AND`-level operands joined by `OR`.
///
/// Operands are parsed with `parse_and`, so `AND` binds tighter than `OR`.
/// Repeated `OR`s fold to the left: `a OR b OR c` becomes `(a OR b) OR c`.
fn parse_expr(&mut self) -> Result<Expr, ParseError> {
    let mut lhs = self.parse_and()?;

    loop {
        if !matches!(self.peek(), Some(Token::Or)) {
            break;
        }
        // Consume the `OR` keyword, then parse the right-hand operand.
        self.index += 1;
        let rhs = self.parse_and()?;
        lhs = Expr::Binary {
            left: Box::new(lhs),
            op: BinaryOp::Or,
            right: Box::new(rhs),
        };
    }

    Ok(lhs)
}
fn parse_and(&mut self) -> Result<Expr, ParseError> {
let mut expr = self.parse_equality()?;
while self.peek() == Some(&Token::And) {
@ -220,6 +248,14 @@ impl Parser {
right: Box::new(right),
})
}
Token::Ne => {
let right = self.parse_operand()?;
Ok(Expr::Binary {
left: Box::new(left),
op: BinaryOp::Ne,
right: Box::new(right),
})
}
other => Err(ParseError::UnexpectedToken(render_token(&other))),
}
}
@ -228,6 +264,7 @@ impl Parser {
match self.next().ok_or(ParseError::UnexpectedEnd)? {
Token::Identifier(name) => Ok(Expr::Identifier(name)),
Token::String(value) => Ok(Expr::Literal(Literal::String(value))),
Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n as i64))),
Token::Null => Ok(Expr::Literal(Literal::Null)),
other => Err(ParseError::UnexpectedToken(render_token(&other))),
}
@ -249,6 +286,13 @@ impl Parser {
}
}
/// Consume the next token and return its value if it is an integer.
///
/// # Errors
///
/// Returns `ParseError::UnexpectedEnd` when no tokens remain, and
/// `ParseError::UnexpectedToken` (carrying the rendered token) when the
/// next token is not an integer.
fn expect_integer(&mut self) -> Result<usize, ParseError> {
    let token = self.next().ok_or(ParseError::UnexpectedEnd)?;
    if let Token::Integer(count) = token {
        return Ok(count);
    }
    Err(ParseError::UnexpectedToken(render_token(&token)))
}
fn peek(&self) -> Option<&Token> {
self.tokens.get(self.index)
}
@ -281,11 +325,33 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
chars.next();
tokens.push(Token::Comma);
}
'!' => {
chars.next();
if chars.peek() == Some(&'=') {
chars.next();
tokens.push(Token::Ne);
} else {
return Err(ParseError::UnexpectedToken("!".to_string()));
}
}
'<' => {
chars.next();
if chars.peek() == Some(&'>') {
chars.next();
tokens.push(Token::Ne);
} else {
return Err(ParseError::UnexpectedToken("<".to_string()));
}
}
'=' => {
chars.next();
tokens.push(Token::Eq);
}
'\'' => tokens.push(Token::String(parse_string(&mut chars)?)),
ch if ch.is_ascii_digit() => {
let number = parse_integer(&mut chars);
tokens.push(Token::Integer(number));
}
ch if is_identifier_start(ch) => {
let ident = parse_identifier(&mut chars);
let token = match ident.to_ascii_uppercase().as_str() {
@ -294,11 +360,13 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
"WHERE" => Token::Where,
"AS" => Token::As,
"AND" => Token::And,
"OR" => Token::Or,
"ORDER" => Token::Order,
"BY" => Token::By,
"ASC" => Token::Asc,
"DESC" => Token::Desc,
"NULL" => Token::Null,
"LIMIT" => Token::Limit,
_ => Token::Identifier(ident),
};
tokens.push(token);
@ -351,8 +419,24 @@ where
ident
}
/// Consume a run of ASCII decimal digits from `chars` and return their value.
///
/// Reading stops at the first character that is not an ASCII digit; that
/// character is left in the iterator. If the iterator does not start with a
/// digit, nothing is consumed and `0` is returned.
///
/// The input comes from user-supplied SQL text, so an arbitrarily long digit
/// run is possible. The accumulation uses saturating arithmetic: a value that
/// does not fit in `usize` is clamped to `usize::MAX` instead of panicking
/// (debug builds) or silently wrapping (release builds). All remaining digits
/// of the run are still consumed so tokenization resumes after the number.
fn parse_integer<I>(chars: &mut std::iter::Peekable<I>) -> usize
where
    I: Iterator<Item = char>,
{
    let mut value: usize = 0;
    // `to_digit(10)` yields `Some` only for '0'..='9', which is the same set
    // of characters `is_ascii_digit` accepts.
    while let Some(digit) = chars.peek().and_then(|ch| ch.to_digit(10)) {
        // `digit` is in 0..=9, so widening it to `usize` is lossless.
        value = value.saturating_mul(10).saturating_add(digit as usize);
        chars.next();
    }
    value
}
fn is_identifier_start(ch: char) -> bool {
ch.is_ascii_alphanumeric() || ch == '_'
ch.is_ascii_alphabetic() || ch == '_'
}
fn is_identifier_part(ch: char) -> bool {
@ -366,16 +450,20 @@ fn render_token(token: &Token) -> String {
Token::Where => "WHERE".to_string(),
Token::As => "AS".to_string(),
Token::And => "AND".to_string(),
Token::Or => "OR".to_string(),
Token::Order => "ORDER".to_string(),
Token::By => "BY".to_string(),
Token::Asc => "ASC".to_string(),
Token::Desc => "DESC".to_string(),
Token::Null => "NULL".to_string(),
Token::Limit => "LIMIT".to_string(),
Token::Identifier(name) => name.clone(),
Token::Integer(n) => n.to_string(),
Token::String(value) => format!("'{}'", value),
Token::Star => "*".to_string(),
Token::Comma => ",".to_string(),
Token::Eq => "=".to_string(),
Token::Ne => "!=".to_string(),
}
}
@ -536,4 +624,115 @@ mod tests {
let error = parse_select("SELECT *, c0 FROM Parent").unwrap_err();
assert_eq!(error, ParseError::MixedWildcardProjection);
}
#[test]
fn parses_not_equal_with_bang_eq() {
    // `!=` must produce a `Ne` node with the column on the left.
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier("c1".to_string())),
        op: BinaryOp::Ne,
        right: Box::new(Expr::Literal(Literal::String("bob".to_string()))),
    };

    let parsed = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
#[test]
fn parses_not_equal_with_diamond() {
    // `<>` is the alternative spelling and must produce the same `Ne` node.
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier("c1".to_string())),
        op: BinaryOp::Ne,
        right: Box::new(Expr::Literal(Literal::String("bob".to_string()))),
    };

    let parsed = parse_select("SELECT c0 FROM Parent WHERE c1 <> 'bob'").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
#[test]
fn parses_or_expression() {
    // Builds the `c0 = '<name>'` comparison used on both sides of the OR.
    let c0_equals = |name: &str| Expr::Binary {
        left: Box::new(Expr::Identifier("c0".to_string())),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::String(name.to_string()))),
    };
    let expected = Expr::Binary {
        left: Box::new(c0_equals("alice")),
        op: BinaryOp::Or,
        right: Box::new(c0_equals("bob")),
    };

    let parsed =
        parse_select("SELECT c0 FROM Parent WHERE c0 = 'alice' OR c0 = 'bob'").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
#[test]
fn parses_integer_literal_in_expression() {
    // A bare number on the right-hand side becomes an integer literal.
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier("c0".to_string())),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(42))),
    };

    let parsed = parse_select("SELECT c0 FROM Parent WHERE c0 = 42").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
#[test]
fn parses_limit_clause() {
    // A trailing `LIMIT n` is stored on the parsed statement.
    let limit = parse_select("SELECT c0 FROM Parent LIMIT 5")
        .unwrap()
        .limit;
    assert_eq!(limit, Some(5));
}
#[test]
fn parses_order_by_with_limit() {
    // `LIMIT` may follow `ORDER BY`; both clauses must be retained.
    let parsed = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC LIMIT 1").unwrap();

    let order_key_count = parsed.order_by.len();
    assert_eq!(order_key_count, 1);
    assert_eq!(parsed.limit, Some(1));
}
#[test]
fn parses_or_with_and_precedence() {
    // AND binds tighter than OR, so
    //   c0 = '1' OR c1 = '2' AND c0 = '3'
    // must parse as
    //   c0 = '1' OR (c1 = '2' AND c0 = '3')
    let equals = |column: &str, text: &str| Expr::Binary {
        left: Box::new(Expr::Identifier(column.to_string())),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::String(text.to_string()))),
    };
    let conjunction = Expr::Binary {
        left: Box::new(equals("c1", "2")),
        op: BinaryOp::And,
        right: Box::new(equals("c0", "3")),
    };
    let expected = Expr::Binary {
        left: Box::new(equals("c0", "1")),
        op: BinaryOp::Or,
        right: Box::new(conjunction),
    };

    let parsed =
        parse_select("SELECT c0 FROM Parent WHERE c0 = '1' OR c1 = '2' AND c0 = '3'").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
}

View File

@ -250,3 +250,82 @@ fn select_order_by_desc_sorts_rows() {
assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob");
assert_eq!(format!("{}", result.rows()[1].values()[0]), "alice");
}
#[test]
fn select_integer_literal_in_projection() {
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    // Parse, plan, and run the query end to end.
    let query = parse_select("SELECT c0, 42 AS answer FROM Parent").unwrap();
    let plan = plan_select(&query, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    // The literal column carries its alias and an integer type.
    let schema = result.schema();
    assert_eq!(schema.len(), 2);
    let answer_field = &schema.fields()[1];
    assert_eq!(answer_field.name(), "answer");
    assert_eq!(
        answer_field.data_type(),
        &query_engine::relational::DataType::Integer
    );

    // Every input row is kept and the literal is rendered as-is.
    assert_eq!(result.rows().len(), 2);
    assert_eq!(result.rows()[0].values()[1].to_string(), "42");
}
#[test]
fn select_limit_restricts_row_count() {
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    let query = parse_select("SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1").unwrap();
    let plan = plan_select(&query, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    // Only the first row in ascending order survives the limit.
    let rows = result.rows();
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values()[0].to_string(), "alice");
}
#[test]
fn select_where_or_matches_either_condition() {
    // Three-link chain: alice -> bob -> carol -> dave.
    let parent_facts = vec![
        Atom::new(
            "Parent",
            vec![Term::constant("alice"), Term::constant("bob")],
        ),
        Atom::new(
            "Parent",
            vec![Term::constant("bob"), Term::constant("carol")],
        ),
        Atom::new(
            "Parent",
            vec![Term::constant("carol"), Term::constant("dave")],
        ),
    ];
    let instance: Instance = parent_facts.into_iter().collect();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    let query = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'dave'").unwrap();
    let plan = plan_select(&query, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    assert_eq!(result.rows().len(), 2);

    // Row order is unspecified without ORDER BY, so sort before comparing.
    let mut parents: Vec<String> = result
        .rows()
        .iter()
        .map(|row| row.values()[0].to_string())
        .collect();
    parents.sort();
    assert_eq!(parents, vec!["alice".to_string(), "carol".to_string()]);
}
#[test]
fn select_where_not_equal_excludes_matching_rows() {
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    let query = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap();
    let plan = plan_select(&query, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    // The row whose second column is 'bob' is filtered out; one row remains.
    let rows = result.rows();
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values()[0].to_string(), "bob");
}