Add oblivious chase, broader SQL operators, LIMIT, and integer literals
This commit is contained in:
parent
be8e1388bc
commit
52cb492bce
2
.gitignore
vendored
2
.gitignore
vendored
@ -77,9 +77,9 @@ tarpaulin-report.html
|
||||
Cargo.lock
|
||||
|
||||
# Misc
|
||||
*.proptest-regressions
|
||||
.DS_Store
|
||||
.benchmarks
|
||||
.env
|
||||
.claude/
|
||||
*.proptest-regressions
|
||||
.codex
|
||||
|
||||
@ -71,7 +71,7 @@ Quick examples:
|
||||
- The chase engine should remain largely stateless; pass execution state explicitly.
|
||||
- New chase variants should be composable with existing infrastructure.
|
||||
- Existential variables generate labeled nulls (`Term::Null`).
|
||||
- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY` over predicate-backed tables, equality predicates combined with `AND`, comma-join style multi-table queries, table aliases, and ordering by output-column names.
|
||||
- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-ORDER BY-LIMIT` over predicate-backed tables; equality and inequality predicates combined with `AND` and `OR`; comma-join style multi-table queries; table aliases; ordering by output-column names; integer and string literals.
|
||||
- Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`.
|
||||
- Single-table SQL queries may use the table name as a qualifier when no alias is present.
|
||||
- Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented.
|
||||
|
||||
14
README.md
14
README.md
@ -14,7 +14,7 @@ execution boundaries.
|
||||
- Provenance-oriented explanations for derived answers
|
||||
- Script, REPL, and local web UI for experimentation
|
||||
- Relational schema, catalog, logical-plan, and execution scaffolding
|
||||
- A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY` queries over predicate-backed tables
|
||||
- A minimal SQL slice for `SELECT-FROM-WHERE-ORDER BY-LIMIT` queries over predicate-backed tables
|
||||
|
||||
### Architecture
|
||||
|
||||
@ -111,7 +111,7 @@ The repository now has a narrow SQL pipeline with:
|
||||
- relational schemas, rows, and values
|
||||
- SQL parsing for a small subset
|
||||
- logical planning
|
||||
- execution for filtering, ordering, and basic multi-table joins
|
||||
- execution for filtering, ordering, limiting, and basic multi-table joins
|
||||
|
||||
Currently supported examples:
|
||||
|
||||
@ -119,9 +119,12 @@ Currently supported examples:
|
||||
SELECT * FROM Parent
|
||||
SELECT c0 FROM Parent
|
||||
SELECT c0 FROM Parent WHERE c1 = 'bob'
|
||||
SELECT c0 FROM Parent WHERE c1 != 'bob'
|
||||
SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice'
|
||||
SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'carol'
|
||||
SELECT c0 FROM Parent ORDER BY c0 DESC
|
||||
SELECT c0 AS parent_name, 'seed' AS label FROM Parent
|
||||
SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1
|
||||
SELECT c0 AS parent_name, 'seed' AS label, 42 AS answer FROM Parent
|
||||
SELECT Parent.parent, Ancestor.child
|
||||
FROM Parent, Ancestor
|
||||
WHERE Parent.child = Ancestor.parent
|
||||
@ -172,8 +175,10 @@ Current limits:
|
||||
- joins currently use comma-separated tables plus `WHERE` filtering
|
||||
- multi-table queries require qualified column names such as `Parent.child`
|
||||
- table aliases are supported via `FROM Parent AS p`
|
||||
- `WHERE` supports equality predicates combined with `AND`
|
||||
- `WHERE` supports `=`, `!=`/`<>`, `AND`, and `OR` (with standard precedence)
|
||||
- `ORDER BY` supports output-column ordering with `ASC`/`DESC`
|
||||
- `LIMIT` restricts the number of output rows
|
||||
- literals include strings, integers, and `NULL`
|
||||
- no aggregates
|
||||
- projection aliases only via `AS`
|
||||
|
||||
@ -183,6 +188,7 @@ Runnable SQL examples:
|
||||
- `examples/scripts/sql_join.ech`
|
||||
- `examples/scripts/sql_self_join.ech`
|
||||
- `examples/scripts/sql_order_by.ech`
|
||||
- `examples/scripts/sql_filter_ops.ech`
|
||||
|
||||
### Development
|
||||
|
||||
|
||||
@ -28,10 +28,13 @@ This document tracks the current state and next steps for the repository.
|
||||
- [x] Minimal SQL AST and parser
|
||||
- [x] Logical plan scaffolding
|
||||
- [x] Logical-plan execution for the first SQL slice
|
||||
- [x] `SELECT-FROM-WHERE-ORDER BY` support with positional or named columns
|
||||
- [x] `SELECT-FROM-WHERE-ORDER BY-LIMIT` support with positional or named columns
|
||||
- [x] Basic multi-table SQL joins via qualified-column filtering
|
||||
- [x] Table aliases for self-joins and qualified references
|
||||
- [x] Basic `ORDER BY` support over output columns
|
||||
- [x] `!=`/`<>` inequality and `OR` disjunction in `WHERE` clauses
|
||||
- [x] `LIMIT` clause for restricting output row count
|
||||
- [x] Integer literal and `DataType::Integer` support
|
||||
|
||||
### Near-Term Cleanup
|
||||
|
||||
@ -72,7 +75,7 @@ This document tracks the current state and next steps for the repository.
|
||||
|
||||
- [x] Restricted chase
|
||||
- [x] Standard chase
|
||||
- [ ] Oblivious chase
|
||||
- [x] Oblivious chase
|
||||
- [ ] Skolem chase
|
||||
- [ ] Core chase
|
||||
- [ ] Negative constraints
|
||||
|
||||
20
examples/scripts/sql_filter_ops.ech
Normal file
20
examples/scripts/sql_filter_ops.ech
Normal file
@ -0,0 +1,20 @@
|
||||
# Demonstrate inequality, OR, LIMIT, and integer literals in the SQL frontend.
|
||||
|
||||
fact Employee(alice, 30, engineering).
|
||||
fact Employee(bob, 25, sales).
|
||||
fact Employee(carol, 35, engineering).
|
||||
fact Employee(dave, 28, marketing).
|
||||
|
||||
schema Employee(name, age, dept).
|
||||
|
||||
# Inequality: exclude engineering.
|
||||
sql SELECT name FROM Employee WHERE dept != 'engineering';
|
||||
|
||||
# OR: engineering or marketing.
|
||||
sql SELECT name, dept FROM Employee WHERE dept = 'engineering' OR dept = 'marketing';
|
||||
|
||||
# LIMIT: first two rows in name order.
|
||||
sql SELECT name FROM Employee ORDER BY name ASC LIMIT 2;
|
||||
|
||||
# Integer literal in projection.
|
||||
sql SELECT name, 1 AS active FROM Employee WHERE dept = 'sales';
|
||||
@ -64,6 +64,12 @@ pub enum ChaseVariant {
|
||||
/// rule with the same frontier variable bindings. This is the default.
|
||||
#[default]
|
||||
Restricted,
|
||||
/// Oblivious chase: fires every matching rule application without checking
|
||||
/// head satisfaction or tracking triggers. Terminates only when no body
|
||||
/// match produces a genuinely new fact. For rules with existential
|
||||
/// variables this variant will typically not terminate (it will hit the
|
||||
/// step limit) because each application generates fresh nulls.
|
||||
Oblivious,
|
||||
}
|
||||
|
||||
/// Configuration for the chase algorithm.
|
||||
@ -112,6 +118,26 @@ pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
||||
chase_with_config(instance, rules, config)
|
||||
}
|
||||
|
||||
/// Run the oblivious chase algorithm.
|
||||
///
|
||||
/// The oblivious chase fires every rule application whose body matches,
|
||||
/// without checking whether the head is already satisfied and without
|
||||
/// tracking previously applied triggers. It terminates only when a full
|
||||
/// round produces no new facts.
|
||||
///
|
||||
/// For Datalog rules (no existential variables) the oblivious chase
|
||||
/// reaches the same fixpoint as the restricted and standard variants.
|
||||
/// For rules with existential variables it will typically not terminate
|
||||
/// because each application generates fresh labeled nulls; in that case
|
||||
/// it will run until the step limit.
|
||||
pub fn oblivious_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
||||
let config = ChaseConfig {
|
||||
variant: ChaseVariant::Oblivious,
|
||||
..Default::default()
|
||||
};
|
||||
chase_with_config(instance, rules, config)
|
||||
}
|
||||
|
||||
/// Run the chase with custom configuration.
|
||||
pub fn chase_with_config(
|
||||
mut instance: Instance,
|
||||
@ -137,6 +163,7 @@ pub fn chase_with_config(
|
||||
ChaseVariant::Restricted => {
|
||||
restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers)
|
||||
}
|
||||
ChaseVariant::Oblivious => oblivious_chase_step(&instance, rules, &mut null_gen),
|
||||
};
|
||||
|
||||
if new_facts.is_empty() {
|
||||
@ -231,6 +258,32 @@ fn restricted_chase_step(
|
||||
new_facts
|
||||
}
|
||||
|
||||
/// Perform a single oblivious chase step: fire all matching rule applications
|
||||
/// without checking head satisfaction or tracking triggers.
|
||||
fn oblivious_chase_step(
|
||||
instance: &Instance,
|
||||
rules: &[Rule],
|
||||
null_gen: &mut NullGenerator,
|
||||
) -> Vec<Atom> {
|
||||
let mut new_facts = Vec::new();
|
||||
|
||||
for rule in rules {
|
||||
let matches = find_matches(instance, &rule.body);
|
||||
|
||||
for subst in matches {
|
||||
let derived = apply_rule_head(rule, &subst, null_gen);
|
||||
|
||||
for fact in derived {
|
||||
if !instance.contains(&fact) {
|
||||
new_facts.push(fact);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_facts
|
||||
}
|
||||
|
||||
/// A trigger for EGD applications, tracking which EGD was applied with which body bindings.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
struct EgdTrigger {
|
||||
@ -329,6 +382,7 @@ pub fn chase_full(
|
||||
ChaseVariant::Restricted => {
|
||||
restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers)
|
||||
}
|
||||
ChaseVariant::Oblivious => oblivious_chase_step(&instance, tgds, &mut null_gen),
|
||||
};
|
||||
|
||||
let tgd_changes = !new_facts.is_empty();
|
||||
@ -796,4 +850,117 @@ mod tests {
|
||||
assert!(result.error.is_none());
|
||||
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
||||
}
|
||||
|
||||
// Oblivious chase tests
|
||||
|
||||
#[test]
fn test_oblivious_chase_datalog_rules() {
    // Base case: every parent is an ancestor.
    let base = RuleBuilder::new()
        .when("Parent", vec![Term::var("X"), Term::var("Y")])
        .then("Ancestor", vec![Term::var("X"), Term::var("Y")])
        .build();
    // Inductive case: extend an ancestor chain by one parent edge.
    let step = RuleBuilder::new()
        .when("Ancestor", vec![Term::var("X"), Term::var("Y")])
        .when("Parent", vec![Term::var("Y"), Term::var("Z")])
        .then("Ancestor", vec![Term::var("X"), Term::var("Z")])
        .build();

    let facts = vec![
        Atom::new(
            "Parent",
            vec![Term::constant("alice"), Term::constant("bob")],
        ),
        Atom::new(
            "Parent",
            vec![Term::constant("bob"), Term::constant("carol")],
        ),
    ];
    let instance: Instance = facts.into_iter().collect();

    let outcome = oblivious_chase(instance, &[base, step]);

    assert!(outcome.terminated);
    // alice->bob, bob->carol, plus the transitive alice->carol.
    assert_eq!(outcome.instance.facts_for_predicate("Ancestor").len(), 3);
}
|
||||
|
||||
#[test]
fn test_oblivious_chase_matches_restricted_for_datalog() {
    // Transitive closure of `Edge` into `Path`.
    let rules = vec![
        RuleBuilder::new()
            .when("Edge", vec![Term::var("X"), Term::var("Y")])
            .then("Path", vec![Term::var("X"), Term::var("Y")])
            .build(),
        RuleBuilder::new()
            .when("Path", vec![Term::var("X"), Term::var("Y")])
            .when("Edge", vec![Term::var("Y"), Term::var("Z")])
            .then("Path", vec![Term::var("X"), Term::var("Z")])
            .build(),
    ];

    // A simple chain a -> b -> c -> d.
    let edges = vec![
        Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
        Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]),
        Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]),
    ];
    let instance: Instance = edges.into_iter().collect();

    let oblivious = oblivious_chase(instance.clone(), &rules);
    let restricted = chase(instance, &rules);

    assert!(oblivious.terminated);
    assert!(restricted.terminated);

    // Both variants must derive the same number of `Path` facts.
    let oblivious_count = oblivious.instance.facts_for_predicate("Path").len();
    let restricted_count = restricted.instance.facts_for_predicate("Path").len();
    assert_eq!(oblivious_count, restricted_count);
}
|
||||
|
||||
#[test]
fn test_oblivious_chase_does_not_terminate_with_existentials() {
    // `Y` appears only in the head, so each application needs a fresh null.
    let rule = RuleBuilder::new()
        .when("Person", vec![Term::var("X")])
        .then("HasSSN", vec![Term::var("X"), Term::var("Y")])
        .build();

    let seed = vec![Atom::new("Person", vec![Term::constant("alice")])];
    let instance: Instance = seed.into_iter().collect();

    let result = chase_with_config(
        instance,
        &[rule],
        ChaseConfig {
            variant: ChaseVariant::Oblivious,
            max_steps: 10,
        },
    );

    // Every round adds another `HasSSN` fact with a new null, so the run
    // is expected to stop at the step limit instead of at a fixpoint.
    assert!(!result.terminated);
    let ssn_facts = result.instance.facts_for_predicate("HasSSN").len();
    assert!(ssn_facts > 1);
}
|
||||
|
||||
#[test]
fn test_oblivious_chase_via_config() {
    // Selecting the variant through `ChaseConfig` rather than the helper.
    let copy_a_to_b = RuleBuilder::new()
        .when("A", vec![Term::var("X")])
        .then("B", vec![Term::var("X")])
        .build();

    let seed = vec![Atom::new("A", vec![Term::constant("x")])];
    let instance: Instance = seed.into_iter().collect();

    let outcome = chase_with_config(
        instance,
        &[copy_a_to_b],
        ChaseConfig {
            variant: ChaseVariant::Oblivious,
            ..Default::default()
        },
    );

    assert!(outcome.terminated);
    assert_eq!(outcome.instance.facts_for_predicate("B").len(), 1);
}
|
||||
}
|
||||
|
||||
@ -13,7 +13,7 @@ mod engine;
|
||||
pub use atom::Atom;
|
||||
pub use engine::{
|
||||
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
|
||||
chase_with_egds, standard_chase,
|
||||
chase_with_egds, oblivious_chase, standard_chase,
|
||||
};
|
||||
pub use inference::{Derivation, MaterializedState, find_matches, materialize};
|
||||
pub use instance::{Instance, InstanceError};
|
||||
|
||||
@ -101,6 +101,11 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
|
||||
rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys));
|
||||
Ok(ResultSet::new(schema.clone(), rows))
|
||||
}
|
||||
LogicalPlan::Limit { input, count } => {
|
||||
let result = execute(input, instance)?;
|
||||
let rows = result.rows().iter().take(*count).cloned().collect();
|
||||
Ok(ResultSet::new(result.schema().clone(), rows))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -113,9 +118,16 @@ fn eval_predicate(
|
||||
LogicalExpr::Eq(left, right) => Ok(eval_expr(left, row, schema)?
|
||||
.sql_eq(&eval_expr(right, row, schema)?)
|
||||
.unwrap_or(false)),
|
||||
LogicalExpr::Ne(left, right) => Ok(eval_expr(left, row, schema)?
|
||||
.sql_eq(&eval_expr(right, row, schema)?)
|
||||
.map(|eq| !eq)
|
||||
.unwrap_or(false)),
|
||||
LogicalExpr::And(left, right) => {
|
||||
Ok(eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?)
|
||||
}
|
||||
LogicalExpr::Or(left, right) => {
|
||||
Ok(eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?)
|
||||
}
|
||||
_ => Ok(false),
|
||||
}
|
||||
}
|
||||
@ -138,9 +150,19 @@ fn eval_expr(
|
||||
let right = eval_expr(right, row, schema)?;
|
||||
Ok(Value::Boolean(left.sql_eq(&right).unwrap_or(false)))
|
||||
}
|
||||
LogicalExpr::Ne(left, right) => {
|
||||
let left = eval_expr(left, row, schema)?;
|
||||
let right = eval_expr(right, row, schema)?;
|
||||
Ok(Value::Boolean(
|
||||
left.sql_eq(&right).map(|eq| !eq).unwrap_or(false),
|
||||
))
|
||||
}
|
||||
LogicalExpr::And(left, right) => Ok(Value::Boolean(
|
||||
eval_predicate(left, row, schema)? && eval_predicate(right, row, schema)?,
|
||||
)),
|
||||
LogicalExpr::Or(left, right) => Ok(Value::Boolean(
|
||||
eval_predicate(left, row, schema)? || eval_predicate(right, row, schema)?,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -188,7 +210,11 @@ fn compare_values(left: &Value, right: &Value) -> Ordering {
|
||||
(Value::Null, _) => Ordering::Greater,
|
||||
(_, Value::Null) => Ordering::Less,
|
||||
(Value::Text(left), Value::Text(right)) => left.cmp(right),
|
||||
(Value::Integer(left), Value::Integer(right)) => left.cmp(right),
|
||||
(Value::Boolean(left), Value::Boolean(right)) => left.cmp(right),
|
||||
// Cross-type ordering: Integer < Text < Boolean
|
||||
(Value::Integer(_), _) => Ordering::Less,
|
||||
(_, Value::Integer(_)) => Ordering::Greater,
|
||||
(Value::Text(_), Value::Boolean(_)) => Ordering::Less,
|
||||
(Value::Boolean(_), Value::Text(_)) => Ordering::Greater,
|
||||
}
|
||||
|
||||
@ -18,5 +18,5 @@ pub mod sql;
|
||||
// Lower-level reasoning and provenance APIs remain under `query_engine::chase`.
|
||||
pub use chase::{
|
||||
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, Rule, RuleBuilder, Term,
|
||||
chase, chase_with_config, standard_chase,
|
||||
chase, chase_with_config, oblivious_chase, standard_chase,
|
||||
};
|
||||
|
||||
@ -18,8 +18,12 @@ pub enum LogicalExpr {
|
||||
Literal(Value),
|
||||
/// Equality.
|
||||
Eq(Box<LogicalExpr>, Box<LogicalExpr>),
|
||||
/// Inequality.
|
||||
Ne(Box<LogicalExpr>, Box<LogicalExpr>),
|
||||
/// Boolean conjunction.
|
||||
And(Box<LogicalExpr>, Box<LogicalExpr>),
|
||||
/// Boolean disjunction.
|
||||
Or(Box<LogicalExpr>, Box<LogicalExpr>),
|
||||
}
|
||||
|
||||
/// A named output expression in a projection.
|
||||
@ -68,6 +72,11 @@ pub enum LogicalPlan {
|
||||
expressions: Vec<NamedExpr>,
|
||||
schema: Schema,
|
||||
},
|
||||
/// Limit the number of output rows.
|
||||
Limit {
|
||||
input: Box<LogicalPlan>,
|
||||
count: usize,
|
||||
},
|
||||
}
|
||||
|
||||
impl LogicalPlan {
|
||||
@ -79,6 +88,7 @@ impl LogicalPlan {
|
||||
Self::Filter { input, .. } => input.output_schema(),
|
||||
Self::Sort { schema, .. } => schema,
|
||||
Self::Project { schema, .. } => schema,
|
||||
Self::Limit { input, .. } => input.output_schema(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -80,11 +80,7 @@ pub fn plan_select(
|
||||
};
|
||||
}
|
||||
|
||||
if is_wildcard_projection(&select.projection) {
|
||||
let output_schema = plan.output_schema().clone();
|
||||
return maybe_apply_sort(plan, output_schema, &select.order_by, &select.from);
|
||||
}
|
||||
|
||||
if !is_wildcard_projection(&select.projection) {
|
||||
let mut expressions = Vec::new();
|
||||
let mut fields = Vec::new();
|
||||
for (index, item) in select.projection.iter().enumerate() {
|
||||
@ -94,7 +90,8 @@ pub fn plan_select(
|
||||
let output_name = alias
|
||||
.clone()
|
||||
.unwrap_or_else(|| default_projection_name(expr, index + 1));
|
||||
let (data_type, nullable) = projection_metadata(expr, &input_schema, &select.from)?;
|
||||
let (data_type, nullable) =
|
||||
projection_metadata(expr, &input_schema, &select.from)?;
|
||||
expressions.push(NamedExpr {
|
||||
name: output_name.clone(),
|
||||
expr: planned_expr,
|
||||
@ -105,14 +102,24 @@ pub fn plan_select(
|
||||
}
|
||||
}
|
||||
|
||||
let plan = LogicalPlan::Project {
|
||||
plan = LogicalPlan::Project {
|
||||
input: Box::new(plan),
|
||||
expressions,
|
||||
schema: Schema::new(fields),
|
||||
};
|
||||
}
|
||||
|
||||
let output_schema = plan.output_schema().clone();
|
||||
maybe_apply_sort(plan, output_schema, &select.order_by, &select.from)
|
||||
plan = maybe_apply_sort(plan, output_schema, &select.order_by, &select.from)?;
|
||||
|
||||
if let Some(count) = select.limit {
|
||||
plan = LogicalPlan::Limit {
|
||||
input: Box::new(plan),
|
||||
count,
|
||||
};
|
||||
}
|
||||
|
||||
Ok(plan)
|
||||
}
|
||||
|
||||
fn is_wildcard_projection(items: &[SelectItem]) -> bool {
|
||||
@ -182,10 +189,18 @@ fn plan_expr(
|
||||
Box::new(plan_expr(left, schema, tables)?),
|
||||
Box::new(plan_expr(right, schema, tables)?),
|
||||
)),
|
||||
BinaryOp::Ne => Ok(LogicalExpr::Ne(
|
||||
Box::new(plan_expr(left, schema, tables)?),
|
||||
Box::new(plan_expr(right, schema, tables)?),
|
||||
)),
|
||||
BinaryOp::And => Ok(LogicalExpr::And(
|
||||
Box::new(plan_expr(left, schema, tables)?),
|
||||
Box::new(plan_expr(right, schema, tables)?),
|
||||
)),
|
||||
BinaryOp::Or => Ok(LogicalExpr::Or(
|
||||
Box::new(plan_expr(left, schema, tables)?),
|
||||
Box::new(plan_expr(right, schema, tables)?),
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -226,6 +241,7 @@ fn maybe_apply_sort(
|
||||
fn plan_literal(literal: &Literal) -> Value {
|
||||
match literal {
|
||||
Literal::String(value) => Value::text(value.clone()),
|
||||
Literal::Integer(n) => Value::Integer(*n),
|
||||
Literal::Null => Value::Null,
|
||||
}
|
||||
}
|
||||
@ -245,6 +261,7 @@ fn projection_metadata(
|
||||
Ok((field.data_type().clone(), field.nullable()))
|
||||
}
|
||||
Expr::Literal(Literal::String(_)) => Ok((DataType::Text, false)),
|
||||
Expr::Literal(Literal::Integer(_)) => Ok((DataType::Integer, false)),
|
||||
Expr::Literal(Literal::Null) => Ok((DataType::Text, true)),
|
||||
Expr::Binary { .. } => Ok((DataType::Boolean, true)),
|
||||
}
|
||||
@ -550,6 +567,7 @@ mod tests {
|
||||
}],
|
||||
selection: None,
|
||||
order_by: Vec::new(),
|
||||
limit: None,
|
||||
};
|
||||
let error = plan_select(&malformed, &catalog).unwrap_err();
|
||||
assert_eq!(
|
||||
|
||||
@ -5,6 +5,8 @@ use std::fmt;
|
||||
pub enum DataType {
|
||||
/// UTF-8 text values.
|
||||
Text,
|
||||
/// 64-bit signed integer values.
|
||||
Integer,
|
||||
/// Boolean values.
|
||||
Boolean,
|
||||
}
|
||||
|
||||
@ -5,6 +5,8 @@ use std::fmt;
|
||||
pub enum Value {
|
||||
/// Textual data.
|
||||
Text(String),
|
||||
/// Integer data.
|
||||
Integer(i64),
|
||||
/// Boolean data.
|
||||
Boolean(bool),
|
||||
/// SQL-style null.
|
||||
@ -29,8 +31,9 @@ impl Value {
|
||||
match (self, other) {
|
||||
(Self::Null, _) | (_, Self::Null) => None,
|
||||
(Self::Text(left), Self::Text(right)) => Some(left == right),
|
||||
(Self::Integer(left), Self::Integer(right)) => Some(left == right),
|
||||
(Self::Boolean(left), Self::Boolean(right)) => Some(left == right),
|
||||
(Self::Text(_), Self::Boolean(_)) | (Self::Boolean(_), Self::Text(_)) => Some(false),
|
||||
_ => Some(false),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -39,6 +42,7 @@ impl fmt::Display for Value {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Text(value) => write!(f, "{}", value),
|
||||
Self::Integer(value) => write!(f, "{}", value),
|
||||
Self::Boolean(value) => write!(f, "{}", value),
|
||||
Self::Null => write!(f, "NULL"),
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/// A parsed `SELECT-FROM-WHERE-ORDER BY` statement in the current SQL subset.
|
||||
/// A parsed `SELECT-FROM-WHERE-ORDER BY-LIMIT` statement in the current SQL subset.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Select {
|
||||
/// Output expressions requested by the query.
|
||||
@ -9,6 +9,8 @@ pub struct Select {
|
||||
pub selection: Option<Expr>,
|
||||
/// Optional output ordering.
|
||||
pub order_by: Vec<OrderByItem>,
|
||||
/// Optional row limit.
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
/// One source entry in a `FROM` list.
|
||||
@ -58,6 +60,8 @@ pub enum Expr {
|
||||
pub enum Literal {
|
||||
/// A string literal.
|
||||
String(String),
|
||||
/// An integer literal.
|
||||
Integer(i64),
|
||||
/// The `NULL` literal.
|
||||
Null,
|
||||
}
|
||||
@ -67,8 +71,12 @@ pub enum Literal {
|
||||
pub enum BinaryOp {
|
||||
/// Equality.
|
||||
Eq,
|
||||
/// Inequality.
|
||||
Ne,
|
||||
/// Boolean conjunction.
|
||||
And,
|
||||
/// Boolean disjunction.
|
||||
Or,
|
||||
}
|
||||
|
||||
/// Sort direction for `ORDER BY`.
|
||||
|
||||
@ -43,16 +43,20 @@ enum Token {
|
||||
Where,
|
||||
As,
|
||||
And,
|
||||
Or,
|
||||
Order,
|
||||
By,
|
||||
Asc,
|
||||
Desc,
|
||||
Null,
|
||||
Limit,
|
||||
Identifier(String),
|
||||
String(String),
|
||||
Integer(usize),
|
||||
Star,
|
||||
Comma,
|
||||
Eq,
|
||||
Ne,
|
||||
}
|
||||
|
||||
/// Parse a `SELECT-FROM-WHERE-ORDER BY-LIMIT` query in the current SQL subset.
|
||||
@ -91,6 +95,13 @@ impl Parser {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let limit = if self.peek() == Some(&Token::Limit) {
|
||||
self.index += 1;
|
||||
Some(self.expect_integer()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some(token) = self.peek() {
|
||||
return Err(ParseError::UnexpectedToken(render_token(token)));
|
||||
}
|
||||
@ -100,6 +111,7 @@ impl Parser {
|
||||
from,
|
||||
selection,
|
||||
order_by,
|
||||
limit,
|
||||
})
|
||||
}
|
||||
|
||||
@ -166,6 +178,22 @@ impl Parser {
|
||||
}
|
||||
|
||||
fn parse_expr(&mut self) -> Result<Expr, ParseError> {
|
||||
let mut expr = self.parse_and()?;
|
||||
|
||||
while self.peek() == Some(&Token::Or) {
|
||||
self.index += 1;
|
||||
let right = self.parse_and()?;
|
||||
expr = Expr::Binary {
|
||||
left: Box::new(expr),
|
||||
op: BinaryOp::Or,
|
||||
right: Box::new(right),
|
||||
};
|
||||
}
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
fn parse_and(&mut self) -> Result<Expr, ParseError> {
|
||||
let mut expr = self.parse_equality()?;
|
||||
|
||||
while self.peek() == Some(&Token::And) {
|
||||
@ -220,6 +248,14 @@ impl Parser {
|
||||
right: Box::new(right),
|
||||
})
|
||||
}
|
||||
Token::Ne => {
|
||||
let right = self.parse_operand()?;
|
||||
Ok(Expr::Binary {
|
||||
left: Box::new(left),
|
||||
op: BinaryOp::Ne,
|
||||
right: Box::new(right),
|
||||
})
|
||||
}
|
||||
other => Err(ParseError::UnexpectedToken(render_token(&other))),
|
||||
}
|
||||
}
|
||||
@ -228,6 +264,7 @@ impl Parser {
|
||||
match self.next().ok_or(ParseError::UnexpectedEnd)? {
|
||||
Token::Identifier(name) => Ok(Expr::Identifier(name)),
|
||||
Token::String(value) => Ok(Expr::Literal(Literal::String(value))),
|
||||
Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n as i64))),
|
||||
Token::Null => Ok(Expr::Literal(Literal::Null)),
|
||||
other => Err(ParseError::UnexpectedToken(render_token(&other))),
|
||||
}
|
||||
@ -249,6 +286,13 @@ impl Parser {
|
||||
}
|
||||
}
|
||||
|
||||
fn expect_integer(&mut self) -> Result<usize, ParseError> {
|
||||
match self.next().ok_or(ParseError::UnexpectedEnd)? {
|
||||
Token::Integer(n) => Ok(n),
|
||||
other => Err(ParseError::UnexpectedToken(render_token(&other))),
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&self) -> Option<&Token> {
|
||||
self.tokens.get(self.index)
|
||||
}
|
||||
@ -281,11 +325,33 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
|
||||
chars.next();
|
||||
tokens.push(Token::Comma);
|
||||
}
|
||||
'!' => {
|
||||
chars.next();
|
||||
if chars.peek() == Some(&'=') {
|
||||
chars.next();
|
||||
tokens.push(Token::Ne);
|
||||
} else {
|
||||
return Err(ParseError::UnexpectedToken("!".to_string()));
|
||||
}
|
||||
}
|
||||
'<' => {
|
||||
chars.next();
|
||||
if chars.peek() == Some(&'>') {
|
||||
chars.next();
|
||||
tokens.push(Token::Ne);
|
||||
} else {
|
||||
return Err(ParseError::UnexpectedToken("<".to_string()));
|
||||
}
|
||||
}
|
||||
'=' => {
|
||||
chars.next();
|
||||
tokens.push(Token::Eq);
|
||||
}
|
||||
'\'' => tokens.push(Token::String(parse_string(&mut chars)?)),
|
||||
ch if ch.is_ascii_digit() => {
|
||||
let number = parse_integer(&mut chars);
|
||||
tokens.push(Token::Integer(number));
|
||||
}
|
||||
ch if is_identifier_start(ch) => {
|
||||
let ident = parse_identifier(&mut chars);
|
||||
let token = match ident.to_ascii_uppercase().as_str() {
|
||||
@ -294,11 +360,13 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
|
||||
"WHERE" => Token::Where,
|
||||
"AS" => Token::As,
|
||||
"AND" => Token::And,
|
||||
"OR" => Token::Or,
|
||||
"ORDER" => Token::Order,
|
||||
"BY" => Token::By,
|
||||
"ASC" => Token::Asc,
|
||||
"DESC" => Token::Desc,
|
||||
"NULL" => Token::Null,
|
||||
"LIMIT" => Token::Limit,
|
||||
_ => Token::Identifier(ident),
|
||||
};
|
||||
tokens.push(token);
|
||||
@ -351,8 +419,24 @@ where
|
||||
ident
|
||||
}
|
||||
|
||||
/// Read a run of ASCII decimal digits from `chars` and return its value.
///
/// Consumption stops at the first character that is not an ASCII digit; that
/// character is left in the iterator. When no digit is present the result is
/// `0` and nothing is consumed.
///
/// The digits come straight from user-written SQL, so the literal may be
/// larger than `usize` can hold. Rather than overflowing (a panic in debug
/// builds, silent wrap-around in release builds) the value saturates at
/// `usize::MAX`; the remaining digits are still consumed so tokenization
/// resumes after the whole literal.
fn parse_integer<I>(chars: &mut std::iter::Peekable<I>) -> usize
where
    I: Iterator<Item = char>,
{
    let mut value: usize = 0;
    // `to_digit(10)` yields `Some` only for the ASCII characters `0`-`9`.
    while let Some(digit) = chars.peek().and_then(|ch| ch.to_digit(10)) {
        // `digit` is in 0..=9, so widening it to `usize` is lossless.
        value = value.saturating_mul(10).saturating_add(digit as usize);
        chars.next();
    }
    value
}
|
||||
|
||||
fn is_identifier_start(ch: char) -> bool {
|
||||
ch.is_ascii_alphanumeric() || ch == '_'
|
||||
ch.is_ascii_alphabetic() || ch == '_'
|
||||
}
|
||||
|
||||
fn is_identifier_part(ch: char) -> bool {
|
||||
@ -366,16 +450,20 @@ fn render_token(token: &Token) -> String {
|
||||
Token::Where => "WHERE".to_string(),
|
||||
Token::As => "AS".to_string(),
|
||||
Token::And => "AND".to_string(),
|
||||
Token::Or => "OR".to_string(),
|
||||
Token::Order => "ORDER".to_string(),
|
||||
Token::By => "BY".to_string(),
|
||||
Token::Asc => "ASC".to_string(),
|
||||
Token::Desc => "DESC".to_string(),
|
||||
Token::Null => "NULL".to_string(),
|
||||
Token::Limit => "LIMIT".to_string(),
|
||||
Token::Identifier(name) => name.clone(),
|
||||
Token::Integer(n) => n.to_string(),
|
||||
Token::String(value) => format!("'{}'", value),
|
||||
Token::Star => "*".to_string(),
|
||||
Token::Comma => ",".to_string(),
|
||||
Token::Eq => "=".to_string(),
|
||||
Token::Ne => "!=".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -536,4 +624,115 @@ mod tests {
|
||||
let error = parse_select("SELECT *, c0 FROM Parent").unwrap_err();
|
||||
assert_eq!(error, ParseError::MixedWildcardProjection);
|
||||
}
|
||||
|
||||
#[test]
fn parses_not_equal_with_bang_eq() {
    // `!=` must produce a `BinaryOp::Ne` comparison.
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier("c1".to_string())),
        op: BinaryOp::Ne,
        right: Box::new(Expr::Literal(Literal::String("bob".to_string()))),
    };

    let parsed = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
|
||||
|
||||
#[test]
fn parses_not_equal_with_diamond() {
    // `<>` is the alternate spelling and must yield the same `BinaryOp::Ne`.
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier("c1".to_string())),
        op: BinaryOp::Ne,
        right: Box::new(Expr::Literal(Literal::String("bob".to_string()))),
    };

    let parsed = parse_select("SELECT c0 FROM Parent WHERE c1 <> 'bob'").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
|
||||
|
||||
#[test]
fn parses_or_expression() {
    // Builds `column = 'text'` as the parser is expected to represent it.
    let eq = |column: &str, text: &str| Expr::Binary {
        left: Box::new(Expr::Identifier(column.to_string())),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::String(text.to_string()))),
    };
    let expected = Expr::Binary {
        left: Box::new(eq("c0", "alice")),
        op: BinaryOp::Or,
        right: Box::new(eq("c0", "bob")),
    };

    let parsed = parse_select("SELECT c0 FROM Parent WHERE c0 = 'alice' OR c0 = 'bob'").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
|
||||
|
||||
#[test]
fn parses_integer_literal_in_expression() {
    // A bare number on the right-hand side becomes `Literal::Integer`.
    let expected = Expr::Binary {
        left: Box::new(Expr::Identifier("c0".to_string())),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::Integer(42))),
    };

    let parsed = parse_select("SELECT c0 FROM Parent WHERE c0 = 42").unwrap();

    assert_eq!(parsed.selection, Some(expected));
}
|
||||
|
||||
#[test]
fn parses_limit_clause() {
    // A trailing `LIMIT n` is recorded on the parsed statement.
    let parsed = parse_select("SELECT c0 FROM Parent LIMIT 5").unwrap();
    let row_cap = parsed.limit;
    assert_eq!(row_cap, Some(5));
}
|
||||
|
||||
#[test]
fn parses_order_by_with_limit() {
    // `LIMIT` may follow `ORDER BY`; both clauses must be captured.
    let parsed = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC LIMIT 1").unwrap();
    let sort_key_count = parsed.order_by.len();
    assert_eq!(sort_key_count, 1);
    assert_eq!(parsed.limit, Some(1));
}
|
||||
|
||||
/// `AND` binds tighter than `OR`: `x OR y AND z` must parse as `x OR (y AND z)`.
#[test]
fn parses_or_with_and_precedence() {
    // Builds the AST node for `<column> = '<value>'`.
    let equals = |column: &str, value: &str| Expr::Binary {
        left: Box::new(Expr::Identifier(column.to_owned())),
        op: BinaryOp::Eq,
        right: Box::new(Expr::Literal(Literal::String(value.to_owned()))),
    };

    let parsed =
        parse_select("SELECT c0 FROM Parent WHERE c0 = '1' OR c1 = '2' AND c0 = '3'").unwrap();

    // The conjunction sits beneath the right-hand operand of the disjunction.
    let conjunction = Expr::Binary {
        left: Box::new(equals("c1", "2")),
        op: BinaryOp::And,
        right: Box::new(equals("c0", "3")),
    };
    let expected = Expr::Binary {
        left: Box::new(equals("c0", "1")),
        op: BinaryOp::Or,
        right: Box::new(conjunction),
    };
    assert_eq!(parsed.selection, Some(expected));
}
|
||||
}
|
||||
|
||||
@ -250,3 +250,82 @@ fn select_order_by_desc_sorts_rows() {
|
||||
assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob");
|
||||
assert_eq!(format!("{}", result.rows()[1].values()[0]), "alice");
|
||||
}
|
||||
|
||||
/// Projecting `42 AS answer` adds a second output column named "answer" with the
/// integer data type, and the first row renders that column as "42".
#[test]
fn select_integer_literal_in_projection() {
    let facts = parent_instance();
    let catalog = PredicateCatalog::from_instance(&facts).unwrap();
    let query = parse_select("SELECT c0, 42 AS answer FROM Parent").unwrap();

    let plan = plan_select(&query, &catalog).unwrap();
    let output = execute(&plan, &facts).unwrap();

    // Schema checks: two columns, the second being the aliased literal.
    assert_eq!(output.schema().len(), 2);
    assert_eq!(output.schema().fields()[1].name(), "answer");
    assert_eq!(
        output.schema().fields()[1].data_type(),
        &query_engine::relational::DataType::Integer
    );

    // Row checks: both rows are produced and the literal is rendered per row.
    assert_eq!(output.rows().len(), 2);
    assert_eq!(output.rows()[0].values()[1].to_string(), "42");
}
|
||||
|
||||
/// With ascending order on `c0`, `LIMIT 1` leaves a single row whose first value
/// renders as "alice".
#[test]
fn select_limit_restricts_row_count() {
    let facts = parent_instance();
    let catalog = PredicateCatalog::from_instance(&facts).unwrap();
    let query = parse_select("SELECT c0 FROM Parent ORDER BY c0 ASC LIMIT 1").unwrap();

    let plan = plan_select(&query, &catalog).unwrap();
    let output = execute(&plan, &facts).unwrap();

    assert_eq!(output.rows().len(), 1);
    let first_value = output.rows()[0].values()[0].to_string();
    assert_eq!(first_value, "alice");
}
|
||||
|
||||
/// A `WHERE` clause with `OR` keeps rows satisfying either comparison: of the three
/// `Parent` facts below, the ones whose `c1` is 'bob' or 'dave' are returned.
#[test]
fn select_where_or_matches_either_condition() {
    // Three `Parent(c0, c1)` facts forming the fixture for this query.
    let instance: Instance = [("alice", "bob"), ("bob", "carol"), ("carol", "dave")]
        .iter()
        .map(|&(first, second)| {
            Atom::new(
                "Parent",
                vec![Term::constant(first), Term::constant(second)],
            )
        })
        .collect();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();
    let query = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob' OR c1 = 'dave'").unwrap();

    let plan = plan_select(&query, &catalog).unwrap();
    let output = execute(&plan, &instance).unwrap();

    assert_eq!(output.rows().len(), 2);

    // Sort the rendered values so the comparison does not depend on row order.
    let mut names: Vec<String> = output
        .rows()
        .iter()
        .map(|row| row.values()[0].to_string())
        .collect();
    names.sort();
    assert_eq!(names, ["alice", "carol"]);
}
|
||||
|
||||
/// Filtering with `c1 != 'bob'` leaves exactly one row, whose first value renders
/// as "bob".
#[test]
fn select_where_not_equal_excludes_matching_rows() {
    let facts = parent_instance();
    let catalog = PredicateCatalog::from_instance(&facts).unwrap();
    let query = parse_select("SELECT c0 FROM Parent WHERE c1 != 'bob'").unwrap();

    let plan = plan_select(&query, &catalog).unwrap();
    let output = execute(&plan, &facts).unwrap();

    assert_eq!(output.rows().len(), 1);
    let remaining = output.rows()[0].values()[0].to_string();
    assert_eq!(remaining, "bob");
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user