From b7c90a18deb5d84b14fb243c550132ab9869d367 Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Tue, 14 Apr 2026 10:26:36 +0200 Subject: [PATCH] WIP --- AGENTS.md | 6 +- README.md | 10 +-- examples/scripts/README.md | 4 + src/chase/engine.rs | 4 +- src/chase/inference.rs | 144 ++++++++++++++++++++++++++++++++++-- src/chase/stratification.rs | 30 ++++++++ src/execution/mod.rs | 49 ++++++++++++ src/frontend/highlight.rs | 2 +- src/frontend/language.rs | 15 ++++ 9 files changed, 247 insertions(+), 17 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0c3705f..a105d8c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,7 +6,7 @@ This file provides guidance to coding agents collaborating on this repository. Query Engine is an experimental Rust project for building query-engine components. The current implementation is centered on a chase-based reasoning -core, lightweight interactive frontends, and an early relational/SQL scaffold. +core, interactive frontends, and an early relational/SQL scaffold. Priorities, in order: @@ -55,7 +55,7 @@ Quick examples: - `inference.rs`: shared matching, negation filtering, and provenance-aware materialization helpers. - `stratification.rs`: stratification analysis for rules with negation. - `union_find.rs`: equality merging support. -- `src/frontend/`: lightweight interactive surface for scripts, REPL, local web UI, TUI (behind `tui` feature), and syntax highlighting. +- `src/frontend/`: interactive surface for scripts, REPL, local web UI, TUI (behind `tui` feature), and syntax highlighting. - `src/relational/`: schemas, values, rows, and result sets for relational execution. - `src/catalog/`: predicate-to-table schema inference and catalog access. - `src/io/`: CSV-based fact import and export. @@ -76,7 +76,7 @@ Quick examples: - The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` over predicate-backed tables; equality and inequality predicates combined with `AND` and `OR`; comma-join style multi-table queries; table aliases; ordering by output-column names; integer and string literals; `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG` aggregates with optional `GROUP BY`. - Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`. - Single-table SQL queries may use the table name as a qualifier when no alias is present. -- Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented. +- Do not describe unsupported SQL features (such as subqueries, window functions, or arbitrary expressions) as implemented. - The executor operates on the `DataSource` trait, not on `Instance` directly. `Instance` and `TableStore` are the two built-in implementations. - Relational and SQL modules should build on explicit schemas and logical plans, not call frontend helpers directly. - If you add parser, planner, or executor layers, keep their responsibilities separate. diff --git a/README.md b/README.md index 0275da4..ca4ae8e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ An experimental Rust project for building query-engine components. -Right now the repository is centered on a chase-based reasoning core, a small +Right now the repository is centered on a chase-based reasoning core, an interactive frontend, and an early relational/SQL scaffold. The broader target shape is a query engine with clearer front-end, planning, optimization, and execution boundaries. @@ -15,8 +15,8 @@ execution boundaries. - Provenance-oriented explanations for derived answers - Script, REPL, local web UI, and optional TUI for experimentation (all with syntax highlighting) - Relational schema, catalog, logical-plan, and execution scaffolding -- Physical operator scaffolding with a small rule-based rewrite layer -- A minimal SQL slice for `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` queries over predicate-backed tables, including `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG` aggregates +- Physical operator scaffolding with a rule-based rewrite layer +- A SQL slice for `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` queries over predicate-backed tables, including `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG` aggregates - Filter push-down across joins in the physical rewrite pass ### Architecture @@ -28,7 +28,7 @@ The repository is currently organized around a few clear subsystems: - `src/frontend/`: REPL, script, GUI, and explanation rendering - `src/relational/`: schemas, values, rows, and result sets - `src/catalog/`: predicate-backed table metadata -- `src/sql/`: minimal SQL AST and parser +- `src/sql/`: SQL AST and parser - `src/planner/`: logical plan structures and SQL-to-plan translation - `src/execution/`: execution for the current logical-plan subset, the `DataSource` trait, the `TableStore`, and a physical operator layer with rule-based rewrites @@ -124,7 +124,7 @@ The repository now has a narrow SQL pipeline with: - predicate-backed catalog inference - relational schemas, rows, and values -- SQL parsing for a small subset +- SQL parsing for the supported subset - logical planning - execution for filtering, ordering, limiting, and basic multi-table joins diff --git a/examples/scripts/README.md b/examples/scripts/README.md index c355a22..e8c83dd 100644 --- a/examples/scripts/README.md +++ b/examples/scripts/README.md @@ -11,8 +11,12 @@ Available examples: - `ancestor.ech`: transitive closure over `Parent/2` - `employee_departments.ech`: existential rule that creates labeled nulls +- `negation.ech`: stratified negation-as-failure with `NOT` in rule bodies - `same_team.ech`: conjunctive query with a self-join +- `skolem_chase.ech`: Skolem chase with deterministic labeled nulls +- `sql_aggregate.ech`: `GROUP BY` with `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG` - `sql_basic.ech`: named-column filtering in the SQL frontend +- `sql_filter_ops.ech`: inequality, `OR`, `LIMIT`, and integer literals - `sql_join.ech`: multi-table SQL join over predicate-backed tables - `sql_self_join.ech`: self-join with SQL table aliases - `sql_order_by.ech`: ordered SQL output with `ORDER BY` diff --git a/src/chase/engine.rs b/src/chase/engine.rs index 8146aba..ef9abc9 100644 --- a/src/chase/engine.rs +++ b/src/chase/engine.rs @@ -205,12 +205,12 @@ pub fn chase_stratified(instance: Instance, rules: &[Rule], config: ChaseConfig) total_steps += result.steps; current_instance = result.instance; - if let Some(error) = result.error { + if !result.terminated || result.error.is_some() { return ChaseResult { instance: current_instance, steps: total_steps, terminated: false, - error: Some(error), + error: result.error, }; } } diff --git a/src/chase/inference.rs b/src/chase/inference.rs index bd0bf4c..50e0640 100644 --- a/src/chase/inference.rs +++ b/src/chase/inference.rs @@ -137,6 +137,67 @@ struct PendingFact { } pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState { + let has_negation = rules.iter().any(|r| r.has_negation()); + if has_negation { + return materialize_stratified(base_instance, rules); + } + materialize_flat(base_instance, rules) +} + +fn materialize_stratified(base_instance: Instance, rules: &[Rule]) -> MaterializedState { + use super::stratification::stratify; + + let strata = match stratify(rules) { + Ok(s) => s, + Err(_) => { + // Unstratifiable: fall back to flat materialization. + return materialize_flat(base_instance, rules); + } + }; + + let mut instance = base_instance; + let mut provenance: HashMap = instance + .iter() + .cloned() + .map(|fact| (fact, Derivation::Input)) + .collect(); + let mut total_steps = 0; + + for stratum_indexes in &strata { + let stratum_rules: Vec = stratum_indexes.iter().map(|&i| rules[i].clone()).collect(); + + let state = materialize_flat(instance, &stratum_rules); + total_steps += state.result.steps; + instance = state.result.instance; + for (atom, derivation) in state.provenance { + provenance.entry(atom).or_insert(derivation); + } + + if !state.result.terminated { + return MaterializedState { + result: ChaseResult { + instance, + steps: total_steps, + terminated: false, + error: state.result.error, + }, + provenance, + }; + } + } + + MaterializedState { + result: ChaseResult { + instance, + steps: total_steps, + terminated: true, + error: None, + }, + provenance, + } +} + +fn materialize_flat(base_instance: Instance, rules: &[Rule]) -> MaterializedState { let mut instance = base_instance; let mut provenance = instance .iter() @@ -289,10 +350,11 @@ impl MaterializedState { /// Filter a set of body-match substitutions against negated atoms. /// -/// A substitution is removed if, after applying it to any negated atom, the -/// resulting ground atom exists in the instance. This implements -/// negation-as-failure semantics: the negated atom must be absent for the -/// rule to fire. +/// A substitution is removed if, after applying it to any negated atom, any +/// matching ground fact exists in the instance. When the substitution +/// leaves unbound variables in a negated atom, the check uses pattern +/// matching against the instance (existential semantics: the negated atom +/// blocks if any witness exists). pub(crate) fn filter_negated( instance: &Instance, results: Vec, @@ -305,8 +367,14 @@ pub(crate) fn filter_negated( .into_iter() .filter(|subst| { negated_body.iter().all(|atom| { - let ground = subst.apply_atom(atom); - !instance.contains(&ground) + let applied = subst.apply_atom(atom); + if applied.is_ground() { + !instance.contains(&applied) + } else { + // Unbound variables remain: check whether any matching + // fact exists via pattern matching. + instance.facts_matching_pattern(&applied).is_empty() + } }) }) .collect() @@ -441,3 +509,67 @@ fn term_null_id(term: &Term) -> Option { _ => None, } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn filter_negated_blocks_when_fact_present() { + let instance: Instance = vec![ + Atom::new("A", vec![Term::constant("x")]), + Atom::new("B", vec![Term::constant("x")]), + ] + .into_iter() + .collect(); + + let mut subst = Substitution::new(); + subst.bind("X".into(), Term::constant("x")); + + let negated = vec![Atom::new("B", vec![Term::var("X")])]; + let result = filter_negated(&instance, vec![subst], &negated); + assert!(result.is_empty()); + } + + #[test] + fn filter_negated_passes_when_fact_absent() { + let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])] + .into_iter() + .collect(); + + let mut subst = Substitution::new(); + subst.bind("X".into(), Term::constant("x")); + + let negated = vec![Atom::new("B", vec![Term::var("X")])]; + let result = filter_negated(&instance, vec![subst], &negated); + assert_eq!(result.len(), 1); + } + + #[test] + fn filter_negated_handles_unbound_variables() { + // NOT Q(X, Y) where Y is not bound. Should block if any Q(x, _) exists. + let instance: Instance = vec![ + Atom::new("A", vec![Term::constant("x")]), + Atom::new("Q", vec![Term::constant("x"), Term::constant("anything")]), + ] + .into_iter() + .collect(); + + let mut subst = Substitution::new(); + subst.bind("X".into(), Term::constant("x")); + // Y is NOT bound + + let negated = vec![Atom::new("Q", vec![Term::var("X"), Term::var("Y")])]; + let result = filter_negated(&instance, vec![subst], &negated); + // Should be blocked because Q(x, anything) exists + assert!(result.is_empty()); + } + + #[test] + fn filter_negated_passes_empty_negated_body() { + let instance = Instance::new(); + let subst = Substitution::new(); + let result = filter_negated(&instance, vec![subst], &[]); + assert_eq!(result.len(), 1); + } +} diff --git a/src/chase/stratification.rs b/src/chase/stratification.rs index 84f055c..f7208d0 100644 --- a/src/chase/stratification.rs +++ b/src/chase/stratification.rs @@ -47,6 +47,22 @@ impl Error for StratificationError {} /// dependencies) upward. Rules without negation all land in stratum 0 when /// there are no dependency chains through negation. pub fn stratify(rules: &[Rule]) -> Result>, StratificationError> { + // Reject rules that have negated body atoms but no positive body atoms. + // Such rules cannot anchor to the instance and are not supported by + // standard stratified evaluation. + for rule in rules { + if rule.body.is_empty() && !rule.negated_body.is_empty() { + let preds: Vec = rule + .negated_body + .iter() + .map(|a| a.predicate.clone()) + .collect(); + return Err(StratificationError { + cycle_predicates: preds, + }); + } + } + // Collect all predicates. let mut all_predicates: HashSet = HashSet::new(); for rule in rules { @@ -219,4 +235,18 @@ mod tests { let result = stratify(&rules); assert!(result.is_err()); } + + #[test] + fn rejects_rule_with_only_negated_body() { + // NOT A(X) -> B(X) has no positive body atoms. + let rules = vec![ + RuleBuilder::new() + .when_not("A", vec![Term::var("X")]) + .then("B", vec![Term::var("X")]) + .build(), + ]; + + let result = stratify(&rules); + assert!(result.is_err()); + } } diff --git a/src/execution/mod.rs b/src/execution/mod.rs index bf0a19a..00616fa 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -485,4 +485,53 @@ mod tests { let result = instance.scan("Missing", &schema).unwrap(); assert_eq!(result.rows().len(), 0); } + + #[test] + fn value_from_term_parses_integer_constants() { + assert_eq!( + value_from_term(&Term::constant("42")).unwrap(), + Value::Integer(42) + ); + assert_eq!( + value_from_term(&Term::constant("alice")).unwrap(), + Value::text("alice") + ); + assert_eq!(value_from_term(&Term::Null(0)).unwrap(), Value::Null); + assert!(value_from_term(&Term::var("X")).is_err()); + } + + #[test] + fn aggregate_execution_count_and_sum() { + let schema = Schema::new(vec![ + Field::new("dept", DataType::Text, false), + Field::new("salary", DataType::Integer, false), + ]); + let rows = vec![ + Row::new(vec![Value::text("eng"), Value::Integer(100)]), + Row::new(vec![Value::text("eng"), Value::Integer(200)]), + Row::new(vec![Value::text("sales"), Value::Integer(50)]), + ]; + + let aggregates = vec![ + PlanAggregateExpr { + name: "__agg_0".into(), + func: AggregateFunc::Count, + arg: None, + }, + PlanAggregateExpr { + name: "__agg_1".into(), + func: AggregateFunc::Sum, + arg: Some("salary".into()), + }, + ]; + let result = compute_aggregate(&rows, &schema, &["dept".into()], &aggregates).unwrap(); + assert_eq!(result.len(), 2); + // Each row: [dept, count, sum] + let eng = result + .iter() + .find(|r| r.values()[0] == Value::text("eng")) + .unwrap(); + assert_eq!(eng.values()[1], Value::Integer(2)); + assert_eq!(eng.values()[2], Value::Integer(300)); + } } diff --git a/src/frontend/highlight.rs b/src/frontend/highlight.rs index a95d00d..52b7919 100644 --- a/src/frontend/highlight.rs +++ b/src/frontend/highlight.rs @@ -118,7 +118,7 @@ pub fn highlight_line(line: &str) -> Vec { continue; } - // Variable (?X) — must be checked before the single-char operator set + // Variable (?X): must be checked before the single-char operator set // since `?` also appears there for standalone query terminators. if ch == '?' && i + 1 < len && (chars[i + 1].is_alphanumeric() || chars[i + 1] == '_') { let start = i; diff --git a/src/frontend/language.rs b/src/frontend/language.rs index 1635664..124ce6c 100644 --- a/src/frontend/language.rs +++ b/src/frontend/language.rs @@ -598,4 +598,19 @@ mod tests { let error = parse_script("help\nbogus\nrun.").unwrap_err(); assert!(error.contains("line 2")); } + + #[test] + fn parse_rule_with_negation() { + let command = parse_command("rule Node(?X), NOT Connected(?X) -> Isolated(?X).").unwrap(); + match command { + Command::Rule(rule) => { + assert_eq!(rule.body.len(), 1); + assert_eq!(rule.body[0].predicate, "Node"); + assert_eq!(rule.negated_body.len(), 1); + assert_eq!(rule.negated_body[0].predicate, "Connected"); + assert_eq!(rule.head.len(), 1); + } + other => panic!("unexpected command: {:?}", other), + } + } }