This commit is contained in:
Hassan Abedi 2026-04-14 10:26:36 +02:00
parent 5b52a45b81
commit b7c90a18de
9 changed files with 247 additions and 17 deletions

View File

@ -6,7 +6,7 @@ This file provides guidance to coding agents collaborating on this repository.
Query Engine is an experimental Rust project for building query-engine
components. The current implementation is centered on a chase-based reasoning
core, lightweight interactive frontends, and an early relational/SQL scaffold.
core, interactive frontends, and an early relational/SQL scaffold.
Priorities, in order:
@ -55,7 +55,7 @@ Quick examples:
- `inference.rs`: shared matching, negation filtering, and provenance-aware materialization helpers.
- `stratification.rs`: stratification analysis for rules with negation.
- `union_find.rs`: equality merging support.
- `src/frontend/`: lightweight interactive surface for scripts, REPL, local web UI, TUI (behind `tui` feature), and syntax highlighting.
- `src/frontend/`: interactive surface for scripts, REPL, local web UI, TUI (behind `tui` feature), and syntax highlighting.
- `src/relational/`: schemas, values, rows, and result sets for relational execution.
- `src/catalog/`: predicate-to-table schema inference and catalog access.
- `src/io/`: CSV-based fact import and export.
@ -76,7 +76,7 @@ Quick examples:
- The current SQL support is intentionally narrow: `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` over predicate-backed tables; equality and inequality predicates combined with `AND` and `OR`; comma-join style multi-table queries; table aliases; ordering by output-column names; integer and string literals; `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG` aggregates with optional `GROUP BY`.
- Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`.
- Single-table SQL queries may use the table name as a qualifier when no alias is present.
- Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented.
- Do not describe unsupported SQL features (such as subqueries, window functions, or arbitrary expressions) as implemented.
- The executor operates on the `DataSource` trait, not on `Instance` directly. `Instance` and `TableStore` are the two built-in implementations.
- Relational and SQL modules should build on explicit schemas and logical plans, not call frontend helpers directly.
- If you add parser, planner, or executor layers, keep their responsibilities separate.

View File

@ -2,7 +2,7 @@
An experimental Rust project for building query-engine components.
Right now the repository is centered on a chase-based reasoning core, a small
Right now the repository is centered on a chase-based reasoning core, an
interactive frontend, and an early relational/SQL scaffold. The broader target
shape is a query engine with clearer front-end, planning, optimization, and
execution boundaries.
@ -15,8 +15,8 @@ execution boundaries.
- Provenance-oriented explanations for derived answers
- Script, REPL, local web UI, and optional TUI for experimentation (all with syntax highlighting)
- Relational schema, catalog, logical-plan, and execution scaffolding
- Physical operator scaffolding with a small rule-based rewrite layer
- A minimal SQL slice for `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` queries over predicate-backed tables, including `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG` aggregates
- Physical operator scaffolding with a rule-based rewrite layer
- A SQL slice for `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` queries over predicate-backed tables, including `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG` aggregates
- Filter push-down across joins in the physical rewrite pass
### Architecture
@ -28,7 +28,7 @@ The repository is currently organized around a few clear subsystems:
- `src/frontend/`: REPL, script, GUI, and explanation rendering
- `src/relational/`: schemas, values, rows, and result sets
- `src/catalog/`: predicate-backed table metadata
- `src/sql/`: minimal SQL AST and parser
- `src/sql/`: SQL AST and parser
- `src/planner/`: logical plan structures and SQL-to-plan translation
- `src/execution/`: execution for the current logical-plan subset, the `DataSource` trait, the `TableStore`, and a physical operator layer with rule-based rewrites
@ -124,7 +124,7 @@ The repository now has a narrow SQL pipeline with:
- predicate-backed catalog inference
- relational schemas, rows, and values
- SQL parsing for a small subset
- SQL parsing for the supported subset
- logical planning
- execution for filtering, ordering, limiting, and basic multi-table joins

View File

@ -11,8 +11,12 @@ Available examples:
- `ancestor.ech`: transitive closure over `Parent/2`
- `employee_departments.ech`: existential rule that creates labeled nulls
- `negation.ech`: stratified negation-as-failure with `NOT` in rule bodies
- `same_team.ech`: conjunctive query with a self-join
- `skolem_chase.ech`: Skolem chase with deterministic labeled nulls
- `sql_aggregate.ech`: `GROUP BY` with `COUNT`, `SUM`, `MIN`, `MAX`, and `AVG`
- `sql_basic.ech`: named-column filtering in the SQL frontend
- `sql_filter_ops.ech`: inequality, `OR`, `LIMIT`, and integer literals
- `sql_join.ech`: multi-table SQL join over predicate-backed tables
- `sql_self_join.ech`: self-join with SQL table aliases
- `sql_order_by.ech`: ordered SQL output with `ORDER BY`

View File

@ -205,12 +205,12 @@ pub fn chase_stratified(instance: Instance, rules: &[Rule], config: ChaseConfig)
total_steps += result.steps;
current_instance = result.instance;
if let Some(error) = result.error {
if !result.terminated || result.error.is_some() {
return ChaseResult {
instance: current_instance,
steps: total_steps,
terminated: false,
error: Some(error),
error: result.error,
};
}
}

View File

@ -137,6 +137,67 @@ struct PendingFact {
}
/// Materializes `rules` over `base_instance`, choosing the evaluation strategy
/// from the rule set.
///
/// If at least one rule reports `has_negation()`, evaluation is delegated to
/// `materialize_stratified`; otherwise the whole rule set is handed to
/// `materialize_flat` in a single pass.
pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState {
    if rules.iter().any(|rule| rule.has_negation()) {
        materialize_stratified(base_instance, rules)
    } else {
        materialize_flat(base_instance, rules)
    }
}
/// Materializes `rules` over `base_instance` one stratum at a time.
///
/// The rule set is split with `stratify`, and each stratum's rules are run
/// through `materialize_flat` in the order `stratify` returned them, feeding
/// the instance produced by one stratum into the next. Step counts are summed
/// and provenance is merged across strata.
///
/// Evaluation stops at the first stratum that either fails to terminate or
/// reports an error, mirroring the stop condition used by `chase_stratified`.
/// In that case the returned result has `terminated: false` and carries the
/// stratum's error (if any), so an error is never replaced by the final
/// `error: None`.
///
/// If `stratify` rejects the rule set, the function falls back to
/// `materialize_flat` over all rules; the stratification error itself is not
/// surfaced to the caller.
fn materialize_stratified(base_instance: Instance, rules: &[Rule]) -> MaterializedState {
    use super::stratification::stratify;

    let strata = match stratify(rules) {
        Ok(strata) => strata,
        // Unstratifiable: fall back to flat materialization.
        Err(_) => return materialize_flat(base_instance, rules),
    };

    let mut instance = base_instance;
    // Every fact present before any rule fires is recorded as an input fact.
    let mut provenance: HashMap<Atom, Derivation> = instance
        .iter()
        .cloned()
        .map(|fact| (fact, Derivation::Input))
        .collect();
    let mut total_steps = 0;

    for stratum_indexes in &strata {
        let stratum_rules: Vec<Rule> = stratum_indexes.iter().map(|&i| rules[i].clone()).collect();
        let state = materialize_flat(instance, &stratum_rules);
        total_steps += state.result.steps;
        instance = state.result.instance;

        // Keep the first derivation recorded for an atom, so entries from the
        // initial instance and from earlier strata are never overwritten.
        for (atom, derivation) in state.provenance {
            provenance.entry(atom).or_insert(derivation);
        }

        // Stop on non-termination *or* on a reported error; continuing past an
        // error would silently drop it from the final result.
        if !state.result.terminated || state.result.error.is_some() {
            return MaterializedState {
                result: ChaseResult {
                    instance,
                    steps: total_steps,
                    terminated: false,
                    error: state.result.error,
                },
                provenance,
            };
        }
    }

    MaterializedState {
        result: ChaseResult {
            instance,
            steps: total_steps,
            terminated: true,
            error: None,
        },
        provenance,
    }
}
fn materialize_flat(base_instance: Instance, rules: &[Rule]) -> MaterializedState {
let mut instance = base_instance;
let mut provenance = instance
.iter()
@ -289,10 +350,11 @@ impl MaterializedState {
/// Filter a set of body-match substitutions against negated atoms.
///
/// A substitution is removed if, after applying it to any negated atom, the
/// resulting ground atom exists in the instance. This implements
/// negation-as-failure semantics: the negated atom must be absent for the
/// rule to fire.
/// A substitution is removed if, after applying it to any negated atom, any
/// matching ground fact exists in the instance. When the substitution
/// leaves unbound variables in a negated atom, the check uses pattern
/// matching against the instance (existential semantics: the negated atom
/// blocks if any witness exists).
pub(crate) fn filter_negated(
instance: &Instance,
results: Vec<Substitution>,
@ -305,8 +367,14 @@ pub(crate) fn filter_negated(
.into_iter()
.filter(|subst| {
negated_body.iter().all(|atom| {
let ground = subst.apply_atom(atom);
!instance.contains(&ground)
let applied = subst.apply_atom(atom);
if applied.is_ground() {
!instance.contains(&applied)
} else {
// Unbound variables remain: check whether any matching
// fact exists via pattern matching.
instance.facts_matching_pattern(&applied).is_empty()
}
})
})
.collect()
@ -441,3 +509,67 @@ fn term_null_id(term: &Term) -> Option<usize> {
_ => None,
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a substitution whose only binding maps `X` to the constant `x`.
    fn x_bound_to_constant() -> Substitution {
        let mut bindings = Substitution::new();
        bindings.bind("X".into(), Term::constant("x"));
        bindings
    }

    /// Collects `facts` into an `Instance`.
    fn instance_of(facts: Vec<Atom>) -> Instance {
        facts.into_iter().collect()
    }

    #[test]
    fn filter_negated_blocks_when_fact_present() {
        // B(x) is present, so NOT B(X) with X = x must reject the substitution.
        let instance = instance_of(vec![
            Atom::new("A", vec![Term::constant("x")]),
            Atom::new("B", vec![Term::constant("x")]),
        ]);
        let negated = vec![Atom::new("B", vec![Term::var("X")])];

        let survivors = filter_negated(&instance, vec![x_bound_to_constant()], &negated);

        assert_eq!(survivors.len(), 0);
    }

    #[test]
    fn filter_negated_passes_when_fact_absent() {
        // No B fact exists, so NOT B(X) holds and the substitution survives.
        let instance = instance_of(vec![Atom::new("A", vec![Term::constant("x")])]);
        let negated = vec![Atom::new("B", vec![Term::var("X")])];

        let survivors = filter_negated(&instance, vec![x_bound_to_constant()], &negated);

        assert_eq!(survivors.len(), 1);
    }

    #[test]
    fn filter_negated_handles_unbound_variables() {
        // NOT Q(X, Y) with only X bound: any Q(x, _) fact must block.
        let instance = instance_of(vec![
            Atom::new("A", vec![Term::constant("x")]),
            Atom::new("Q", vec![Term::constant("x"), Term::constant("anything")]),
        ]);
        // Y is deliberately left out of the substitution.
        let negated = vec![Atom::new("Q", vec![Term::var("X"), Term::var("Y")])];

        let survivors = filter_negated(&instance, vec![x_bound_to_constant()], &negated);

        // Q(x, anything) is a witness, so nothing survives.
        assert_eq!(survivors.len(), 0);
    }

    #[test]
    fn filter_negated_passes_empty_negated_body() {
        // With no negated atoms there is nothing to filter on.
        let survivors = filter_negated(&Instance::new(), vec![Substitution::new()], &[]);

        assert_eq!(survivors.len(), 1);
    }
}

View File

@ -47,6 +47,22 @@ impl Error for StratificationError {}
/// dependencies) upward. Rules without negation all land in stratum 0 when
/// there are no dependency chains through negation.
pub fn stratify(rules: &[Rule]) -> Result<Vec<Vec<usize>>, StratificationError> {
// Reject rules that have negated body atoms but no positive body atoms.
// Such rules cannot anchor to the instance and are not supported by
// standard stratified evaluation.
for rule in rules {
if rule.body.is_empty() && !rule.negated_body.is_empty() {
let preds: Vec<String> = rule
.negated_body
.iter()
.map(|a| a.predicate.clone())
.collect();
return Err(StratificationError {
cycle_predicates: preds,
});
}
}
// Collect all predicates.
let mut all_predicates: HashSet<String> = HashSet::new();
for rule in rules {
@ -219,4 +235,18 @@ mod tests {
let result = stratify(&rules);
assert!(result.is_err());
}
#[test]
fn rejects_rule_with_only_negated_body() {
    // A purely negative body: NOT A(X) -> B(X), with no positive atom at all.
    let negation_only = RuleBuilder::new()
        .when_not("A", vec![Term::var("X")])
        .then("B", vec![Term::var("X")])
        .build();

    assert!(stratify(&[negation_only]).is_err());
}
}

View File

@ -485,4 +485,53 @@ mod tests {
let result = instance.scan("Missing", &schema).unwrap();
assert_eq!(result.rows().len(), 0);
}
#[test]
fn value_from_term_parses_integer_constants() {
    // A numeric-looking constant becomes an integer value.
    let numeric = value_from_term(&Term::constant("42")).unwrap();
    assert_eq!(numeric, Value::Integer(42));

    // Any other constant stays textual.
    let textual = value_from_term(&Term::constant("alice")).unwrap();
    assert_eq!(textual, Value::text("alice"));

    // Labeled nulls map to the relational null value.
    let null = value_from_term(&Term::Null(0)).unwrap();
    assert_eq!(null, Value::Null);

    // Variables have no relational value and must be rejected.
    let from_variable = value_from_term(&Term::var("X"));
    assert!(from_variable.is_err());
}
#[test]
fn aggregate_execution_count_and_sum() {
    // Input relation: (dept, salary), two "eng" rows and one "sales" row.
    let schema = Schema::new(vec![
        Field::new("dept", DataType::Text, false),
        Field::new("salary", DataType::Integer, false),
    ]);
    let rows = vec![
        Row::new(vec![Value::text("eng"), Value::Integer(100)]),
        Row::new(vec![Value::text("eng"), Value::Integer(200)]),
        Row::new(vec![Value::text("sales"), Value::Integer(50)]),
    ];

    // COUNT(*) and SUM(salary), grouped by dept.
    let count_all = PlanAggregateExpr {
        name: "__agg_0".into(),
        func: AggregateFunc::Count,
        arg: None,
    };
    let sum_salary = PlanAggregateExpr {
        name: "__agg_1".into(),
        func: AggregateFunc::Sum,
        arg: Some("salary".into()),
    };
    let aggregates = vec![count_all, sum_salary];

    let result = compute_aggregate(&rows, &schema, &["dept".into()], &aggregates).unwrap();

    // One output row per distinct dept.
    assert_eq!(result.len(), 2);

    // Output row layout: [dept, count, sum].
    let eng = result
        .iter()
        .find(|row| row.values()[0] == Value::text("eng"))
        .unwrap();
    let eng_values = eng.values();
    assert_eq!(eng_values[1], Value::Integer(2));
    assert_eq!(eng_values[2], Value::Integer(300));
}
}

View File

@ -118,7 +118,7 @@ pub fn highlight_line(line: &str) -> Vec<HighlightToken> {
continue;
}
// Variable (?X) must be checked before the single-char operator set
// Variable (?X): must be checked before the single-char operator set
// since `?` also appears there for standalone query terminators.
if ch == '?' && i + 1 < len && (chars[i + 1].is_alphanumeric() || chars[i + 1] == '_') {
let start = i;

View File

@ -598,4 +598,19 @@ mod tests {
let error = parse_script("help\nbogus\nrun.").unwrap_err();
assert!(error.contains("line 2"));
}
#[test]
fn parse_rule_with_negation() {
    let parsed = parse_command("rule Node(?X), NOT Connected(?X) -> Isolated(?X).").unwrap();

    // Anything other than a rule command is a parser regression.
    let rule = match parsed {
        Command::Rule(rule) => rule,
        other => panic!("unexpected command: {:?}", other),
    };

    // The positive and negated atoms must land in separate body lists.
    assert_eq!(rule.body.len(), 1);
    assert_eq!(rule.body[0].predicate, "Node");
    assert_eq!(rule.negated_body.len(), 1);
    assert_eq!(rule.negated_body[0].predicate, "Connected");
    assert_eq!(rule.head.len(), 1);
}
}