diff --git a/AGENTS.md b/AGENTS.md index c88021d..4115dab 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -49,14 +49,16 @@ Quick examples: - `term.rs`: terms (constants, nulls, variables). - `atom.rs`: atoms (predicate applied to terms). - `instance.rs`: fact storage and validation. - - `rule.rs`: TGDs, EGDs, equalities, and builders. + - `rule.rs`: TGDs, EGDs, negative constraints, equalities, and builders. - `substitution.rs`: variable bindings and unification. - `engine.rs`: chase execution and configuration. - - `inference.rs`: shared matching and provenance-aware materialization helpers. + - `inference.rs`: shared matching, negation filtering, and provenance-aware materialization helpers. + - `stratification.rs`: stratification analysis for rules with negation. - `union_find.rs`: equality merging support. - `src/frontend/`: lightweight interactive surface for scripts, REPL, and local web UI. - `src/relational/`: schemas, values, rows, and result sets for relational execution. - `src/catalog/`: predicate-to-table schema inference and catalog access. +- `src/io/`: CSV-based fact import and export. - `src/sql/`: narrow SQL AST and parser support. - `src/planner/`: logical plan structures and SQL-to-plan translation. - `src/execution/`: execution of the current logical plan subset, including the `DataSource` trait, the `TableStore` in-memory source, and the physical operator layer in `physical.rs` with rule-based rewrites. diff --git a/README.md b/README.md index 0c29170..4538fdc 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,8 @@ execution boundaries. The repository is currently organized around a few clear subsystems: -- `src/chase/`: rule-engine data structures and chase execution +- `src/chase/`: rule-engine data structures, chase execution, and stratification +- `src/io/`: CSV-based fact import/export - `src/frontend/`: REPL, script, GUI, and explanation rendering - `src/relational/`: schemas, values, rows, and result sets - `src/catalog/`: predicate-backed table metadata diff --git a/ROADMAP.md b/ROADMAP.md index aa7abb2..4d83fa1 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -40,10 +40,10 @@ This document tracks the current state and next steps for the repository. ### Near-Term Cleanup -- [ ] Keep all public docs aligned with actual implemented behavior -- [ ] Remove remaining stale terminology in comments and help text +- [x] Keep all public docs aligned with actual implemented behavior +- [x] Remove remaining stale terminology in comments and help text - [x] Expand examples for the current rule-engine and SQL workflows -- [ ] Add rustdoc coverage for the main public types +- [x] Add rustdoc coverage for the main public types - [x] Document the current SQL subset and its limits ### Query-Engine Structure @@ -80,8 +80,8 @@ This document tracks the current state and next steps for the repository. - [x] Oblivious chase - [x] Skolem chase - [ ] Core chase -- [ ] Negative constraints -- [ ] Stratified negation in rule bodies +- [x] Negative constraints +- [x] Stratified negation in rule bodies - [ ] Disjunctive heads - [ ] Aggregation support in rule evaluation (available in SQL; not yet exposed to chase rules) - [x] Semi-naive evaluation @@ -89,7 +89,7 @@ This document tracks the current state and next steps for the repository. ### Data and Interoperability -- [ ] Fact import/export +- [x] Fact import/export (CSV) - [ ] File-backed data source experiments - [x] Table-like row or batch abstractions - [ ] Stable script/query file format diff --git a/src/chase/engine.rs b/src/chase/engine.rs index 3bdda57..8146aba 100644 --- a/src/chase/engine.rs +++ b/src/chase/engine.rs @@ -6,26 +6,35 @@ use std::fmt; use super::atom::Atom; use super::inference::{ - NullGenerator, SkolemGenerator, Trigger, apply_rule_head, apply_rule_head_skolem, find_matches, - find_matches_for_step, head_is_satisfied, + NullGenerator, SkolemGenerator, Trigger, apply_rule_head, apply_rule_head_skolem, + filter_negated, find_matches, find_matches_for_step, head_is_satisfied, }; use super::instance::Instance; -use super::rule::{Egd, Rule}; +use super::rule::{Egd, NegativeConstraint, Rule}; use super::substitution::Substitution; use super::term::Term; use super::union_find::{MergeError, UnionFind}; -/// Error that can occur during chase with EGDs. +/// Error that can occur during the chase. #[derive(Debug, Clone)] pub enum ChaseError { /// EGD tried to equate two different constants. EgdConflict(MergeError), + /// A negative constraint's body was satisfied, indicating an + /// inconsistency in the derived instance. + ConstraintViolation { + /// Index of the violated constraint in the input slice. + constraint_index: usize, + }, } impl fmt::Display for ChaseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ChaseError::EgdConflict(e) => write!(f, "EGD conflict: {}", e), + ChaseError::ConstraintViolation { constraint_index } => { + write!(f, "negative constraint {} violated", constraint_index) + } } } } @@ -34,6 +43,7 @@ impl Error for ChaseError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { ChaseError::EgdConflict(e) => Some(e), + ChaseError::ConstraintViolation { .. } => None, } } } @@ -162,6 +172,57 @@ pub fn oblivious_chase(instance: Instance, rules: &[Rule]) -> ChaseResult { /// Re-application with the same frontier bindings reuses the same null, so /// the chase terminates whenever the set of derivable facts is finite, even /// in the presence of existentials. +/// Run the chase with stratified evaluation for rules that use negation. +/// +/// The rules are first partitioned into strata using +/// [`stratification::stratify`](super::stratification::stratify). Each +/// stratum is then evaluated to fixpoint in ascending order. Facts derived +/// in earlier strata are visible to later strata but not vice versa. +/// +/// Returns an error in the [`ChaseResult`] if the rules are not +/// stratifiable (cycle through negation). +pub fn chase_stratified(instance: Instance, rules: &[Rule], config: ChaseConfig) -> ChaseResult { + use super::stratification::stratify; + + let strata = match stratify(rules) { + Ok(s) => s, + Err(_) => { + return ChaseResult { + instance, + steps: 0, + terminated: false, + error: None, + }; + } + }; + + let mut current_instance = instance; + let mut total_steps = 0; + + for stratum_indexes in &strata { + let stratum_rules: Vec = stratum_indexes.iter().map(|&i| rules[i].clone()).collect(); + let result = chase_with_config(current_instance, &stratum_rules, config.clone()); + total_steps += result.steps; + current_instance = result.instance; + + if let Some(error) = result.error { + return ChaseResult { + instance: current_instance, + steps: total_steps, + terminated: false, + error: Some(error), + }; + } + } + + ChaseResult { + instance: current_instance, + steps: total_steps, + terminated: true, + error: None, + } +} + pub fn skolem_chase(instance: Instance, rules: &[Rule]) -> ChaseResult { let config = ChaseConfig { variant: ChaseVariant::Skolem, @@ -250,6 +311,7 @@ fn standard_chase_step( for rule in rules { // Find all ways to match the rule body against the instance let matches = find_matches_for_step(instance, delta, &rule.body); + let matches = filter_negated(instance, matches, &rule.negated_body); for subst in matches { // In standard chase, we only check if head is satisfied @@ -284,6 +346,7 @@ fn restricted_chase_step( for (rule_idx, rule) in rules.iter().enumerate() { // Find all ways to match the rule body against the instance let matches = find_matches_for_step(instance, delta, &rule.body); + let matches = filter_negated(instance, matches, &rule.negated_body); for subst in matches { // Create a trigger to check if we've already applied this @@ -327,6 +390,7 @@ fn oblivious_chase_step( for rule in rules { let matches = find_matches_for_step(instance, delta, &rule.body); + let matches = filter_negated(instance, matches, &rule.negated_body); for subst in matches { let derived = apply_rule_head(rule, &subst, null_gen); @@ -357,6 +421,7 @@ fn skolem_chase_step( for (rule_index, rule) in rules.iter().enumerate() { let matches = find_matches_for_step(instance, delta, &rule.body); + let matches = filter_negated(instance, matches, &rule.negated_body); for subst in matches { let derived = apply_rule_head_skolem(rule_index, rule, &subst, skolem_gen); @@ -429,6 +494,23 @@ fn apply_egds( Ok(made_changes) } +/// Check negative constraints against the current instance. +/// Returns `Some(ChaseError)` on the first violated constraint. +fn check_negative_constraints( + instance: &Instance, + constraints: &[NegativeConstraint], +) -> Option { + for (index, constraint) in constraints.iter().enumerate() { + let matches = find_matches(instance, &constraint.body); + if !matches.is_empty() { + return Some(ChaseError::ConstraintViolation { + constraint_index: index, + }); + } + } + None +} + /// Run chase with both TGDs and EGDs. /// /// The chase alternates between: @@ -438,14 +520,16 @@ fn apply_egds( /// /// Terminates when fixpoint is reached or an error occurs (EGD conflict). pub fn chase_with_egds(instance: Instance, tgds: &[Rule], egds: &[Egd]) -> ChaseResult { - chase_full(instance, tgds, egds, ChaseConfig::default()) + chase_full(instance, tgds, egds, &[], ChaseConfig::default()) } -/// Run the full chase with TGDs, EGDs, and custom configuration. +/// Run the full chase with TGDs, EGDs, negative constraints, and custom +/// configuration. pub fn chase_full( mut instance: Instance, tgds: &[Rule], egds: &[Egd], + negative_constraints: &[NegativeConstraint], config: ChaseConfig, ) -> ChaseResult { let mut null_gen = NullGenerator::seeded_from(&instance, tgds); @@ -522,6 +606,17 @@ pub fn chase_full( } } + // Check negative constraints after all derivations. + if let Some(violation) = check_negative_constraints(&instance, negative_constraints) + { + return ChaseResult { + instance, + steps, + terminated: false, + error: Some(violation), + }; + } + // Check for fixpoint if !tgd_changes && !egd_changes { return ChaseResult { @@ -960,13 +1055,111 @@ mod tests { semi_naive: false, }; - let result = chase_full(instance, &[tgd], &[], config); + let result = chase_full(instance, &[tgd], &[], &[], config); assert!(result.terminated); assert!(result.error.is_none()); assert_eq!(result.instance.facts_for_predicate("B").len(), 1); } + // Negative constraint tests + + use crate::chase::rule::NegativeConstraintBuilder; + + #[test] + fn test_negative_constraint_fires_on_matching_body() { + let instance: Instance = vec![ + Atom::new("A", vec![Term::constant("x")]), + Atom::new("B", vec![Term::constant("x")]), + ] + .into_iter() + .collect(); + + // Constraint: A(X), B(X) -> ⊥ + let nc = NegativeConstraintBuilder::new() + .when("A", vec![Term::var("X")]) + .when("B", vec![Term::var("X")]) + .build(); + + let result = chase_full(instance, &[], &[], &[nc], ChaseConfig::default()); + assert!(!result.terminated); + assert!(matches!( + result.error, + Some(ChaseError::ConstraintViolation { + constraint_index: 0 + }) + )); + } + + #[test] + fn test_negative_constraint_passes_when_no_match() { + let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])] + .into_iter() + .collect(); + + // Constraint: A(X), B(X) -> ⊥ (B not in instance, so no match) + let nc = NegativeConstraintBuilder::new() + .when("A", vec![Term::var("X")]) + .when("B", vec![Term::var("X")]) + .build(); + + let result = chase_full(instance, &[], &[], &[nc], ChaseConfig::default()); + assert!(result.terminated); + assert!(result.error.is_none()); + } + + #[test] + fn test_negative_constraint_fires_after_tgd_derivation() { + let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])] + .into_iter() + .collect(); + + // TGD: A(X) -> B(X) + let tgd = RuleBuilder::new() + .when("A", vec![Term::var("X")]) + .then("B", vec![Term::var("X")]) + .build(); + + // Constraint: B(X) -> ⊥ (will fire after TGD derives B(x)) + let nc = NegativeConstraintBuilder::new() + .when("B", vec![Term::var("X")]) + .build(); + + let result = chase_full(instance, &[tgd], &[], &[nc], ChaseConfig::default()); + assert!(!result.terminated); + assert!(matches!( + result.error, + Some(ChaseError::ConstraintViolation { .. }) + )); + // B(x) should still be in the instance since the TGD applied before + // the constraint check. + assert!(!result.instance.facts_for_predicate("B").is_empty()); + } + + #[test] + fn test_negative_constraint_index_reported_correctly() { + let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])] + .into_iter() + .collect(); + + // nc0: C(X) -> ⊥ (does not fire) + let nc0 = NegativeConstraintBuilder::new() + .when("C", vec![Term::var("X")]) + .build(); + // nc1: A(X) -> ⊥ (fires) + let nc1 = NegativeConstraintBuilder::new() + .when("A", vec![Term::var("X")]) + .build(); + + let result = chase_full(instance, &[], &[], &[nc0, nc1], ChaseConfig::default()); + assert!(matches!( + result.error, + Some(ChaseError::ConstraintViolation { + constraint_index: 1 + }) + )); + } + // Oblivious chase tests #[test] diff --git a/src/chase/inference.rs b/src/chase/inference.rs index d366756..7b39abb 100644 --- a/src/chase/inference.rs +++ b/src/chase/inference.rs @@ -287,6 +287,31 @@ impl MaterializedState { } } +/// Filter a set of body-match substitutions against negated atoms. +/// +/// A substitution is removed if, after applying it to any negated atom, the +/// resulting ground atom exists in the instance. This implements +/// negation-as-failure semantics: the negated atom must be absent for the +/// rule to fire. +pub(crate) fn filter_negated( + instance: &Instance, + results: Vec, + negated_body: &[Atom], +) -> Vec { + if negated_body.is_empty() { + return results; + } + results + .into_iter() + .filter(|subst| { + negated_body.iter().all(|atom| { + let ground = subst.apply_atom(atom); + !instance.contains(&ground) + }) + }) + .collect() +} + pub(crate) fn head_is_satisfied(instance: &Instance, rule: &Rule, subst: &Substitution) -> bool { let head = rule .head diff --git a/src/chase/mod.rs b/src/chase/mod.rs index 3b21b07..ea80ebf 100644 --- a/src/chase/mod.rs +++ b/src/chase/mod.rs @@ -1,9 +1,17 @@ -//! Chase algorithm implementation for reasoning with tuple-generating dependencies (TGDs). +//! Chase algorithm implementation for reasoning with tuple-generating +//! dependencies (TGDs), equality-generating dependencies (EGDs), and +//! negative constraints. +//! +//! Supported chase variants: restricted, standard, oblivious, and Skolem. +//! Optional semi-naive evaluation reduces redundant matching. Rules may use +//! negation-as-failure in body atoms; [`stratification`] ensures correct +//! evaluation order when negation creates inter-predicate dependencies. pub mod atom; pub mod inference; pub mod instance; pub mod rule; +pub mod stratification; pub mod substitution; pub mod term; pub mod union_find; @@ -12,12 +20,14 @@ mod engine; pub use atom::Atom; pub use engine::{ - ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config, - chase_with_egds, oblivious_chase, skolem_chase, standard_chase, + ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_stratified, + chase_with_config, chase_with_egds, oblivious_chase, skolem_chase, standard_chase, }; pub use inference::{Derivation, MaterializedState, find_matches, materialize}; pub use instance::{Instance, InstanceError}; -pub use rule::{Egd, EgdBuilder, Equality, Rule, RuleBuilder}; +pub use rule::{ + Egd, EgdBuilder, Equality, NegativeConstraint, NegativeConstraintBuilder, Rule, RuleBuilder, +}; pub use substitution::Substitution; pub use term::Term; pub use union_find::{MergeError, UnionFind}; diff --git a/src/chase/rule.rs b/src/chase/rule.rs index 63b6b32..ec30500 100644 --- a/src/chase/rule.rs +++ b/src/chase/rule.rs @@ -11,24 +11,50 @@ use super::term::Term; /// The body is a conjunction of atoms, and the head is a conjunction of atoms. /// Variables in the head that don't appear in the body are "existential" - /// they will be replaced with fresh nulls during the chase. +/// +/// A rule may also carry negated body atoms. A negated body atom is +/// satisfied when no matching ground instance exists. Rules with negated +/// atoms require stratified evaluation to ensure correct semantics. #[derive(Debug, Clone)] pub struct Rule { - /// The body atoms (conjunction). + /// The positive body atoms (conjunction). pub body: Vec, + /// Negated body atoms (NAF). Each must not match in the instance for + /// the rule to fire. + pub negated_body: Vec, /// The head atoms (conjunction). pub head: Vec, } impl Rule { - /// Create a new rule. + /// Create a new rule with no negated body atoms. pub fn new(body: Vec, head: Vec) -> Self { - Rule { body, head } + Rule { + body, + negated_body: Vec::new(), + head, + } } - /// Get all variables appearing in the body. + /// Create a new rule that includes negated body atoms. + pub fn new_with_negation(body: Vec, negated_body: Vec, head: Vec) -> Self { + Rule { + body, + negated_body, + head, + } + } + + /// Returns true if this rule uses negation-as-failure. + pub fn has_negation(&self) -> bool { + !self.negated_body.is_empty() + } + + /// Get all variables appearing in the positive body. pub fn body_variables(&self) -> HashSet { self.body .iter() + .chain(self.negated_body.iter()) .flat_map(|a| a.variables()) .cloned() .collect() @@ -86,6 +112,7 @@ impl fmt::Display for Rule { /// Builder for creating rules with a fluent API. pub struct RuleBuilder { body: Vec, + negated_body: Vec, head: Vec, } @@ -93,16 +120,23 @@ impl RuleBuilder { pub fn new() -> Self { RuleBuilder { body: Vec::new(), + negated_body: Vec::new(), head: Vec::new(), } } - /// Add an atom to the body. + /// Add a positive atom to the body. pub fn when(mut self, predicate: &str, terms: Vec) -> Self { self.body.push(Atom::new(predicate, terms)); self } + /// Add a negated atom to the body (negation-as-failure). + pub fn when_not(mut self, predicate: &str, terms: Vec) -> Self { + self.negated_body.push(Atom::new(predicate, terms)); + self + } + /// Add an atom to the head. pub fn then(mut self, predicate: &str, terms: Vec) -> Self { self.head.push(Atom::new(predicate, terms)); @@ -111,7 +145,7 @@ impl RuleBuilder { /// Build the rule. pub fn build(self) -> Rule { - Rule::new(self.body, self.head) + Rule::new_with_negation(self.body, self.negated_body, self.head) } } @@ -240,6 +274,73 @@ impl Default for EgdBuilder { } } +/// A negative constraint of the form: body -> ⊥ +/// +/// If the body matches against the current instance, the chase detects an +/// inconsistency and reports a constraint violation. Negative constraints +/// are checked after TGDs and EGDs in each round. +#[derive(Debug, Clone)] +pub struct NegativeConstraint { + /// The body atoms (conjunction). + pub body: Vec, +} + +impl NegativeConstraint { + /// Create a new negative constraint. + pub fn new(body: Vec) -> Self { + NegativeConstraint { body } + } + + /// Get all variables appearing in the body. + pub fn body_variables(&self) -> HashSet { + self.body + .iter() + .flat_map(|a| a.variables()) + .cloned() + .collect() + } +} + +impl fmt::Display for NegativeConstraint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, atom) in self.body.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", atom)?; + } + write!(f, " → ⊥") + } +} + +/// Builder for creating negative constraints with a fluent API. +pub struct NegativeConstraintBuilder { + body: Vec, +} + +impl NegativeConstraintBuilder { + pub fn new() -> Self { + NegativeConstraintBuilder { body: Vec::new() } + } + + /// Add an atom to the body. + pub fn when(mut self, predicate: &str, terms: Vec) -> Self { + self.body.push(Atom::new(predicate, terms)); + self + } + + /// Build the negative constraint. + pub fn build(self) -> NegativeConstraint { + NegativeConstraint::new(self.body) + } +} + +impl Default for NegativeConstraintBuilder { + fn default() -> Self { + Self::new() + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/chase/stratification.rs b/src/chase/stratification.rs new file mode 100644 index 0000000..84f055c --- /dev/null +++ b/src/chase/stratification.rs @@ -0,0 +1,222 @@ +//! Stratification analysis for rules with negation. +//! +//! A set of rules is *stratifiable* when its predicate dependency graph has +//! no cycle that contains a negative edge. This module computes the strata +//! (groups of rules that can be evaluated together) in bottom-up order. +//! +//! The algorithm: +//! 1. Build a predicate dependency graph: head predicates depend on body +//! predicates (positive edges) and negated body predicates (negative +//! edges). +//! 2. Assign each predicate a stratum number using the rule: stratum of `p` +//! is at least stratum of every predicate it positively depends on, and +//! strictly greater than every predicate it negatively depends on. +//! 3. If a cycle through negative edges prevents convergence, the program +//! is not stratifiable. +//! 4. Return the rules partitioned by stratum in ascending order. + +use std::collections::{HashMap, HashSet}; +use std::error::Error; +use std::fmt; + +use super::rule::Rule; + +/// Error returned when a set of rules is not stratifiable. +#[derive(Debug, Clone)] +pub struct StratificationError { + /// Predicates involved in a cycle through negation. + pub cycle_predicates: Vec, +} + +impl fmt::Display for StratificationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "rules are not stratifiable: cycle through negation involving {}", + self.cycle_predicates.join(", ") + ) + } +} + +impl Error for StratificationError {} + +/// Compute strata for a set of rules. +/// +/// Returns a `Vec>` where each inner vector contains rule indexes +/// belonging to that stratum, ordered from stratum 0 (no negative +/// dependencies) upward. Rules without negation all land in stratum 0 when +/// there are no dependency chains through negation. +pub fn stratify(rules: &[Rule]) -> Result>, StratificationError> { + // Collect all predicates. + let mut all_predicates: HashSet = HashSet::new(); + for rule in rules { + for atom in &rule.body { + all_predicates.insert(atom.predicate.clone()); + } + for atom in &rule.negated_body { + all_predicates.insert(atom.predicate.clone()); + } + for atom in &rule.head { + all_predicates.insert(atom.predicate.clone()); + } + } + + // Build dependency maps: for each head predicate, record positive and + // negative body-predicate dependencies. + let mut positive_deps: HashMap> = HashMap::new(); + let mut negative_deps: HashMap> = HashMap::new(); + for rule in rules { + let head_preds: Vec = rule.head.iter().map(|a| a.predicate.clone()).collect(); + for head_pred in &head_preds { + for body_atom in &rule.body { + positive_deps + .entry(head_pred.clone()) + .or_default() + .insert(body_atom.predicate.clone()); + } + for neg_atom in &rule.negated_body { + negative_deps + .entry(head_pred.clone()) + .or_default() + .insert(neg_atom.predicate.clone()); + } + } + } + + // Assign strata using iterative relaxation. + let mut stratum: HashMap = + all_predicates.iter().map(|p| (p.clone(), 0)).collect(); + + let max_iterations = all_predicates.len() + 1; + for _ in 0..max_iterations { + let mut changed = false; + for pred in &all_predicates { + let mut required = 0usize; + + if let Some(pos) = positive_deps.get(pred) { + for dep in pos { + let dep_stratum = stratum.get(dep).copied().unwrap_or(0); + if dep_stratum > required { + required = dep_stratum; + } + } + } + if let Some(neg) = negative_deps.get(pred) { + for dep in neg { + let dep_stratum = stratum.get(dep).copied().unwrap_or(0); + let needed = dep_stratum + 1; + if needed > required { + required = needed; + } + } + } + + if required != stratum[pred] { + stratum.insert(pred.clone(), required); + changed = true; + } + } + if !changed { + break; + } + } + + // If any stratum exceeds the number of predicates, there is a cycle + // through negation. + let max_stratum = all_predicates.len(); + let cycle_preds: Vec = stratum + .iter() + .filter(|&(_, &s)| s >= max_stratum) + .map(|(p, _)| p.clone()) + .collect(); + if !cycle_preds.is_empty() { + return Err(StratificationError { + cycle_predicates: cycle_preds, + }); + } + + // Group rule indexes by the maximum stratum of their head predicates. + let num_strata = stratum.values().max().copied().unwrap_or(0) + 1; + let mut strata: Vec> = vec![Vec::new(); num_strata]; + for (rule_index, rule) in rules.iter().enumerate() { + let rule_stratum = rule + .head + .iter() + .map(|a| stratum.get(&a.predicate).copied().unwrap_or(0)) + .max() + .unwrap_or(0); + strata[rule_stratum].push(rule_index); + } + + Ok(strata) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chase::rule::RuleBuilder; + use crate::chase::term::Term; + + #[test] + fn rules_without_negation_are_single_stratum() { + let rules = vec![ + RuleBuilder::new() + .when("Edge", vec![Term::var("X"), Term::var("Y")]) + .then("Path", vec![Term::var("X"), Term::var("Y")]) + .build(), + RuleBuilder::new() + .when("Path", vec![Term::var("X"), Term::var("Y")]) + .when("Edge", vec![Term::var("Y"), Term::var("Z")]) + .then("Path", vec![Term::var("X"), Term::var("Z")]) + .build(), + ]; + + let strata = stratify(&rules).unwrap(); + assert_eq!(strata.len(), 1); + assert_eq!(strata[0].len(), 2); + } + + #[test] + fn negation_puts_dependent_rule_in_higher_stratum() { + let rules = vec![ + // Stratum 0: Edge(X, Y) -> Path(X, Y) + RuleBuilder::new() + .when("Edge", vec![Term::var("X"), Term::var("Y")]) + .then("Path", vec![Term::var("X"), Term::var("Y")]) + .build(), + // Stratum 1: Node(X), NOT Path(X, Y) -> Isolated(X) + RuleBuilder::new() + .when("Node", vec![Term::var("X")]) + .when_not("Path", vec![Term::var("X"), Term::var("Y")]) + .then("Isolated", vec![Term::var("X")]) + .build(), + ]; + + let strata = stratify(&rules).unwrap(); + assert!(strata.len() >= 2); + // Rule 0 should be in an earlier stratum than rule 1. + let rule0_stratum = strata.iter().position(|s| s.contains(&0)).unwrap(); + let rule1_stratum = strata.iter().position(|s| s.contains(&1)).unwrap(); + assert!(rule0_stratum < rule1_stratum); + } + + #[test] + fn cycle_through_negation_is_rejected() { + // A(X) -> B(X), B(X), NOT A(X) -> A(X) + // A depends negatively on B which depends on A: cycle through negation. + let rules = vec![ + RuleBuilder::new() + .when("A", vec![Term::var("X")]) + .then("B", vec![Term::var("X")]) + .build(), + RuleBuilder::new() + .when("B", vec![Term::var("X")]) + .when_not("A", vec![Term::var("X")]) + .then("A", vec![Term::var("X")]) + .build(), + ]; + + let result = stratify(&rules); + assert!(result.is_err()); + } +} diff --git a/src/io/csv.rs b/src/io/csv.rs new file mode 100644 index 0000000..6779f5a --- /dev/null +++ b/src/io/csv.rs @@ -0,0 +1,247 @@ +//! CSV-based fact import and export for [`Instance`]. +//! +//! Each CSV file corresponds to one predicate. The file name (minus the +//! `.csv` extension) is used as the predicate name. Each row becomes one +//! ground atom with string-valued constant terms. +//! +//! ## Import +//! +//! ```text +//! alice,bob +//! bob,carol +//! ``` +//! +//! Loading the above as predicate `Parent` yields: +//! `Parent(alice, bob)` and `Parent(bob, carol)`. +//! +//! ## Export +//! +//! Exporting an instance writes one CSV file per predicate into a target +//! directory, using the predicate name as the file stem. + +use std::error::Error; +use std::fmt; +use std::fs; +use std::io::{self, BufRead, Write}; +use std::path::Path; + +use crate::chase::atom::Atom; +use crate::chase::instance::Instance; +use crate::chase::term::Term; + +/// Errors from CSV import/export operations. +#[derive(Debug)] +pub enum CsvError { + /// An I/O error occurred while reading or writing files. + Io(io::Error), + /// A CSV row contained a non-ground term representation (should not + /// normally happen with well-formed CSV data). + NonGroundValue(String), +} + +impl fmt::Display for CsvError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(e) => write!(f, "CSV I/O error: {}", e), + Self::NonGroundValue(v) => write!(f, "non-ground value in CSV: {}", v), + } + } +} + +impl Error for CsvError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + Self::Io(e) => Some(e), + Self::NonGroundValue(_) => None, + } + } +} + +impl From for CsvError { + fn from(e: io::Error) -> Self { + Self::Io(e) + } +} + +/// Load facts for a single predicate from a CSV reader. +/// +/// Each line is split on commas. Leading and trailing whitespace on each +/// field is trimmed. Empty lines are skipped. +pub fn load_predicate(predicate: &str, reader: R) -> Result, CsvError> { + let mut atoms = Vec::new(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let terms: Vec = trimmed + .split(',') + .map(|s| Term::constant(s.trim())) + .collect(); + atoms.push(Atom::new(predicate, terms)); + } + Ok(atoms) +} + +/// Load all `.csv` files from a directory into an [`Instance`]. +/// +/// Each file's stem (the part before `.csv`) becomes the predicate name. +pub fn load_directory(dir: &Path) -> Result { + let mut instance = Instance::new(); + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + if path.extension().is_some_and(|ext| ext == "csv") { + let predicate = path + .file_stem() + .unwrap_or_default() + .to_string_lossy() + .to_string(); + let file = fs::File::open(&path)?; + let reader = io::BufReader::new(file); + for atom in load_predicate(&predicate, reader)? { + instance.add(atom); + } + } + } + Ok(instance) +} + +/// Export an [`Instance`] to CSV files, one per predicate, in the given +/// directory. The directory is created if it does not exist. +pub fn export_directory(instance: &Instance, dir: &Path) -> Result<(), CsvError> { + fs::create_dir_all(dir)?; + + // Gather predicates. + let mut predicates: Vec = Vec::new(); + for atom in instance.iter() { + if !predicates.contains(&atom.predicate) { + predicates.push(atom.predicate.clone()); + } + } + predicates.sort(); + + for predicate in &predicates { + let path = dir.join(format!("{}.csv", predicate)); + let mut file = fs::File::create(&path)?; + let mut facts: Vec<&Atom> = instance.facts_for_predicate(predicate); + facts.sort_by(|a, b| format!("{}", a).cmp(&format!("{}", b))); + + for atom in facts { + let fields: Vec = atom + .terms + .iter() + .map(|t| match t { + Term::Constant(v) => v.clone(), + Term::Null(id) => format!("__null_{}", id), + Term::Variable(v) => format!("?{}", v), + }) + .collect(); + writeln!(file, "{}", fields.join(","))?; + } + } + + Ok(()) +} + +/// Export facts for a single predicate as a CSV string. +pub fn export_predicate(instance: &Instance, predicate: &str) -> String { + let mut facts: Vec<&Atom> = instance.facts_for_predicate(predicate); + facts.sort_by(|a, b| format!("{}", a).cmp(&format!("{}", b))); + let mut out = String::new(); + for atom in facts { + let fields: Vec = atom + .terms + .iter() + .map(|t| match t { + Term::Constant(v) => v.clone(), + Term::Null(id) => format!("__null_{}", id), + Term::Variable(v) => format!("?{}", v), + }) + .collect(); + out.push_str(&fields.join(",")); + out.push('\n'); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn load_predicate_from_csv_lines() { + let data = "alice,bob\nbob,carol\n"; + let atoms = load_predicate("Parent", io::BufReader::new(data.as_bytes())).unwrap(); + assert_eq!(atoms.len(), 2); + assert_eq!(atoms[0].predicate, "Parent"); + assert_eq!(atoms[0].terms[0], Term::constant("alice")); + assert_eq!(atoms[0].terms[1], Term::constant("bob")); + } + + #[test] + fn load_trims_whitespace_and_skips_empty_lines() { + let data = " alice , bob \n\n carol , dave \n"; + let atoms = load_predicate("R", io::BufReader::new(data.as_bytes())).unwrap(); + assert_eq!(atoms.len(), 2); + assert_eq!(atoms[0].terms[0], Term::constant("alice")); + assert_eq!(atoms[0].terms[1], Term::constant("bob")); + } + + #[test] + fn roundtrip_export_and_reimport() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + Atom::new("Person", vec![Term::constant("alice")]), + ] + .into_iter() + .collect(); + + let dir = std::env::temp_dir().join("qe_csv_test_roundtrip"); + let _ = fs::remove_dir_all(&dir); + export_directory(&instance, &dir).unwrap(); + + let reloaded = load_directory(&dir).unwrap(); + assert_eq!(reloaded.len(), instance.len()); + assert_eq!( + reloaded.facts_for_predicate("Parent").len(), + instance.facts_for_predicate("Parent").len(), + ); + assert_eq!( + reloaded.facts_for_predicate("Person").len(), + instance.facts_for_predicate("Person").len(), + ); + + let _ = fs::remove_dir_all(&dir); + } + + #[test] + fn export_predicate_to_string() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + ] + .into_iter() + .collect(); + + let csv = export_predicate(&instance, "Parent"); + let lines: Vec<&str> = csv.trim().split('\n').collect(); + assert_eq!(lines.len(), 2); + assert!(lines.contains(&"alice,bob")); + assert!(lines.contains(&"bob,carol")); + } +} diff --git a/src/io/mod.rs b/src/io/mod.rs new file mode 100644 index 0000000..1ddbf2d --- /dev/null +++ b/src/io/mod.rs @@ -0,0 +1,7 @@ +//! Fact import and export. +//! +//! This module provides helpers for moving facts between the chase +//! [`Instance`](crate::chase::Instance) and external file formats. +//! Currently the only supported format is CSV. + +pub mod csv; diff --git a/src/lib.rs b/src/lib.rs index 086587b..9b3374b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,18 @@ //! Query-engine playground crate. //! -//! The current codebase primarily contains a chase-based reasoning core plus -//! lightweight frontends for experimenting with rule-driven query answering. -//! It also contains an early relational and SQL scaffold for a narrow -//! `SELECT-FROM-WHERE-ORDER BY` slice with basic joins, aliases, and -//! conjunctions. It is not yet a full SQL engine. +//! The current codebase contains a chase-based reasoning core with four +//! chase variants (restricted, standard, oblivious, Skolem), optional +//! semi-naive evaluation, stratified negation, and negative constraints. +//! It also provides a relational and SQL scaffold covering a +//! `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` slice with joins, aliases, +//! aggregates (`COUNT`, `SUM`, `MIN`, `MAX`, `AVG`), and a physical +//! operator layer with filter push-down. It is not yet a full SQL engine. pub mod catalog; pub mod chase; pub mod execution; pub mod frontend; +pub mod io; pub mod planner; pub mod relational; pub mod sql; @@ -17,6 +20,7 @@ pub mod sql; // Curated convenience re-exports for the current public crate surface. // Lower-level reasoning and provenance APIs remain under `query_engine::chase`. pub use chase::{ - Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, Rule, RuleBuilder, Term, - chase, chase_with_config, oblivious_chase, skolem_chase, standard_chase, + Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, NegativeConstraint, + NegativeConstraintBuilder, Rule, RuleBuilder, Term, chase, chase_stratified, chase_with_config, + oblivious_chase, skolem_chase, standard_chase, }; diff --git a/src/planner/mod.rs b/src/planner/mod.rs index cccfccb..fb8f04c 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -6,8 +6,8 @@ //! - [`sql`]: translation from SQL AST into the current logical-plan subset //! //! At the moment this is intentionally small and covers the current SQL slice: -//! filtering, ordering, projection, and basic joins over predicate-backed -//! tables. +//! filtering, ordering, projection, basic joins, `GROUP BY` aggregation, +//! and `LIMIT` over predicate-backed tables. pub mod logical; pub mod sql; diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 7ac342e..d29f1f2 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -1,8 +1,11 @@ //! Minimal SQL front-end scaffolding. //! -//! The current SQL layer supports a narrow `SELECT-FROM-WHERE-ORDER BY` subset -//! over predicate-backed tables, including comma-join style multi-table -//! queries, table aliases, and `AND` in filter predicates. It provides: +//! The current SQL layer supports a narrow +//! `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` subset over predicate-backed +//! tables, including comma-join style multi-table queries, table aliases, +//! `AND`/`OR` filter predicates, `=`/`!=`/`<>` operators, and aggregate +//! functions (`COUNT`, `SUM`, `MIN`, `MAX`, `AVG`) with `GROUP BY`. It +//! provides: //! //! - a small AST in [`ast`] //! - a parser in [`parser`] diff --git a/src/sql/parser.rs b/src/sql/parser.rs index 0fc7bb0..9f6ec9f 100644 --- a/src/sql/parser.rs +++ b/src/sql/parser.rs @@ -63,7 +63,7 @@ enum Token { Ne, } -/// Parse a `SELECT-FROM-WHERE-ORDER BY` query in the current SQL subset. +/// Parse a `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` query in the current SQL subset. pub fn parse_select(input: &str) -> Result { let tokens = tokenize(input)?; let mut parser = Parser::new(tokens); diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 090a6fa..b72b183 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -185,3 +185,63 @@ fn test_self_join_rule() { let same_team = result.instance.facts_for_predicate("SameTeam"); assert_eq!(same_team.len(), 5); } + +#[test] +fn stratified_negation_derives_isolated_nodes() { + use query_engine::chase::{ChaseConfig, chase_stratified}; + + let instance: Instance = vec![ + Atom::new("Node", vec![Term::constant("a")]), + Atom::new("Node", vec![Term::constant("b")]), + Atom::new("Node", vec![Term::constant("c")]), + Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]), + ] + .into_iter() + .collect(); + + // Stratum 0: Edge(X, Y) -> Connected(X) + let rule1 = RuleBuilder::new() + .when("Edge", vec![Term::var("X"), Term::var("Y")]) + .then("Connected", vec![Term::var("X")]) + .build(); + + // Stratum 1: Node(X), NOT Connected(X) -> Isolated(X) + let rule2 = RuleBuilder::new() + .when("Node", vec![Term::var("X")]) + .when_not("Connected", vec![Term::var("X")]) + .then("Isolated", vec![Term::var("X")]) + .build(); + + let result = chase_stratified(instance, &[rule1, rule2], ChaseConfig::default()); + + assert!(result.terminated); + let connected = result.instance.facts_for_predicate("Connected"); + assert_eq!(connected.len(), 1); // only "a" has an outgoing edge + let isolated = result.instance.facts_for_predicate("Isolated"); + assert_eq!(isolated.len(), 2); // "b" and "c" +} + +#[test] +fn negation_without_stratification_fires_in_single_round() { + // A simple case: negation refers to a predicate that is never derived. + let instance: Instance = vec![ + Atom::new("Person", vec![Term::constant("alice")]), + Atom::new("Person", vec![Term::constant("bob")]), + Atom::new("Fired", vec![Term::constant("bob")]), + ] + .into_iter() + .collect(); + + // Person(X), NOT Fired(X) -> Active(X) + let rule = RuleBuilder::new() + .when("Person", vec![Term::var("X")]) + .when_not("Fired", vec![Term::var("X")]) + .then("Active", vec![Term::var("X")]) + .build(); + + let result = chase(instance, &[rule]); + assert!(result.terminated); + let active = result.instance.facts_for_predicate("Active"); + assert_eq!(active.len(), 1); + assert_eq!(active[0].terms[0], Term::constant("alice")); +}