Add negative constraints, stratified negation, and CSV import/export
This commit is contained in:
parent
5b206bfc23
commit
c3a1c7d9dd
@ -49,14 +49,16 @@ Quick examples:
|
|||||||
- `term.rs`: terms (constants, nulls, variables).
|
- `term.rs`: terms (constants, nulls, variables).
|
||||||
- `atom.rs`: atoms (predicate applied to terms).
|
- `atom.rs`: atoms (predicate applied to terms).
|
||||||
- `instance.rs`: fact storage and validation.
|
- `instance.rs`: fact storage and validation.
|
||||||
- `rule.rs`: TGDs, EGDs, equalities, and builders.
|
- `rule.rs`: TGDs, EGDs, negative constraints, equalities, and builders.
|
||||||
- `substitution.rs`: variable bindings and unification.
|
- `substitution.rs`: variable bindings and unification.
|
||||||
- `engine.rs`: chase execution and configuration.
|
- `engine.rs`: chase execution and configuration.
|
||||||
- `inference.rs`: shared matching and provenance-aware materialization helpers.
|
- `inference.rs`: shared matching, negation filtering, and provenance-aware materialization helpers.
|
||||||
|
- `stratification.rs`: stratification analysis for rules with negation.
|
||||||
- `union_find.rs`: equality merging support.
|
- `union_find.rs`: equality merging support.
|
||||||
- `src/frontend/`: lightweight interactive surface for scripts, REPL, and local web UI.
|
- `src/frontend/`: lightweight interactive surface for scripts, REPL, and local web UI.
|
||||||
- `src/relational/`: schemas, values, rows, and result sets for relational execution.
|
- `src/relational/`: schemas, values, rows, and result sets for relational execution.
|
||||||
- `src/catalog/`: predicate-to-table schema inference and catalog access.
|
- `src/catalog/`: predicate-to-table schema inference and catalog access.
|
||||||
|
- `src/io/`: CSV-based fact import and export.
|
||||||
- `src/sql/`: narrow SQL AST and parser support.
|
- `src/sql/`: narrow SQL AST and parser support.
|
||||||
- `src/planner/`: logical plan structures and SQL-to-plan translation.
|
- `src/planner/`: logical plan structures and SQL-to-plan translation.
|
||||||
- `src/execution/`: execution of the current logical plan subset, including the `DataSource` trait, the `TableStore` in-memory source, and the physical operator layer in `physical.rs` with rule-based rewrites.
|
- `src/execution/`: execution of the current logical plan subset, including the `DataSource` trait, the `TableStore` in-memory source, and the physical operator layer in `physical.rs` with rule-based rewrites.
|
||||||
|
|||||||
@ -23,7 +23,8 @@ execution boundaries.
|
|||||||
|
|
||||||
The repository is currently organized around a few clear subsystems:
|
The repository is currently organized around a few clear subsystems:
|
||||||
|
|
||||||
- `src/chase/`: rule-engine data structures and chase execution
|
- `src/chase/`: rule-engine data structures, chase execution, and stratification
|
||||||
|
- `src/io/`: CSV-based fact import/export
|
||||||
- `src/frontend/`: REPL, script, GUI, and explanation rendering
|
- `src/frontend/`: REPL, script, GUI, and explanation rendering
|
||||||
- `src/relational/`: schemas, values, rows, and result sets
|
- `src/relational/`: schemas, values, rows, and result sets
|
||||||
- `src/catalog/`: predicate-backed table metadata
|
- `src/catalog/`: predicate-backed table metadata
|
||||||
|
|||||||
12
ROADMAP.md
12
ROADMAP.md
@ -40,10 +40,10 @@ This document tracks the current state and next steps for the repository.
|
|||||||
|
|
||||||
### Near-Term Cleanup
|
### Near-Term Cleanup
|
||||||
|
|
||||||
- [ ] Keep all public docs aligned with actual implemented behavior
|
- [x] Keep all public docs aligned with actual implemented behavior
|
||||||
- [ ] Remove remaining stale terminology in comments and help text
|
- [x] Remove remaining stale terminology in comments and help text
|
||||||
- [x] Expand examples for the current rule-engine and SQL workflows
|
- [x] Expand examples for the current rule-engine and SQL workflows
|
||||||
- [ ] Add rustdoc coverage for the main public types
|
- [x] Add rustdoc coverage for the main public types
|
||||||
- [x] Document the current SQL subset and its limits
|
- [x] Document the current SQL subset and its limits
|
||||||
|
|
||||||
### Query-Engine Structure
|
### Query-Engine Structure
|
||||||
@ -80,8 +80,8 @@ This document tracks the current state and next steps for the repository.
|
|||||||
- [x] Oblivious chase
|
- [x] Oblivious chase
|
||||||
- [x] Skolem chase
|
- [x] Skolem chase
|
||||||
- [ ] Core chase
|
- [ ] Core chase
|
||||||
- [ ] Negative constraints
|
- [x] Negative constraints
|
||||||
- [ ] Stratified negation in rule bodies
|
- [x] Stratified negation in rule bodies
|
||||||
- [ ] Disjunctive heads
|
- [ ] Disjunctive heads
|
||||||
- [ ] Aggregation support in rule evaluation (available in SQL; not yet exposed to chase rules)
|
- [ ] Aggregation support in rule evaluation (available in SQL; not yet exposed to chase rules)
|
||||||
- [x] Semi-naive evaluation
|
- [x] Semi-naive evaluation
|
||||||
@ -89,7 +89,7 @@ This document tracks the current state and next steps for the repository.
|
|||||||
|
|
||||||
### Data and Interoperability
|
### Data and Interoperability
|
||||||
|
|
||||||
- [ ] Fact import/export
|
- [x] Fact import/export (CSV)
|
||||||
- [ ] File-backed data source experiments
|
- [ ] File-backed data source experiments
|
||||||
- [x] Table-like row or batch abstractions
|
- [x] Table-like row or batch abstractions
|
||||||
- [ ] Stable script/query file format
|
- [ ] Stable script/query file format
|
||||||
|
|||||||
@ -6,26 +6,35 @@ use std::fmt;
|
|||||||
|
|
||||||
use super::atom::Atom;
|
use super::atom::Atom;
|
||||||
use super::inference::{
|
use super::inference::{
|
||||||
NullGenerator, SkolemGenerator, Trigger, apply_rule_head, apply_rule_head_skolem, find_matches,
|
NullGenerator, SkolemGenerator, Trigger, apply_rule_head, apply_rule_head_skolem,
|
||||||
find_matches_for_step, head_is_satisfied,
|
filter_negated, find_matches, find_matches_for_step, head_is_satisfied,
|
||||||
};
|
};
|
||||||
use super::instance::Instance;
|
use super::instance::Instance;
|
||||||
use super::rule::{Egd, Rule};
|
use super::rule::{Egd, NegativeConstraint, Rule};
|
||||||
use super::substitution::Substitution;
|
use super::substitution::Substitution;
|
||||||
use super::term::Term;
|
use super::term::Term;
|
||||||
use super::union_find::{MergeError, UnionFind};
|
use super::union_find::{MergeError, UnionFind};
|
||||||
|
|
||||||
/// Error that can occur during chase with EGDs.
|
/// Error that can occur during the chase.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum ChaseError {
|
pub enum ChaseError {
|
||||||
/// EGD tried to equate two different constants.
|
/// EGD tried to equate two different constants.
|
||||||
EgdConflict(MergeError),
|
EgdConflict(MergeError),
|
||||||
|
/// A negative constraint's body was satisfied, indicating an
|
||||||
|
/// inconsistency in the derived instance.
|
||||||
|
ConstraintViolation {
|
||||||
|
/// Index of the violated constraint in the input slice.
|
||||||
|
constraint_index: usize,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for ChaseError {
|
impl fmt::Display for ChaseError {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
ChaseError::EgdConflict(e) => write!(f, "EGD conflict: {}", e),
|
ChaseError::EgdConflict(e) => write!(f, "EGD conflict: {}", e),
|
||||||
|
ChaseError::ConstraintViolation { constraint_index } => {
|
||||||
|
write!(f, "negative constraint {} violated", constraint_index)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -34,6 +43,7 @@ impl Error for ChaseError {
|
|||||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||||
match self {
|
match self {
|
||||||
ChaseError::EgdConflict(e) => Some(e),
|
ChaseError::EgdConflict(e) => Some(e),
|
||||||
|
ChaseError::ConstraintViolation { .. } => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -162,6 +172,57 @@ pub fn oblivious_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
|||||||
/// Re-application with the same frontier bindings reuses the same null, so
|
/// Re-application with the same frontier bindings reuses the same null, so
|
||||||
/// the chase terminates whenever the set of derivable facts is finite, even
|
/// the chase terminates whenever the set of derivable facts is finite, even
|
||||||
/// in the presence of existentials.
|
/// in the presence of existentials.
|
||||||
|
/// Run the chase with stratified evaluation for rules that use negation.
|
||||||
|
///
|
||||||
|
/// The rules are first partitioned into strata using
|
||||||
|
/// [`stratification::stratify`](super::stratification::stratify). Each
|
||||||
|
/// stratum is then evaluated to fixpoint in ascending order. Facts derived
|
||||||
|
/// in earlier strata are visible to later strata but not vice versa.
|
||||||
|
///
|
||||||
|
/// Returns an error in the [`ChaseResult`] if the rules are not
|
||||||
|
/// stratifiable (cycle through negation).
|
||||||
|
pub fn chase_stratified(instance: Instance, rules: &[Rule], config: ChaseConfig) -> ChaseResult {
|
||||||
|
use super::stratification::stratify;
|
||||||
|
|
||||||
|
let strata = match stratify(rules) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => {
|
||||||
|
return ChaseResult {
|
||||||
|
instance,
|
||||||
|
steps: 0,
|
||||||
|
terminated: false,
|
||||||
|
error: None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut current_instance = instance;
|
||||||
|
let mut total_steps = 0;
|
||||||
|
|
||||||
|
for stratum_indexes in &strata {
|
||||||
|
let stratum_rules: Vec<Rule> = stratum_indexes.iter().map(|&i| rules[i].clone()).collect();
|
||||||
|
let result = chase_with_config(current_instance, &stratum_rules, config.clone());
|
||||||
|
total_steps += result.steps;
|
||||||
|
current_instance = result.instance;
|
||||||
|
|
||||||
|
if let Some(error) = result.error {
|
||||||
|
return ChaseResult {
|
||||||
|
instance: current_instance,
|
||||||
|
steps: total_steps,
|
||||||
|
terminated: false,
|
||||||
|
error: Some(error),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ChaseResult {
|
||||||
|
instance: current_instance,
|
||||||
|
steps: total_steps,
|
||||||
|
terminated: true,
|
||||||
|
error: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn skolem_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
pub fn skolem_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
||||||
let config = ChaseConfig {
|
let config = ChaseConfig {
|
||||||
variant: ChaseVariant::Skolem,
|
variant: ChaseVariant::Skolem,
|
||||||
@ -250,6 +311,7 @@ fn standard_chase_step(
|
|||||||
for rule in rules {
|
for rule in rules {
|
||||||
// Find all ways to match the rule body against the instance
|
// Find all ways to match the rule body against the instance
|
||||||
let matches = find_matches_for_step(instance, delta, &rule.body);
|
let matches = find_matches_for_step(instance, delta, &rule.body);
|
||||||
|
let matches = filter_negated(instance, matches, &rule.negated_body);
|
||||||
|
|
||||||
for subst in matches {
|
for subst in matches {
|
||||||
// In standard chase, we only check if head is satisfied
|
// In standard chase, we only check if head is satisfied
|
||||||
@ -284,6 +346,7 @@ fn restricted_chase_step(
|
|||||||
for (rule_idx, rule) in rules.iter().enumerate() {
|
for (rule_idx, rule) in rules.iter().enumerate() {
|
||||||
// Find all ways to match the rule body against the instance
|
// Find all ways to match the rule body against the instance
|
||||||
let matches = find_matches_for_step(instance, delta, &rule.body);
|
let matches = find_matches_for_step(instance, delta, &rule.body);
|
||||||
|
let matches = filter_negated(instance, matches, &rule.negated_body);
|
||||||
|
|
||||||
for subst in matches {
|
for subst in matches {
|
||||||
// Create a trigger to check if we've already applied this
|
// Create a trigger to check if we've already applied this
|
||||||
@ -327,6 +390,7 @@ fn oblivious_chase_step(
|
|||||||
|
|
||||||
for rule in rules {
|
for rule in rules {
|
||||||
let matches = find_matches_for_step(instance, delta, &rule.body);
|
let matches = find_matches_for_step(instance, delta, &rule.body);
|
||||||
|
let matches = filter_negated(instance, matches, &rule.negated_body);
|
||||||
|
|
||||||
for subst in matches {
|
for subst in matches {
|
||||||
let derived = apply_rule_head(rule, &subst, null_gen);
|
let derived = apply_rule_head(rule, &subst, null_gen);
|
||||||
@ -357,6 +421,7 @@ fn skolem_chase_step(
|
|||||||
|
|
||||||
for (rule_index, rule) in rules.iter().enumerate() {
|
for (rule_index, rule) in rules.iter().enumerate() {
|
||||||
let matches = find_matches_for_step(instance, delta, &rule.body);
|
let matches = find_matches_for_step(instance, delta, &rule.body);
|
||||||
|
let matches = filter_negated(instance, matches, &rule.negated_body);
|
||||||
|
|
||||||
for subst in matches {
|
for subst in matches {
|
||||||
let derived = apply_rule_head_skolem(rule_index, rule, &subst, skolem_gen);
|
let derived = apply_rule_head_skolem(rule_index, rule, &subst, skolem_gen);
|
||||||
@ -429,6 +494,23 @@ fn apply_egds(
|
|||||||
Ok(made_changes)
|
Ok(made_changes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check negative constraints against the current instance.
|
||||||
|
/// Returns `Some(ChaseError)` on the first violated constraint.
|
||||||
|
fn check_negative_constraints(
|
||||||
|
instance: &Instance,
|
||||||
|
constraints: &[NegativeConstraint],
|
||||||
|
) -> Option<ChaseError> {
|
||||||
|
for (index, constraint) in constraints.iter().enumerate() {
|
||||||
|
let matches = find_matches(instance, &constraint.body);
|
||||||
|
if !matches.is_empty() {
|
||||||
|
return Some(ChaseError::ConstraintViolation {
|
||||||
|
constraint_index: index,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
/// Run chase with both TGDs and EGDs.
|
/// Run chase with both TGDs and EGDs.
|
||||||
///
|
///
|
||||||
/// The chase alternates between:
|
/// The chase alternates between:
|
||||||
@ -438,14 +520,16 @@ fn apply_egds(
|
|||||||
///
|
///
|
||||||
/// Terminates when fixpoint is reached or an error occurs (EGD conflict).
|
/// Terminates when fixpoint is reached or an error occurs (EGD conflict).
|
||||||
pub fn chase_with_egds(instance: Instance, tgds: &[Rule], egds: &[Egd]) -> ChaseResult {
|
pub fn chase_with_egds(instance: Instance, tgds: &[Rule], egds: &[Egd]) -> ChaseResult {
|
||||||
chase_full(instance, tgds, egds, ChaseConfig::default())
|
chase_full(instance, tgds, egds, &[], ChaseConfig::default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run the full chase with TGDs, EGDs, and custom configuration.
|
/// Run the full chase with TGDs, EGDs, negative constraints, and custom
|
||||||
|
/// configuration.
|
||||||
pub fn chase_full(
|
pub fn chase_full(
|
||||||
mut instance: Instance,
|
mut instance: Instance,
|
||||||
tgds: &[Rule],
|
tgds: &[Rule],
|
||||||
egds: &[Egd],
|
egds: &[Egd],
|
||||||
|
negative_constraints: &[NegativeConstraint],
|
||||||
config: ChaseConfig,
|
config: ChaseConfig,
|
||||||
) -> ChaseResult {
|
) -> ChaseResult {
|
||||||
let mut null_gen = NullGenerator::seeded_from(&instance, tgds);
|
let mut null_gen = NullGenerator::seeded_from(&instance, tgds);
|
||||||
@ -522,6 +606,17 @@ pub fn chase_full(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check negative constraints after all derivations.
|
||||||
|
if let Some(violation) = check_negative_constraints(&instance, negative_constraints)
|
||||||
|
{
|
||||||
|
return ChaseResult {
|
||||||
|
instance,
|
||||||
|
steps,
|
||||||
|
terminated: false,
|
||||||
|
error: Some(violation),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// Check for fixpoint
|
// Check for fixpoint
|
||||||
if !tgd_changes && !egd_changes {
|
if !tgd_changes && !egd_changes {
|
||||||
return ChaseResult {
|
return ChaseResult {
|
||||||
@ -960,13 +1055,111 @@ mod tests {
|
|||||||
semi_naive: false,
|
semi_naive: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
let result = chase_full(instance, &[tgd], &[], config);
|
let result = chase_full(instance, &[tgd], &[], &[], config);
|
||||||
|
|
||||||
assert!(result.terminated);
|
assert!(result.terminated);
|
||||||
assert!(result.error.is_none());
|
assert!(result.error.is_none());
|
||||||
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Negative constraint tests
|
||||||
|
|
||||||
|
use crate::chase::rule::NegativeConstraintBuilder;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_negative_constraint_fires_on_matching_body() {
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new("A", vec![Term::constant("x")]),
|
||||||
|
Atom::new("B", vec![Term::constant("x")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Constraint: A(X), B(X) -> ⊥
|
||||||
|
let nc = NegativeConstraintBuilder::new()
|
||||||
|
.when("A", vec![Term::var("X")])
|
||||||
|
.when("B", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_full(instance, &[], &[], &[nc], ChaseConfig::default());
|
||||||
|
assert!(!result.terminated);
|
||||||
|
assert!(matches!(
|
||||||
|
result.error,
|
||||||
|
Some(ChaseError::ConstraintViolation {
|
||||||
|
constraint_index: 0
|
||||||
|
})
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_negative_constraint_passes_when_no_match() {
|
||||||
|
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Constraint: A(X), B(X) -> ⊥ (B not in instance, so no match)
|
||||||
|
let nc = NegativeConstraintBuilder::new()
|
||||||
|
.when("A", vec![Term::var("X")])
|
||||||
|
.when("B", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_full(instance, &[], &[], &[nc], ChaseConfig::default());
|
||||||
|
assert!(result.terminated);
|
||||||
|
assert!(result.error.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_negative_constraint_fires_after_tgd_derivation() {
|
||||||
|
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// TGD: A(X) -> B(X)
|
||||||
|
let tgd = RuleBuilder::new()
|
||||||
|
.when("A", vec![Term::var("X")])
|
||||||
|
.then("B", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// Constraint: B(X) -> ⊥ (will fire after TGD derives B(x))
|
||||||
|
let nc = NegativeConstraintBuilder::new()
|
||||||
|
.when("B", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_full(instance, &[tgd], &[], &[nc], ChaseConfig::default());
|
||||||
|
assert!(!result.terminated);
|
||||||
|
assert!(matches!(
|
||||||
|
result.error,
|
||||||
|
Some(ChaseError::ConstraintViolation { .. })
|
||||||
|
));
|
||||||
|
// B(x) should still be in the instance since the TGD applied before
|
||||||
|
// the constraint check.
|
||||||
|
assert!(!result.instance.facts_for_predicate("B").is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_negative_constraint_index_reported_correctly() {
|
||||||
|
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// nc0: C(X) -> ⊥ (does not fire)
|
||||||
|
let nc0 = NegativeConstraintBuilder::new()
|
||||||
|
.when("C", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
// nc1: A(X) -> ⊥ (fires)
|
||||||
|
let nc1 = NegativeConstraintBuilder::new()
|
||||||
|
.when("A", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_full(instance, &[], &[], &[nc0, nc1], ChaseConfig::default());
|
||||||
|
assert!(matches!(
|
||||||
|
result.error,
|
||||||
|
Some(ChaseError::ConstraintViolation {
|
||||||
|
constraint_index: 1
|
||||||
|
})
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
// Oblivious chase tests
|
// Oblivious chase tests
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@ -287,6 +287,31 @@ impl MaterializedState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Filter a set of body-match substitutions against negated atoms.
|
||||||
|
///
|
||||||
|
/// A substitution is removed if, after applying it to any negated atom, the
|
||||||
|
/// resulting ground atom exists in the instance. This implements
|
||||||
|
/// negation-as-failure semantics: the negated atom must be absent for the
|
||||||
|
/// rule to fire.
|
||||||
|
pub(crate) fn filter_negated(
|
||||||
|
instance: &Instance,
|
||||||
|
results: Vec<Substitution>,
|
||||||
|
negated_body: &[Atom],
|
||||||
|
) -> Vec<Substitution> {
|
||||||
|
if negated_body.is_empty() {
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
results
|
||||||
|
.into_iter()
|
||||||
|
.filter(|subst| {
|
||||||
|
negated_body.iter().all(|atom| {
|
||||||
|
let ground = subst.apply_atom(atom);
|
||||||
|
!instance.contains(&ground)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn head_is_satisfied(instance: &Instance, rule: &Rule, subst: &Substitution) -> bool {
|
pub(crate) fn head_is_satisfied(instance: &Instance, rule: &Rule, subst: &Substitution) -> bool {
|
||||||
let head = rule
|
let head = rule
|
||||||
.head
|
.head
|
||||||
|
|||||||
@ -1,9 +1,17 @@
|
|||||||
//! Chase algorithm implementation for reasoning with tuple-generating dependencies (TGDs).
|
//! Chase algorithm implementation for reasoning with tuple-generating
|
||||||
|
//! dependencies (TGDs), equality-generating dependencies (EGDs), and
|
||||||
|
//! negative constraints.
|
||||||
|
//!
|
||||||
|
//! Supported chase variants: restricted, standard, oblivious, and Skolem.
|
||||||
|
//! Optional semi-naive evaluation reduces redundant matching. Rules may use
|
||||||
|
//! negation-as-failure in body atoms; [`stratification`] ensures correct
|
||||||
|
//! evaluation order when negation creates inter-predicate dependencies.
|
||||||
|
|
||||||
pub mod atom;
|
pub mod atom;
|
||||||
pub mod inference;
|
pub mod inference;
|
||||||
pub mod instance;
|
pub mod instance;
|
||||||
pub mod rule;
|
pub mod rule;
|
||||||
|
pub mod stratification;
|
||||||
pub mod substitution;
|
pub mod substitution;
|
||||||
pub mod term;
|
pub mod term;
|
||||||
pub mod union_find;
|
pub mod union_find;
|
||||||
@ -12,12 +20,14 @@ mod engine;
|
|||||||
|
|
||||||
pub use atom::Atom;
|
pub use atom::Atom;
|
||||||
pub use engine::{
|
pub use engine::{
|
||||||
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
|
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_stratified,
|
||||||
chase_with_egds, oblivious_chase, skolem_chase, standard_chase,
|
chase_with_config, chase_with_egds, oblivious_chase, skolem_chase, standard_chase,
|
||||||
};
|
};
|
||||||
pub use inference::{Derivation, MaterializedState, find_matches, materialize};
|
pub use inference::{Derivation, MaterializedState, find_matches, materialize};
|
||||||
pub use instance::{Instance, InstanceError};
|
pub use instance::{Instance, InstanceError};
|
||||||
pub use rule::{Egd, EgdBuilder, Equality, Rule, RuleBuilder};
|
pub use rule::{
|
||||||
|
Egd, EgdBuilder, Equality, NegativeConstraint, NegativeConstraintBuilder, Rule, RuleBuilder,
|
||||||
|
};
|
||||||
pub use substitution::Substitution;
|
pub use substitution::Substitution;
|
||||||
pub use term::Term;
|
pub use term::Term;
|
||||||
pub use union_find::{MergeError, UnionFind};
|
pub use union_find::{MergeError, UnionFind};
|
||||||
|
|||||||
@ -11,24 +11,50 @@ use super::term::Term;
|
|||||||
/// The body is a conjunction of atoms, and the head is a conjunction of atoms.
|
/// The body is a conjunction of atoms, and the head is a conjunction of atoms.
|
||||||
/// Variables in the head that don't appear in the body are "existential" -
|
/// Variables in the head that don't appear in the body are "existential" -
|
||||||
/// they will be replaced with fresh nulls during the chase.
|
/// they will be replaced with fresh nulls during the chase.
|
||||||
|
///
|
||||||
|
/// A rule may also carry negated body atoms. A negated body atom is
|
||||||
|
/// satisfied when no matching ground instance exists. Rules with negated
|
||||||
|
/// atoms require stratified evaluation to ensure correct semantics.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Rule {
|
pub struct Rule {
|
||||||
/// The body atoms (conjunction).
|
/// The positive body atoms (conjunction).
|
||||||
pub body: Vec<Atom>,
|
pub body: Vec<Atom>,
|
||||||
|
/// Negated body atoms (NAF). Each must not match in the instance for
|
||||||
|
/// the rule to fire.
|
||||||
|
pub negated_body: Vec<Atom>,
|
||||||
/// The head atoms (conjunction).
|
/// The head atoms (conjunction).
|
||||||
pub head: Vec<Atom>,
|
pub head: Vec<Atom>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Rule {
|
impl Rule {
|
||||||
/// Create a new rule.
|
/// Create a new rule with no negated body atoms.
|
||||||
pub fn new(body: Vec<Atom>, head: Vec<Atom>) -> Self {
|
pub fn new(body: Vec<Atom>, head: Vec<Atom>) -> Self {
|
||||||
Rule { body, head }
|
Rule {
|
||||||
|
body,
|
||||||
|
negated_body: Vec::new(),
|
||||||
|
head,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get all variables appearing in the body.
|
/// Create a new rule that includes negated body atoms.
|
||||||
|
pub fn new_with_negation(body: Vec<Atom>, negated_body: Vec<Atom>, head: Vec<Atom>) -> Self {
|
||||||
|
Rule {
|
||||||
|
body,
|
||||||
|
negated_body,
|
||||||
|
head,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if this rule uses negation-as-failure.
|
||||||
|
pub fn has_negation(&self) -> bool {
|
||||||
|
!self.negated_body.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get all variables appearing in the positive body.
|
||||||
pub fn body_variables(&self) -> HashSet<String> {
|
pub fn body_variables(&self) -> HashSet<String> {
|
||||||
self.body
|
self.body
|
||||||
.iter()
|
.iter()
|
||||||
|
.chain(self.negated_body.iter())
|
||||||
.flat_map(|a| a.variables())
|
.flat_map(|a| a.variables())
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect()
|
.collect()
|
||||||
@ -86,6 +112,7 @@ impl fmt::Display for Rule {
|
|||||||
/// Builder for creating rules with a fluent API.
|
/// Builder for creating rules with a fluent API.
|
||||||
pub struct RuleBuilder {
|
pub struct RuleBuilder {
|
||||||
body: Vec<Atom>,
|
body: Vec<Atom>,
|
||||||
|
negated_body: Vec<Atom>,
|
||||||
head: Vec<Atom>,
|
head: Vec<Atom>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,16 +120,23 @@ impl RuleBuilder {
|
|||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
RuleBuilder {
|
RuleBuilder {
|
||||||
body: Vec::new(),
|
body: Vec::new(),
|
||||||
|
negated_body: Vec::new(),
|
||||||
head: Vec::new(),
|
head: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add an atom to the body.
|
/// Add a positive atom to the body.
|
||||||
pub fn when(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
pub fn when(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
||||||
self.body.push(Atom::new(predicate, terms));
|
self.body.push(Atom::new(predicate, terms));
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a negated atom to the body (negation-as-failure).
|
||||||
|
pub fn when_not(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
||||||
|
self.negated_body.push(Atom::new(predicate, terms));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Add an atom to the head.
|
/// Add an atom to the head.
|
||||||
pub fn then(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
pub fn then(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
||||||
self.head.push(Atom::new(predicate, terms));
|
self.head.push(Atom::new(predicate, terms));
|
||||||
@ -111,7 +145,7 @@ impl RuleBuilder {
|
|||||||
|
|
||||||
/// Build the rule.
|
/// Build the rule.
|
||||||
pub fn build(self) -> Rule {
|
pub fn build(self) -> Rule {
|
||||||
Rule::new(self.body, self.head)
|
Rule::new_with_negation(self.body, self.negated_body, self.head)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,6 +274,73 @@ impl Default for EgdBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A negative constraint of the form: body -> ⊥
|
||||||
|
///
|
||||||
|
/// If the body matches against the current instance, the chase detects an
|
||||||
|
/// inconsistency and reports a constraint violation. Negative constraints
|
||||||
|
/// are checked after TGDs and EGDs in each round.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct NegativeConstraint {
|
||||||
|
/// The body atoms (conjunction).
|
||||||
|
pub body: Vec<Atom>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NegativeConstraint {
|
||||||
|
/// Create a new negative constraint.
|
||||||
|
pub fn new(body: Vec<Atom>) -> Self {
|
||||||
|
NegativeConstraint { body }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get all variables appearing in the body.
|
||||||
|
pub fn body_variables(&self) -> HashSet<String> {
|
||||||
|
self.body
|
||||||
|
.iter()
|
||||||
|
.flat_map(|a| a.variables())
|
||||||
|
.cloned()
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for NegativeConstraint {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
for (i, atom) in self.body.iter().enumerate() {
|
||||||
|
if i > 0 {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
write!(f, "{}", atom)?;
|
||||||
|
}
|
||||||
|
write!(f, " → ⊥")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builder for creating negative constraints with a fluent API.
|
||||||
|
pub struct NegativeConstraintBuilder {
|
||||||
|
body: Vec<Atom>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NegativeConstraintBuilder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
NegativeConstraintBuilder { body: Vec::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an atom to the body.
|
||||||
|
pub fn when(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
||||||
|
self.body.push(Atom::new(predicate, terms));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the negative constraint.
|
||||||
|
pub fn build(self) -> NegativeConstraint {
|
||||||
|
NegativeConstraint::new(self.body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for NegativeConstraintBuilder {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
222
src/chase/stratification.rs
Normal file
222
src/chase/stratification.rs
Normal file
@ -0,0 +1,222 @@
|
|||||||
|
//! Stratification analysis for rules with negation.
|
||||||
|
//!
|
||||||
|
//! A set of rules is *stratifiable* when its predicate dependency graph has
|
||||||
|
//! no cycle that contains a negative edge. This module computes the strata
|
||||||
|
//! (groups of rules that can be evaluated together) in bottom-up order.
|
||||||
|
//!
|
||||||
|
//! The algorithm:
|
||||||
|
//! 1. Build a predicate dependency graph: head predicates depend on body
|
||||||
|
//! predicates (positive edges) and negated body predicates (negative
|
||||||
|
//! edges).
|
||||||
|
//! 2. Assign each predicate a stratum number using the rule: stratum of `p`
|
||||||
|
//! is at least stratum of every predicate it positively depends on, and
|
||||||
|
//! strictly greater than every predicate it negatively depends on.
|
||||||
|
//! 3. If a cycle through negative edges prevents convergence, the program
|
||||||
|
//! is not stratifiable.
|
||||||
|
//! 4. Return the rules partitioned by stratum in ascending order.
|
||||||
|
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
use super::rule::Rule;
|
||||||
|
|
||||||
|
/// Error returned when a set of rules is not stratifiable.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct StratificationError {
|
||||||
|
/// Predicates involved in a cycle through negation.
|
||||||
|
pub cycle_predicates: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for StratificationError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"rules are not stratifiable: cycle through negation involving {}",
|
||||||
|
self.cycle_predicates.join(", ")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for StratificationError {}
|
||||||
|
|
||||||
|
/// Compute strata for a set of rules.
|
||||||
|
///
|
||||||
|
/// Returns a `Vec<Vec<usize>>` where each inner vector contains rule indexes
|
||||||
|
/// belonging to that stratum, ordered from stratum 0 (no negative
|
||||||
|
/// dependencies) upward. Rules without negation all land in stratum 0 when
|
||||||
|
/// there are no dependency chains through negation.
|
||||||
|
pub fn stratify(rules: &[Rule]) -> Result<Vec<Vec<usize>>, StratificationError> {
|
||||||
|
// Collect all predicates.
|
||||||
|
let mut all_predicates: HashSet<String> = HashSet::new();
|
||||||
|
for rule in rules {
|
||||||
|
for atom in &rule.body {
|
||||||
|
all_predicates.insert(atom.predicate.clone());
|
||||||
|
}
|
||||||
|
for atom in &rule.negated_body {
|
||||||
|
all_predicates.insert(atom.predicate.clone());
|
||||||
|
}
|
||||||
|
for atom in &rule.head {
|
||||||
|
all_predicates.insert(atom.predicate.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build dependency maps: for each head predicate, record positive and
|
||||||
|
// negative body-predicate dependencies.
|
||||||
|
let mut positive_deps: HashMap<String, HashSet<String>> = HashMap::new();
|
||||||
|
let mut negative_deps: HashMap<String, HashSet<String>> = HashMap::new();
|
||||||
|
for rule in rules {
|
||||||
|
let head_preds: Vec<String> = rule.head.iter().map(|a| a.predicate.clone()).collect();
|
||||||
|
for head_pred in &head_preds {
|
||||||
|
for body_atom in &rule.body {
|
||||||
|
positive_deps
|
||||||
|
.entry(head_pred.clone())
|
||||||
|
.or_default()
|
||||||
|
.insert(body_atom.predicate.clone());
|
||||||
|
}
|
||||||
|
for neg_atom in &rule.negated_body {
|
||||||
|
negative_deps
|
||||||
|
.entry(head_pred.clone())
|
||||||
|
.or_default()
|
||||||
|
.insert(neg_atom.predicate.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assign strata using iterative relaxation.
|
||||||
|
let mut stratum: HashMap<String, usize> =
|
||||||
|
all_predicates.iter().map(|p| (p.clone(), 0)).collect();
|
||||||
|
|
||||||
|
let max_iterations = all_predicates.len() + 1;
|
||||||
|
for _ in 0..max_iterations {
|
||||||
|
let mut changed = false;
|
||||||
|
for pred in &all_predicates {
|
||||||
|
let mut required = 0usize;
|
||||||
|
|
||||||
|
if let Some(pos) = positive_deps.get(pred) {
|
||||||
|
for dep in pos {
|
||||||
|
let dep_stratum = stratum.get(dep).copied().unwrap_or(0);
|
||||||
|
if dep_stratum > required {
|
||||||
|
required = dep_stratum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(neg) = negative_deps.get(pred) {
|
||||||
|
for dep in neg {
|
||||||
|
let dep_stratum = stratum.get(dep).copied().unwrap_or(0);
|
||||||
|
let needed = dep_stratum + 1;
|
||||||
|
if needed > required {
|
||||||
|
required = needed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if required != stratum[pred] {
|
||||||
|
stratum.insert(pred.clone(), required);
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !changed {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If any stratum exceeds the number of predicates, there is a cycle
|
||||||
|
// through negation.
|
||||||
|
let max_stratum = all_predicates.len();
|
||||||
|
let cycle_preds: Vec<String> = stratum
|
||||||
|
.iter()
|
||||||
|
.filter(|&(_, &s)| s >= max_stratum)
|
||||||
|
.map(|(p, _)| p.clone())
|
||||||
|
.collect();
|
||||||
|
if !cycle_preds.is_empty() {
|
||||||
|
return Err(StratificationError {
|
||||||
|
cycle_predicates: cycle_preds,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group rule indexes by the maximum stratum of their head predicates.
|
||||||
|
let num_strata = stratum.values().max().copied().unwrap_or(0) + 1;
|
||||||
|
let mut strata: Vec<Vec<usize>> = vec![Vec::new(); num_strata];
|
||||||
|
for (rule_index, rule) in rules.iter().enumerate() {
|
||||||
|
let rule_stratum = rule
|
||||||
|
.head
|
||||||
|
.iter()
|
||||||
|
.map(|a| stratum.get(&a.predicate).copied().unwrap_or(0))
|
||||||
|
.max()
|
||||||
|
.unwrap_or(0);
|
||||||
|
strata[rule_stratum].push(rule_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(strata)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::chase::rule::RuleBuilder;
|
||||||
|
use crate::chase::term::Term;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rules_without_negation_are_single_stratum() {
|
||||||
|
let rules = vec![
|
||||||
|
RuleBuilder::new()
|
||||||
|
.when("Edge", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.then("Path", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build(),
|
||||||
|
RuleBuilder::new()
|
||||||
|
.when("Path", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.when("Edge", vec![Term::var("Y"), Term::var("Z")])
|
||||||
|
.then("Path", vec![Term::var("X"), Term::var("Z")])
|
||||||
|
.build(),
|
||||||
|
];
|
||||||
|
|
||||||
|
let strata = stratify(&rules).unwrap();
|
||||||
|
assert_eq!(strata.len(), 1);
|
||||||
|
assert_eq!(strata[0].len(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn negation_puts_dependent_rule_in_higher_stratum() {
|
||||||
|
let rules = vec![
|
||||||
|
// Stratum 0: Edge(X, Y) -> Path(X, Y)
|
||||||
|
RuleBuilder::new()
|
||||||
|
.when("Edge", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.then("Path", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build(),
|
||||||
|
// Stratum 1: Node(X), NOT Path(X, Y) -> Isolated(X)
|
||||||
|
RuleBuilder::new()
|
||||||
|
.when("Node", vec![Term::var("X")])
|
||||||
|
.when_not("Path", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.then("Isolated", vec![Term::var("X")])
|
||||||
|
.build(),
|
||||||
|
];
|
||||||
|
|
||||||
|
let strata = stratify(&rules).unwrap();
|
||||||
|
assert!(strata.len() >= 2);
|
||||||
|
// Rule 0 should be in an earlier stratum than rule 1.
|
||||||
|
let rule0_stratum = strata.iter().position(|s| s.contains(&0)).unwrap();
|
||||||
|
let rule1_stratum = strata.iter().position(|s| s.contains(&1)).unwrap();
|
||||||
|
assert!(rule0_stratum < rule1_stratum);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cycle_through_negation_is_rejected() {
|
||||||
|
// A(X) -> B(X), B(X), NOT A(X) -> A(X)
|
||||||
|
// A depends negatively on B which depends on A: cycle through negation.
|
||||||
|
let rules = vec![
|
||||||
|
RuleBuilder::new()
|
||||||
|
.when("A", vec![Term::var("X")])
|
||||||
|
.then("B", vec![Term::var("X")])
|
||||||
|
.build(),
|
||||||
|
RuleBuilder::new()
|
||||||
|
.when("B", vec![Term::var("X")])
|
||||||
|
.when_not("A", vec![Term::var("X")])
|
||||||
|
.then("A", vec![Term::var("X")])
|
||||||
|
.build(),
|
||||||
|
];
|
||||||
|
|
||||||
|
let result = stratify(&rules);
|
||||||
|
assert!(result.is_err());
|
||||||
|
}
|
||||||
|
}
|
||||||
247
src/io/csv.rs
Normal file
247
src/io/csv.rs
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
//! CSV-based fact import and export for [`Instance`].
|
||||||
|
//!
|
||||||
|
//! Each CSV file corresponds to one predicate. The file name (minus the
|
||||||
|
//! `.csv` extension) is used as the predicate name. Each row becomes one
|
||||||
|
//! ground atom with string-valued constant terms.
|
||||||
|
//!
|
||||||
|
//! ## Import
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! alice,bob
|
||||||
|
//! bob,carol
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! Loading the above as predicate `Parent` yields:
|
||||||
|
//! `Parent(alice, bob)` and `Parent(bob, carol)`.
|
||||||
|
//!
|
||||||
|
//! ## Export
|
||||||
|
//!
|
||||||
|
//! Exporting an instance writes one CSV file per predicate into a target
|
||||||
|
//! directory, using the predicate name as the file stem.
|
||||||
|
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt;
|
||||||
|
use std::fs;
|
||||||
|
use std::io::{self, BufRead, Write};
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use crate::chase::atom::Atom;
|
||||||
|
use crate::chase::instance::Instance;
|
||||||
|
use crate::chase::term::Term;
|
||||||
|
|
||||||
|
/// Errors from CSV import/export operations.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum CsvError {
|
||||||
|
/// An I/O error occurred while reading or writing files.
|
||||||
|
Io(io::Error),
|
||||||
|
/// A CSV row contained a non-ground term representation (should not
|
||||||
|
/// normally happen with well-formed CSV data).
|
||||||
|
NonGroundValue(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for CsvError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Io(e) => write!(f, "CSV I/O error: {}", e),
|
||||||
|
Self::NonGroundValue(v) => write!(f, "non-ground value in CSV: {}", v),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for CsvError {
|
||||||
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||||
|
match self {
|
||||||
|
Self::Io(e) => Some(e),
|
||||||
|
Self::NonGroundValue(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<io::Error> for CsvError {
|
||||||
|
fn from(e: io::Error) -> Self {
|
||||||
|
Self::Io(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load facts for a single predicate from a CSV reader.
|
||||||
|
///
|
||||||
|
/// Each line is split on commas. Leading and trailing whitespace on each
|
||||||
|
/// field is trimmed. Empty lines are skipped.
|
||||||
|
pub fn load_predicate<R: BufRead>(predicate: &str, reader: R) -> Result<Vec<Atom>, CsvError> {
|
||||||
|
let mut atoms = Vec::new();
|
||||||
|
for line in reader.lines() {
|
||||||
|
let line = line?;
|
||||||
|
let trimmed = line.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let terms: Vec<Term> = trimmed
|
||||||
|
.split(',')
|
||||||
|
.map(|s| Term::constant(s.trim()))
|
||||||
|
.collect();
|
||||||
|
atoms.push(Atom::new(predicate, terms));
|
||||||
|
}
|
||||||
|
Ok(atoms)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load all `.csv` files from a directory into an [`Instance`].
|
||||||
|
///
|
||||||
|
/// Each file's stem (the part before `.csv`) becomes the predicate name.
|
||||||
|
pub fn load_directory(dir: &Path) -> Result<Instance, CsvError> {
|
||||||
|
let mut instance = Instance::new();
|
||||||
|
for entry in fs::read_dir(dir)? {
|
||||||
|
let entry = entry?;
|
||||||
|
let path = entry.path();
|
||||||
|
if path.extension().is_some_and(|ext| ext == "csv") {
|
||||||
|
let predicate = path
|
||||||
|
.file_stem()
|
||||||
|
.unwrap_or_default()
|
||||||
|
.to_string_lossy()
|
||||||
|
.to_string();
|
||||||
|
let file = fs::File::open(&path)?;
|
||||||
|
let reader = io::BufReader::new(file);
|
||||||
|
for atom in load_predicate(&predicate, reader)? {
|
||||||
|
instance.add(atom);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(instance)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Export an [`Instance`] to CSV files, one per predicate, in the given
|
||||||
|
/// directory. The directory is created if it does not exist.
|
||||||
|
pub fn export_directory(instance: &Instance, dir: &Path) -> Result<(), CsvError> {
|
||||||
|
fs::create_dir_all(dir)?;
|
||||||
|
|
||||||
|
// Gather predicates.
|
||||||
|
let mut predicates: Vec<String> = Vec::new();
|
||||||
|
for atom in instance.iter() {
|
||||||
|
if !predicates.contains(&atom.predicate) {
|
||||||
|
predicates.push(atom.predicate.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
predicates.sort();
|
||||||
|
|
||||||
|
for predicate in &predicates {
|
||||||
|
let path = dir.join(format!("{}.csv", predicate));
|
||||||
|
let mut file = fs::File::create(&path)?;
|
||||||
|
let mut facts: Vec<&Atom> = instance.facts_for_predicate(predicate);
|
||||||
|
facts.sort_by(|a, b| format!("{}", a).cmp(&format!("{}", b)));
|
||||||
|
|
||||||
|
for atom in facts {
|
||||||
|
let fields: Vec<String> = atom
|
||||||
|
.terms
|
||||||
|
.iter()
|
||||||
|
.map(|t| match t {
|
||||||
|
Term::Constant(v) => v.clone(),
|
||||||
|
Term::Null(id) => format!("__null_{}", id),
|
||||||
|
Term::Variable(v) => format!("?{}", v),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
writeln!(file, "{}", fields.join(","))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Export facts for a single predicate as a CSV string.
|
||||||
|
pub fn export_predicate(instance: &Instance, predicate: &str) -> String {
|
||||||
|
let mut facts: Vec<&Atom> = instance.facts_for_predicate(predicate);
|
||||||
|
facts.sort_by(|a, b| format!("{}", a).cmp(&format!("{}", b)));
|
||||||
|
let mut out = String::new();
|
||||||
|
for atom in facts {
|
||||||
|
let fields: Vec<String> = atom
|
||||||
|
.terms
|
||||||
|
.iter()
|
||||||
|
.map(|t| match t {
|
||||||
|
Term::Constant(v) => v.clone(),
|
||||||
|
Term::Null(id) => format!("__null_{}", id),
|
||||||
|
Term::Variable(v) => format!("?{}", v),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
out.push_str(&fields.join(","));
|
||||||
|
out.push('\n');
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn load_predicate_from_csv_lines() {
|
||||||
|
let data = "alice,bob\nbob,carol\n";
|
||||||
|
let atoms = load_predicate("Parent", io::BufReader::new(data.as_bytes())).unwrap();
|
||||||
|
assert_eq!(atoms.len(), 2);
|
||||||
|
assert_eq!(atoms[0].predicate, "Parent");
|
||||||
|
assert_eq!(atoms[0].terms[0], Term::constant("alice"));
|
||||||
|
assert_eq!(atoms[0].terms[1], Term::constant("bob"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn load_trims_whitespace_and_skips_empty_lines() {
|
||||||
|
let data = " alice , bob \n\n carol , dave \n";
|
||||||
|
let atoms = load_predicate("R", io::BufReader::new(data.as_bytes())).unwrap();
|
||||||
|
assert_eq!(atoms.len(), 2);
|
||||||
|
assert_eq!(atoms[0].terms[0], Term::constant("alice"));
|
||||||
|
assert_eq!(atoms[0].terms[1], Term::constant("bob"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn roundtrip_export_and_reimport() {
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("bob"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
Atom::new("Person", vec![Term::constant("alice")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let dir = std::env::temp_dir().join("qe_csv_test_roundtrip");
|
||||||
|
let _ = fs::remove_dir_all(&dir);
|
||||||
|
export_directory(&instance, &dir).unwrap();
|
||||||
|
|
||||||
|
let reloaded = load_directory(&dir).unwrap();
|
||||||
|
assert_eq!(reloaded.len(), instance.len());
|
||||||
|
assert_eq!(
|
||||||
|
reloaded.facts_for_predicate("Parent").len(),
|
||||||
|
instance.facts_for_predicate("Parent").len(),
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
reloaded.facts_for_predicate("Person").len(),
|
||||||
|
instance.facts_for_predicate("Person").len(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let _ = fs::remove_dir_all(&dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn export_predicate_to_string() {
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("bob"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let csv = export_predicate(&instance, "Parent");
|
||||||
|
let lines: Vec<&str> = csv.trim().split('\n').collect();
|
||||||
|
assert_eq!(lines.len(), 2);
|
||||||
|
assert!(lines.contains(&"alice,bob"));
|
||||||
|
assert!(lines.contains(&"bob,carol"));
|
||||||
|
}
|
||||||
|
}
|
||||||
7
src/io/mod.rs
Normal file
7
src/io/mod.rs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
//! Fact import and export.
|
||||||
|
//!
|
||||||
|
//! This module provides helpers for moving facts between the chase
|
||||||
|
//! [`Instance`](crate::chase::Instance) and external file formats.
|
||||||
|
//! Currently the only supported format is CSV.
|
||||||
|
|
||||||
|
pub mod csv;
|
||||||
18
src/lib.rs
18
src/lib.rs
@ -1,15 +1,18 @@
|
|||||||
//! Query-engine playground crate.
|
//! Query-engine playground crate.
|
||||||
//!
|
//!
|
||||||
//! The current codebase primarily contains a chase-based reasoning core plus
|
//! The current codebase contains a chase-based reasoning core with four
|
||||||
//! lightweight frontends for experimenting with rule-driven query answering.
|
//! chase variants (restricted, standard, oblivious, Skolem), optional
|
||||||
//! It also contains an early relational and SQL scaffold for a narrow
|
//! semi-naive evaluation, stratified negation, and negative constraints.
|
||||||
//! `SELECT-FROM-WHERE-ORDER BY` slice with basic joins, aliases, and
|
//! It also provides a relational and SQL scaffold covering a
|
||||||
//! conjunctions. It is not yet a full SQL engine.
|
//! `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` slice with joins, aliases,
|
||||||
|
//! aggregates (`COUNT`, `SUM`, `MIN`, `MAX`, `AVG`), and a physical
|
||||||
|
//! operator layer with filter push-down. It is not yet a full SQL engine.
|
||||||
|
|
||||||
pub mod catalog;
|
pub mod catalog;
|
||||||
pub mod chase;
|
pub mod chase;
|
||||||
pub mod execution;
|
pub mod execution;
|
||||||
pub mod frontend;
|
pub mod frontend;
|
||||||
|
pub mod io;
|
||||||
pub mod planner;
|
pub mod planner;
|
||||||
pub mod relational;
|
pub mod relational;
|
||||||
pub mod sql;
|
pub mod sql;
|
||||||
@ -17,6 +20,7 @@ pub mod sql;
|
|||||||
// Curated convenience re-exports for the current public crate surface.
|
// Curated convenience re-exports for the current public crate surface.
|
||||||
// Lower-level reasoning and provenance APIs remain under `query_engine::chase`.
|
// Lower-level reasoning and provenance APIs remain under `query_engine::chase`.
|
||||||
pub use chase::{
|
pub use chase::{
|
||||||
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, Rule, RuleBuilder, Term,
|
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Instance, NegativeConstraint,
|
||||||
chase, chase_with_config, oblivious_chase, skolem_chase, standard_chase,
|
NegativeConstraintBuilder, Rule, RuleBuilder, Term, chase, chase_stratified, chase_with_config,
|
||||||
|
oblivious_chase, skolem_chase, standard_chase,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -6,8 +6,8 @@
|
|||||||
//! - [`sql`]: translation from SQL AST into the current logical-plan subset
|
//! - [`sql`]: translation from SQL AST into the current logical-plan subset
|
||||||
//!
|
//!
|
||||||
//! At the moment this is intentionally small and covers the current SQL slice:
|
//! At the moment this is intentionally small and covers the current SQL slice:
|
||||||
//! filtering, ordering, projection, and basic joins over predicate-backed
|
//! filtering, ordering, projection, basic joins, `GROUP BY` aggregation,
|
||||||
//! tables.
|
//! and `LIMIT` over predicate-backed tables.
|
||||||
|
|
||||||
pub mod logical;
|
pub mod logical;
|
||||||
pub mod sql;
|
pub mod sql;
|
||||||
|
|||||||
@ -1,8 +1,11 @@
|
|||||||
//! Minimal SQL front-end scaffolding.
|
//! Minimal SQL front-end scaffolding.
|
||||||
//!
|
//!
|
||||||
//! The current SQL layer supports a narrow `SELECT-FROM-WHERE-ORDER BY` subset
|
//! The current SQL layer supports a narrow
|
||||||
//! over predicate-backed tables, including comma-join style multi-table
|
//! `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` subset over predicate-backed
|
||||||
//! queries, table aliases, and `AND` in filter predicates. It provides:
|
//! tables, including comma-join style multi-table queries, table aliases,
|
||||||
|
//! `AND`/`OR` filter predicates, `=`/`!=`/`<>` operators, and aggregate
|
||||||
|
//! functions (`COUNT`, `SUM`, `MIN`, `MAX`, `AVG`) with `GROUP BY`. It
|
||||||
|
//! provides:
|
||||||
//!
|
//!
|
||||||
//! - a small AST in [`ast`]
|
//! - a small AST in [`ast`]
|
||||||
//! - a parser in [`parser`]
|
//! - a parser in [`parser`]
|
||||||
|
|||||||
@ -63,7 +63,7 @@ enum Token {
|
|||||||
Ne,
|
Ne,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a `SELECT-FROM-WHERE-ORDER BY` query in the current SQL subset.
|
/// Parse a `SELECT-FROM-WHERE-GROUP BY-ORDER BY-LIMIT` query in the current SQL subset.
|
||||||
pub fn parse_select(input: &str) -> Result<Select, ParseError> {
|
pub fn parse_select(input: &str) -> Result<Select, ParseError> {
|
||||||
let tokens = tokenize(input)?;
|
let tokens = tokenize(input)?;
|
||||||
let mut parser = Parser::new(tokens);
|
let mut parser = Parser::new(tokens);
|
||||||
|
|||||||
@ -185,3 +185,63 @@ fn test_self_join_rule() {
|
|||||||
let same_team = result.instance.facts_for_predicate("SameTeam");
|
let same_team = result.instance.facts_for_predicate("SameTeam");
|
||||||
assert_eq!(same_team.len(), 5);
|
assert_eq!(same_team.len(), 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stratified_negation_derives_isolated_nodes() {
|
||||||
|
use query_engine::chase::{ChaseConfig, chase_stratified};
|
||||||
|
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new("Node", vec![Term::constant("a")]),
|
||||||
|
Atom::new("Node", vec![Term::constant("b")]),
|
||||||
|
Atom::new("Node", vec![Term::constant("c")]),
|
||||||
|
Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Stratum 0: Edge(X, Y) -> Connected(X)
|
||||||
|
let rule1 = RuleBuilder::new()
|
||||||
|
.when("Edge", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.then("Connected", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// Stratum 1: Node(X), NOT Connected(X) -> Isolated(X)
|
||||||
|
let rule2 = RuleBuilder::new()
|
||||||
|
.when("Node", vec![Term::var("X")])
|
||||||
|
.when_not("Connected", vec![Term::var("X")])
|
||||||
|
.then("Isolated", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_stratified(instance, &[rule1, rule2], ChaseConfig::default());
|
||||||
|
|
||||||
|
assert!(result.terminated);
|
||||||
|
let connected = result.instance.facts_for_predicate("Connected");
|
||||||
|
assert_eq!(connected.len(), 1); // only "a" has an outgoing edge
|
||||||
|
let isolated = result.instance.facts_for_predicate("Isolated");
|
||||||
|
assert_eq!(isolated.len(), 2); // "b" and "c"
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn negation_without_stratification_fires_in_single_round() {
|
||||||
|
// A simple case: negation refers to a predicate that is never derived.
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new("Person", vec![Term::constant("alice")]),
|
||||||
|
Atom::new("Person", vec![Term::constant("bob")]),
|
||||||
|
Atom::new("Fired", vec![Term::constant("bob")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Person(X), NOT Fired(X) -> Active(X)
|
||||||
|
let rule = RuleBuilder::new()
|
||||||
|
.when("Person", vec![Term::var("X")])
|
||||||
|
.when_not("Fired", vec![Term::var("X")])
|
||||||
|
.then("Active", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase(instance, &[rule]);
|
||||||
|
assert!(result.terminated);
|
||||||
|
let active = result.instance.facts_for_predicate("Active");
|
||||||
|
assert_eq!(active.len(), 1);
|
||||||
|
assert_eq!(active[0].terms[0], Term::constant("alice"));
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user