Compare commits

..

2 Commits

Author SHA1 Message Date
Hassan Abedi
ee21bee039 Add predicate indexing, standard chase variant, and EGD support 2026-03-16 14:24:23 +01:00
Hassan Abedi
0e2db4e07b WIP 2026-03-16 13:11:50 +01:00
9 changed files with 1193 additions and 16 deletions

View File

@ -1,14 +1,13 @@
## Chase-rs
An implementation of the chase algorithm in Rust for advanced reasoning engines.
An implementation of the chase algorithm in Rust for reasoning engines.
### Overview
The chase algorithm is a fundamental technique in context of database theory and knowledge representation used for:
The chase algorithm is a technique in context of database theory and knowledge representation used for:
- Query answering under tuple-generating dependencies (TGDs)
- Computing universal models
- Ontology-based data access (OBDA)
- Datalog with existential rules
This implementation provides a restricted-chase style materialization with active-trigger checks.

View File

@ -1,12 +1,44 @@
//! Core chase algorithm implementation.
use std::collections::HashSet;
use std::error::Error;
use std::fmt;
use super::atom::Atom;
use super::instance::Instance;
use super::rule::Rule;
use super::rule::{Egd, Rule};
use super::substitution::{Substitution, unify_atom};
use super::term::Term;
use super::union_find::{MergeError, UnionFind};
/// Error that can occur during chase with EGDs.
#[derive(Debug, Clone)]
pub enum ChaseError {
/// EGD tried to equate two different constants.
EgdConflict(MergeError),
}
impl fmt::Display for ChaseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ChaseError::EgdConflict(e) => write!(f, "EGD conflict: {}", e),
}
}
}
impl Error for ChaseError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
ChaseError::EgdConflict(e) => Some(e),
}
}
}
impl From<MergeError> for ChaseError {
fn from(e: MergeError) -> Self {
ChaseError::EgdConflict(e)
}
}
/// Result of running the chase algorithm.
#[derive(Debug)]
@ -17,6 +49,20 @@ pub struct ChaseResult {
pub steps: usize,
/// Whether the chase terminated (vs hitting a limit).
pub terminated: bool,
/// Error that occurred during chase (e.g., EGD conflict).
pub error: Option<ChaseError>,
}
/// The variant of chase algorithm to use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ChaseVariant {
/// Standard chase: no trigger tracking, terminates only when no new facts.
/// May re-apply rules with the same bindings if heads are satisfied.
Standard,
/// Restricted chase: uses trigger tracking to avoid re-applying the same
/// rule with the same frontier variable bindings. This is the default.
#[default]
Restricted,
}
/// Configuration for the chase algorithm.
@ -24,11 +70,16 @@ pub struct ChaseResult {
pub struct ChaseConfig {
/// Maximum number of chase steps before giving up.
pub max_steps: usize,
/// The chase variant to use.
pub variant: ChaseVariant,
}
impl Default for ChaseConfig {
fn default() -> Self {
ChaseConfig { max_steps: 10_000 }
ChaseConfig {
max_steps: 10_000,
variant: ChaseVariant::default(),
}
}
}
@ -79,7 +130,7 @@ impl Trigger {
}
}
/// Run the standard chase algorithm.
/// Run the restricted chase algorithm (default).
///
/// The chase repeatedly applies rules to derive new facts until no more
/// facts can be derived (fixpoint) or a limit is reached.
@ -91,6 +142,22 @@ pub fn chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
chase_with_config(instance, rules, ChaseConfig::default())
}
/// Run the standard chase algorithm (no trigger tracking).
///
/// The standard chase applies rules whenever the body matches, without
/// tracking which rule applications have been performed. It terminates
/// only when no new facts are derived.
///
/// This is simpler but may be less efficient for rules with existentials,
/// as it doesn't prevent re-checking already-satisfied rule applications.
pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
let config = ChaseConfig {
variant: ChaseVariant::Standard,
..Default::default()
};
chase_with_config(instance, rules, config)
}
/// Run the chase with custom configuration.
pub fn chase_with_config(
mut instance: Instance,
@ -107,10 +174,16 @@ pub fn chase_with_config(
instance,
steps,
terminated: false,
error: None,
};
}
let new_facts = chase_step(&instance, rules, &mut null_gen, &mut applied_triggers);
let new_facts = match config.variant {
ChaseVariant::Standard => standard_chase_step(&instance, rules, &mut null_gen),
ChaseVariant::Restricted => {
restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers)
}
};
if new_facts.is_empty() {
// Fixpoint reached
@ -118,6 +191,7 @@ pub fn chase_with_config(
instance,
steps,
terminated: true,
error: None,
};
}
@ -128,8 +202,40 @@ pub fn chase_with_config(
}
}
/// Perform a single chase step: find all applicable rule instances and derive new facts.
fn chase_step(
/// Perform a single standard chase step: apply rules without trigger tracking.
fn standard_chase_step(
instance: &Instance,
rules: &[Rule],
null_gen: &mut NullGenerator,
) -> Vec<Atom> {
let mut new_facts = Vec::new();
for rule in rules {
// Find all ways to match the rule body against the instance
let matches = find_body_matches(instance, &rule.body);
for subst in matches {
// In standard chase, we only check if head is satisfied
if head_is_satisfied(instance, rule, &subst) {
continue;
}
// Generate head atoms with this substitution
let derived = apply_rule_head(rule, &subst, null_gen);
for fact in derived {
if !instance.contains(&fact) {
new_facts.push(fact);
}
}
}
}
new_facts
}
/// Perform a single restricted chase step: use trigger tracking to avoid redundant applications.
fn restricted_chase_step(
instance: &Instance,
rules: &[Rule],
null_gen: &mut NullGenerator,
@ -186,8 +292,8 @@ fn find_body_matches(instance: &Instance, body: &[Atom]) -> Vec<Substitution> {
// Apply current substitution to the body atom
let pattern = subst.apply_atom(body_atom);
// Find all facts that match this pattern
for fact in instance.facts_for_predicate(&pattern.predicate) {
// Find all facts that match this pattern using indexed lookup
for fact in instance.facts_matching_pattern(&pattern) {
if let Some(new_subst) = unify_atom(&pattern, fact) {
// Combine substitutions
let mut combined = subst.clone();
@ -259,6 +365,145 @@ fn term_null_id(term: &Term) -> Option<usize> {
}
}
/// A trigger for EGD applications, tracking which EGD was applied with which body bindings.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EgdTrigger {
egd_index: usize,
/// All body variable bindings for the EGD.
body_bindings: Vec<(String, Term)>,
}
impl EgdTrigger {
fn new(egd_index: usize, egd: &Egd, subst: &Substitution) -> Self {
let body_vars = egd.body_variables();
let mut bindings: Vec<_> = body_vars
.into_iter()
.filter_map(|v| subst.get(&v).map(|t| (v, t.clone())))
.collect();
bindings.sort_by(|a, b| a.0.cmp(&b.0));
EgdTrigger {
egd_index,
body_bindings: bindings,
}
}
}
/// Apply EGDs to the instance, returning any merge error that occurs.
fn apply_egds(
instance: &Instance,
egds: &[Egd],
uf: &mut UnionFind,
applied_egd_triggers: &mut HashSet<EgdTrigger>,
) -> Result<bool, MergeError> {
let mut made_changes = false;
for (egd_idx, egd) in egds.iter().enumerate() {
let matches = find_body_matches(instance, &egd.body);
for subst in matches {
let trigger = EgdTrigger::new(egd_idx, egd, &subst);
if applied_egd_triggers.contains(&trigger) {
continue;
}
applied_egd_triggers.insert(trigger);
// Get the terms that should be equal
let left = subst.apply_term(&egd.equality.left);
let right = subst.apply_term(&egd.equality.right);
// Merge them in the union-find
uf.merge(&left, &right)?;
made_changes = true;
}
}
Ok(made_changes)
}
/// Run chase with both TGDs and EGDs.
///
/// The chase alternates between:
/// 1. Applying TGDs to derive new facts
/// 2. Applying EGDs to enforce equalities
/// 3. Canonicalizing the instance to reflect merged nulls
///
/// Terminates when fixpoint is reached or an error occurs (EGD conflict).
pub fn chase_with_egds(instance: Instance, tgds: &[Rule], egds: &[Egd]) -> ChaseResult {
chase_full(instance, tgds, egds, ChaseConfig::default())
}
/// Run the full chase with TGDs, EGDs, and custom configuration.
pub fn chase_full(
mut instance: Instance,
tgds: &[Rule],
egds: &[Egd],
config: ChaseConfig,
) -> ChaseResult {
let mut null_gen = NullGenerator::seeded_from(&instance, tgds);
let mut applied_triggers: HashSet<Trigger> = HashSet::new();
let mut applied_egd_triggers: HashSet<EgdTrigger> = HashSet::new();
let mut uf = UnionFind::new();
let mut steps = 0;
loop {
if steps >= config.max_steps {
return ChaseResult {
instance,
steps,
terminated: false,
error: None,
};
}
// Apply TGDs
let new_facts = match config.variant {
ChaseVariant::Standard => standard_chase_step(&instance, tgds, &mut null_gen),
ChaseVariant::Restricted => {
restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers)
}
};
let tgd_changes = !new_facts.is_empty();
for fact in new_facts {
instance.add(fact);
}
// Apply EGDs
let egd_result = apply_egds(&instance, egds, &mut uf, &mut applied_egd_triggers);
match egd_result {
Err(e) => {
return ChaseResult {
instance,
steps,
terminated: false,
error: Some(ChaseError::EgdConflict(e)),
};
}
Ok(egd_changes) => {
// Canonicalize instance if EGDs made changes
if egd_changes {
instance = instance.canonicalize(&mut uf);
}
// Check for fixpoint
if !tgd_changes && !egd_changes {
return ChaseResult {
instance,
steps,
terminated: true,
error: None,
};
}
}
}
steps += 1;
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -399,4 +644,286 @@ mod tests {
assert!(result.terminated);
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
}
#[test]
fn test_standard_chase_basic() {
// Test standard chase with datalog rules
let instance: Instance = vec![
Atom::new(
"Parent",
vec![Term::constant("alice"), Term::constant("bob")],
),
Atom::new(
"Parent",
vec![Term::constant("bob"), Term::constant("carol")],
),
]
.into_iter()
.collect();
let rule1 = RuleBuilder::new()
.when("Parent", vec![Term::var("X"), Term::var("Y")])
.then("Ancestor", vec![Term::var("X"), Term::var("Y")])
.build();
let rule2 = RuleBuilder::new()
.when("Ancestor", vec![Term::var("X"), Term::var("Y")])
.when("Parent", vec![Term::var("Y"), Term::var("Z")])
.then("Ancestor", vec![Term::var("X"), Term::var("Z")])
.build();
let result = standard_chase(instance, &[rule1, rule2]);
assert!(result.terminated);
let ancestors = result.instance.facts_for_predicate("Ancestor");
assert_eq!(ancestors.len(), 3);
}
#[test]
fn test_standard_and_restricted_reach_same_fixpoint() {
// For datalog rules (no existentials), both should reach the same result
let instance: Instance = vec![
Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]),
Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]),
]
.into_iter()
.collect();
let rule1 = RuleBuilder::new()
.when("Edge", vec![Term::var("X"), Term::var("Y")])
.then("Path", vec![Term::var("X"), Term::var("Y")])
.build();
let rule2 = RuleBuilder::new()
.when("Path", vec![Term::var("X"), Term::var("Y")])
.when("Edge", vec![Term::var("Y"), Term::var("Z")])
.then("Path", vec![Term::var("X"), Term::var("Z")])
.build();
let rules = vec![rule1, rule2];
let standard_result = standard_chase(instance.clone(), &rules);
let restricted_result = chase(instance, &rules);
assert!(standard_result.terminated);
assert!(restricted_result.terminated);
// Both should derive the same paths
let standard_paths = standard_result.instance.facts_for_predicate("Path");
let restricted_paths = restricted_result.instance.facts_for_predicate("Path");
assert_eq!(standard_paths.len(), restricted_paths.len());
}
#[test]
fn test_standard_chase_with_existentials() {
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
.into_iter()
.collect();
let rule = RuleBuilder::new()
.when("Person", vec![Term::var("X")])
.then("HasSSN", vec![Term::var("X"), Term::var("Y")])
.build();
let result = standard_chase(instance, &[rule]);
assert!(result.terminated);
let has_ssn = result.instance.facts_for_predicate("HasSSN");
assert_eq!(has_ssn.len(), 1);
assert!(matches!(has_ssn[0].terms[1], Term::Null(_)));
}
#[test]
fn test_chase_variant_config() {
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
.into_iter()
.collect();
let rule = RuleBuilder::new()
.when("A", vec![Term::var("X")])
.then("B", vec![Term::var("X")])
.build();
// Test standard variant via config
let standard_config = ChaseConfig {
variant: ChaseVariant::Standard,
..Default::default()
};
let standard_result = chase_with_config(instance.clone(), &[rule.clone()], standard_config);
// Test restricted variant via config
let restricted_config = ChaseConfig {
variant: ChaseVariant::Restricted,
..Default::default()
};
let restricted_result = chase_with_config(instance, &[rule], restricted_config);
assert!(standard_result.terminated);
assert!(restricted_result.terminated);
assert_eq!(
standard_result.instance.facts_for_predicate("B").len(),
restricted_result.instance.facts_for_predicate("B").len()
);
}
// EGD tests
use crate::chase::rule::EgdBuilder;
#[test]
fn test_egd_functional_dependency_merges_nulls() {
// Functional dependency: R(X, Y), R(X, Z) -> Y = Z
// This should merge the nulls created for the same X value
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
.into_iter()
.collect();
// TGD: Person(X) -> R(X, Y)
let tgd = RuleBuilder::new()
.when("Person", vec![Term::var("X")])
.then("R", vec![Term::var("X"), Term::var("Y")])
.build();
// First run without EGD to create two separate facts with nulls
// We simulate this by having two different TGDs create facts
let tgd2 = RuleBuilder::new()
.when("Person", vec![Term::var("X")])
.then("S", vec![Term::var("X"), Term::var("Z")])
.build();
// EGD: R(X, Y), S(X, Z) -> Y = Z
let egd = EgdBuilder::new()
.when("R", vec![Term::var("X"), Term::var("Y")])
.when("S", vec![Term::var("X"), Term::var("Z")])
.then_equal(Term::var("Y"), Term::var("Z"))
.build();
let result = chase_with_egds(instance, &[tgd, tgd2], &[egd]);
assert!(result.terminated);
assert!(result.error.is_none());
// After EGD application, the nulls should be merged
let r_facts = result.instance.facts_for_predicate("R");
let s_facts = result.instance.facts_for_predicate("S");
assert_eq!(r_facts.len(), 1);
assert_eq!(s_facts.len(), 1);
// The second argument of R and S should be the same (merged)
assert_eq!(r_facts[0].terms[1], s_facts[0].terms[1]);
}
#[test]
fn test_egd_conflict_different_constants() {
// EGD that tries to equate different constants should fail
let instance: Instance = vec![
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
Atom::new("R", vec![Term::constant("x"), Term::constant("bob")]),
]
.into_iter()
.collect();
// EGD: R(X, Y), R(X, Z) -> Y = Z
// This should fail because alice != bob
let egd = EgdBuilder::new()
.when("R", vec![Term::var("X"), Term::var("Y")])
.when("R", vec![Term::var("X"), Term::var("Z")])
.then_equal(Term::var("Y"), Term::var("Z"))
.build();
let result = chase_with_egds(instance, &[], &[egd]);
assert!(!result.terminated);
assert!(result.error.is_some());
assert!(matches!(result.error, Some(ChaseError::EgdConflict(_))));
}
#[test]
fn test_egd_null_merged_with_constant() {
// EGD that merges a null with a constant
let instance: Instance = vec![
Atom::new("R", vec![Term::constant("x"), Term::Null(0)]),
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
]
.into_iter()
.collect();
// EGD: R(X, Y), R(X, Z) -> Y = Z
let egd = EgdBuilder::new()
.when("R", vec![Term::var("X"), Term::var("Y")])
.when("R", vec![Term::var("X"), Term::var("Z")])
.then_equal(Term::var("Y"), Term::var("Z"))
.build();
let result = chase_with_egds(instance, &[], &[egd]);
assert!(result.terminated);
assert!(result.error.is_none());
// After canonicalization, there should be only one R fact
// with "alice" as the second argument
let r_facts = result.instance.facts_for_predicate("R");
assert_eq!(r_facts.len(), 1);
assert_eq!(r_facts[0].terms[1], Term::constant("alice"));
}
#[test]
fn test_tgd_and_egd_interaction() {
// TGD creates facts, EGD enforces constraints
let instance: Instance = vec![
Atom::new("Person", vec![Term::constant("alice")]),
Atom::new("Person", vec![Term::constant("bob")]),
]
.into_iter()
.collect();
// TGD: Person(X) -> HasManager(X, Y)
let tgd = RuleBuilder::new()
.when("Person", vec![Term::var("X")])
.then("HasManager", vec![Term::var("X"), Term::var("Y")])
.build();
// EGD: HasManager(X, Y), HasManager(Z, W) -> Y = W
// Everyone has the same manager
let egd = EgdBuilder::new()
.when("HasManager", vec![Term::var("X"), Term::var("Y")])
.when("HasManager", vec![Term::var("Z"), Term::var("W")])
.then_equal(Term::var("Y"), Term::var("W"))
.build();
let result = chase_with_egds(instance, &[tgd], &[egd]);
assert!(result.terminated);
assert!(result.error.is_none());
let manager_facts = result.instance.facts_for_predicate("HasManager");
assert_eq!(manager_facts.len(), 2);
// Both should have the same manager (second argument)
let managers: Vec<_> = manager_facts.iter().map(|f| &f.terms[1]).collect();
assert_eq!(managers[0], managers[1]);
}
#[test]
fn test_chase_full_with_custom_config() {
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
.into_iter()
.collect();
let tgd = RuleBuilder::new()
.when("A", vec![Term::var("X")])
.then("B", vec![Term::var("X"), Term::var("Y")])
.build();
let config = ChaseConfig {
max_steps: 100,
variant: ChaseVariant::Standard,
};
let result = chase_full(instance, &[tgd], &[], config);
assert!(result.terminated);
assert!(result.error.is_none());
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
}
}

View File

@ -5,6 +5,8 @@ use std::error::Error;
use std::fmt;
use super::atom::Atom;
use super::term::Term;
use super::union_find::UnionFind;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InstanceError {
@ -23,10 +25,30 @@ impl fmt::Display for InstanceError {
impl Error for InstanceError {}
/// A key for argument-position indexing: (predicate, position, value).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct IndexKey {
predicate: String,
position: usize,
value: Term,
}
impl IndexKey {
fn new(predicate: &str, position: usize, value: &Term) -> Self {
IndexKey {
predicate: predicate.to_string(),
position,
value: value.clone(),
}
}
}
/// A database instance containing ground atoms.
#[derive(Debug, Clone, Default)]
pub struct Instance {
facts_by_predicate: HashMap<String, HashSet<Atom>>,
/// Secondary index: maps (predicate, position, value) to atoms with that value at that position.
argument_index: HashMap<IndexKey, HashSet<Atom>>,
len: usize,
}
@ -35,6 +57,7 @@ impl Instance {
pub fn new() -> Self {
Instance {
facts_by_predicate: HashMap::new(),
argument_index: HashMap::new(),
len: 0,
}
}
@ -49,8 +72,16 @@ impl Instance {
.facts_by_predicate
.entry(fact.predicate.clone())
.or_default();
let inserted = bucket.insert(fact);
let inserted = bucket.insert(fact.clone());
if inserted {
// Populate argument indexes
for (position, term) in fact.terms.iter().enumerate() {
let key = IndexKey::new(&fact.predicate, position, term);
self.argument_index
.entry(key)
.or_default()
.insert(fact.clone());
}
self.len += 1;
}
@ -59,7 +90,9 @@ impl Instance {
/// Add a fact to the instance. Returns true if the fact was new.
///
/// # Panics
/// Panics if the atom is not ground.
#[allow(clippy::expect_used)]
pub fn add(&mut self, fact: Atom) -> bool {
self.try_add(fact).expect("facts must be ground atoms")
}
@ -96,6 +129,75 @@ impl Instance {
.flat_map(|facts| facts.iter())
.collect()
}
/// Get facts matching a pattern, using argument indexes for efficient lookup.
///
/// The pattern may contain variables (which match anything) and ground terms
/// (which must match exactly). This method uses a selectivity heuristic to
/// choose the most selective index for lookup.
pub fn facts_matching_pattern(&self, pattern: &Atom) -> Vec<&Atom> {
// Find bound (ground) positions and their selectivity
let bound_positions: Vec<(usize, &Term)> = pattern
.terms
.iter()
.enumerate()
.filter(|(_, term)| term.is_ground())
.collect();
if bound_positions.is_empty() {
// No bound arguments, fall back to predicate lookup
return self.facts_for_predicate(&pattern.predicate);
}
// Use selectivity heuristic: pick the bound position with the smallest bucket
let mut best_key: Option<IndexKey> = None;
let mut best_size = usize::MAX;
for (position, term) in &bound_positions {
let key = IndexKey::new(&pattern.predicate, *position, term);
let size = self.argument_index.get(&key).map(|s| s.len()).unwrap_or(0);
if size < best_size {
best_size = size;
best_key = Some(key);
}
}
// If best bucket is empty, return empty
let Some(key) = best_key else {
return Vec::new();
};
let Some(candidates) = self.argument_index.get(&key) else {
return Vec::new();
};
// Filter candidates by arity and all other bound positions
let pattern_arity = pattern.arity();
candidates
.iter()
.filter(|fact| {
fact.arity() == pattern_arity
&& bound_positions
.iter()
.all(|(pos, term)| &fact.terms[*pos] == *term)
})
.collect()
}
/// Create a new instance with all atoms canonicalized according to the union-find.
///
/// This replaces all nulls with their canonical representatives and removes
/// duplicate atoms that result from canonicalization.
pub fn canonicalize(&self, uf: &mut UnionFind) -> Instance {
let mut new_instance = Instance::new();
for atom in self.iter() {
let canonical = uf.canonicalize_atom(atom);
// try_add handles deduplication
let _ = new_instance.try_add(canonical);
}
new_instance
}
}
impl fmt::Display for Instance {
@ -171,4 +273,77 @@ mod tests {
assert!(matches!(error, InstanceError::NonGroundFact(_)));
assert!(instance.is_empty());
}
#[test]
fn test_facts_matching_pattern_with_bound_arg() {
let instance: Instance = vec![
Atom::new(
"Parent",
vec![Term::constant("alice"), Term::constant("bob")],
),
Atom::new(
"Parent",
vec![Term::constant("alice"), Term::constant("carol")],
),
Atom::new(
"Parent",
vec![Term::constant("bob"), Term::constant("carol")],
),
]
.into_iter()
.collect();
// Pattern with first argument bound to "alice"
let pattern = Atom::new("Parent", vec![Term::constant("alice"), Term::var("Y")]);
let matches = instance.facts_matching_pattern(&pattern);
assert_eq!(matches.len(), 2);
// Pattern with second argument bound to "carol"
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::constant("carol")]);
let matches = instance.facts_matching_pattern(&pattern);
assert_eq!(matches.len(), 2);
// Pattern with both arguments bound
let pattern = Atom::new(
"Parent",
vec![Term::constant("alice"), Term::constant("bob")],
);
let matches = instance.facts_matching_pattern(&pattern);
assert_eq!(matches.len(), 1);
// Pattern with no bound arguments (all variables)
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::var("Y")]);
let matches = instance.facts_matching_pattern(&pattern);
assert_eq!(matches.len(), 3);
// Pattern with non-matching bound argument
let pattern = Atom::new("Parent", vec![Term::constant("dave"), Term::var("Y")]);
let matches = instance.facts_matching_pattern(&pattern);
assert_eq!(matches.len(), 0);
}
#[test]
fn test_facts_matching_pattern_selectivity() {
// Create instance with skewed distribution
let mut instance = Instance::new();
// Many facts with first arg "common"
for i in 0..100 {
instance.add(Atom::new(
"R",
vec![Term::constant("common"), Term::constant(format!("v{}", i))],
));
}
// One fact with first arg "rare"
instance.add(Atom::new(
"R",
vec![Term::constant("rare"), Term::constant("x")],
));
// Pattern matching on "rare" should use more selective index
let pattern = Atom::new("R", vec![Term::constant("rare"), Term::var("Y")]);
let matches = instance.facts_matching_pattern(&pattern);
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].terms[0], Term::constant("rare"));
}
}

View File

@ -5,12 +5,17 @@ pub mod instance;
pub mod rule;
pub mod substitution;
pub mod term;
pub mod union_find;
mod engine;
pub use atom::Atom;
pub use engine::{ChaseConfig, ChaseResult, chase, chase_with_config};
pub use engine::{
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
chase_with_egds, standard_chase,
};
pub use instance::{Instance, InstanceError};
pub use rule::Rule;
pub use rule::{Egd, EgdBuilder, Equality, Rule, RuleBuilder};
pub use substitution::Substitution;
pub use term::Term;
pub use union_find::{MergeError, UnionFind};

View File

@ -121,6 +121,125 @@ impl Default for RuleBuilder {
}
}
/// An equality constraint between two terms.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Equality {
pub left: Term,
pub right: Term,
}
impl Equality {
/// Create a new equality constraint.
pub fn new(left: Term, right: Term) -> Self {
Equality { left, right }
}
/// Get all variables in this equality.
pub fn variables(&self) -> Vec<&String> {
let mut vars = Vec::new();
if let Term::Variable(v) = &self.left {
vars.push(v);
}
if let Term::Variable(v) = &self.right {
vars.push(v);
}
vars
}
}
impl fmt::Display for Equality {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} = {}", self.left, self.right)
}
}
/// An equality-generating dependency (EGD).
///
/// EGDs have the form: body -> X = Y
/// When the body matches, the equality X = Y is enforced.
#[derive(Debug, Clone)]
pub struct Egd {
/// The body atoms (conjunction).
pub body: Vec<Atom>,
/// The equality to enforce.
pub equality: Equality,
}
impl Egd {
/// Create a new EGD.
pub fn new(body: Vec<Atom>, equality: Equality) -> Self {
Egd { body, equality }
}
/// Get all variables appearing in the body.
pub fn body_variables(&self) -> HashSet<String> {
self.body
.iter()
.flat_map(|a| a.variables())
.cloned()
.collect()
}
/// Get all variables appearing in the equality.
pub fn equality_variables(&self) -> HashSet<String> {
self.equality.variables().into_iter().cloned().collect()
}
}
impl fmt::Display for Egd {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for (i, atom) in self.body.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{}", atom)?;
}
write!(f, " → {}", self.equality)
}
}
/// Builder for creating EGDs with a fluent API.
pub struct EgdBuilder {
body: Vec<Atom>,
equality: Option<Equality>,
}
impl EgdBuilder {
pub fn new() -> Self {
EgdBuilder {
body: Vec::new(),
equality: None,
}
}
/// Add an atom to the body.
pub fn when(mut self, predicate: &str, terms: Vec<Term>) -> Self {
self.body.push(Atom::new(predicate, terms));
self
}
/// Set the equality to enforce.
pub fn then_equal(mut self, left: Term, right: Term) -> Self {
self.equality = Some(Equality::new(left, right));
self
}
/// Build the EGD.
///
/// # Panics
/// Panics if `then_equal()` was not called.
#[allow(clippy::expect_used)]
pub fn build(self) -> Egd {
Egd::new(self.body, self.equality.expect("EGD must have an equality"))
}
}
impl Default for EgdBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -39,6 +39,16 @@ impl Term {
pub fn is_ground(&self) -> bool {
!self.is_variable()
}
/// Returns true if this term is a null.
pub fn is_null(&self) -> bool {
matches!(self, Term::Null(_))
}
/// Returns true if this term is a constant.
pub fn is_constant(&self) -> bool {
matches!(self, Term::Constant(_))
}
}
impl fmt::Display for Term {
@ -76,4 +86,20 @@ mod tests {
assert!(!v.is_ground());
assert!(v.is_variable());
}
#[test]
fn test_is_null_and_is_constant() {
let c = Term::constant("alice");
let n = Term::null(1);
let v = Term::var("X");
assert!(c.is_constant());
assert!(!c.is_null());
assert!(n.is_null());
assert!(!n.is_constant());
assert!(!v.is_constant());
assert!(!v.is_null());
}
}

323
src/chase/union_find.rs Normal file
View File

@ -0,0 +1,323 @@
//! Union-Find data structure for managing null equivalence classes in EGDs.
use std::collections::HashMap;
use std::error::Error;
use std::fmt;
use super::atom::Atom;
use super::term::Term;
/// Error that occurs when merging two different constants.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MergeError {
pub left: Term,
pub right: Term,
}
impl fmt::Display for MergeError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"cannot merge different constants: {} and {}",
self.left, self.right
)
}
}
impl Error for MergeError {}
/// Union-Find data structure for managing equivalence classes of terms.
///
/// Used by EGDs to track which nulls have been merged and what their
/// canonical representatives are. Constants are always their own canonical
/// form, and merging a null with a constant makes the constant the canonical
/// form for that equivalence class.
#[derive(Debug, Clone, Default)]
pub struct UnionFind {
/// Maps null IDs to their parent in the union-find tree.
parent: HashMap<usize, usize>,
/// Rank for union-by-rank optimization.
rank: HashMap<usize, usize>,
/// Maps null IDs to their canonical term (constant if merged with one).
canonical: HashMap<usize, Term>,
}
impl UnionFind {
/// Create a new empty union-find structure.
pub fn new() -> Self {
Self::default()
}
/// Find the representative for a null ID, with path compression.
pub fn find(&mut self, null_id: usize) -> usize {
// Ensure the null exists in our structure and get the parent
let parent = *self.parent.entry(null_id).or_insert_with(|| {
self.rank.insert(null_id, 0);
null_id
});
if parent != null_id {
// Path compression: make this node point directly to the root
let root = self.find(parent);
self.parent.insert(null_id, root);
root
} else {
null_id
}
}
/// Get the canonical term for a given term.
///
/// - Constants are always their own canonical form.
/// - Variables are always their own canonical form (shouldn't appear in instances).
/// - Nulls are mapped to their canonical representative (which may be a constant
/// if they were merged with one).
pub fn canonical_term(&mut self, term: &Term) -> Term {
match term {
Term::Constant(_) | Term::Variable(_) => term.clone(),
Term::Null(id) => {
let root = self.find(*id);
// Check if this equivalence class has a canonical constant
if let Some(canonical) = self.canonical.get(&root) {
canonical.clone()
} else {
Term::Null(root)
}
}
}
}
/// Canonicalize an atom by replacing all terms with their canonical forms.
pub fn canonicalize_atom(&mut self, atom: &Atom) -> Atom {
Atom::new(
atom.predicate.clone(),
atom.terms.iter().map(|t| self.canonical_term(t)).collect(),
)
}
/// Merge two terms, returning an error if they are different constants.
///
/// The merge rules are:
/// - Null + Null: union the equivalence classes
/// - Null + Constant: the constant becomes the canonical form for the null's class
/// - Constant + Constant: error if different, no-op if same
pub fn merge(&mut self, left: &Term, right: &Term) -> Result<(), MergeError> {
match (left, right) {
(Term::Constant(c1), Term::Constant(c2)) => {
if c1 != c2 {
Err(MergeError {
left: left.clone(),
right: right.clone(),
})
} else {
Ok(())
}
}
(Term::Null(id), Term::Constant(_)) => {
let root = self.find(*id);
// Check if this class already has a canonical constant
if let Some(existing) = self.canonical.get(&root) {
if existing != right {
return Err(MergeError {
left: existing.clone(),
right: right.clone(),
});
}
} else {
self.canonical.insert(root, right.clone());
}
Ok(())
}
(Term::Constant(_), Term::Null(id)) => {
let root = self.find(*id);
if let Some(existing) = self.canonical.get(&root) {
if existing != left {
return Err(MergeError {
left: left.clone(),
right: existing.clone(),
});
}
} else {
self.canonical.insert(root, left.clone());
}
Ok(())
}
(Term::Null(id1), Term::Null(id2)) => {
let root1 = self.find(*id1);
let root2 = self.find(*id2);
if root1 == root2 {
return Ok(()); // Already in the same class
}
// Check if both have canonical constants
let canon1 = self.canonical.get(&root1).cloned();
let canon2 = self.canonical.get(&root2).cloned();
match (&canon1, &canon2) {
(Some(c1), Some(c2)) if c1 != c2 => {
return Err(MergeError {
left: c1.clone(),
right: c2.clone(),
});
}
_ => {}
}
// Union by rank
let rank1 = *self.rank.get(&root1).unwrap_or(&0);
let rank2 = *self.rank.get(&root2).unwrap_or(&0);
let (new_root, child) = if rank1 < rank2 {
(root2, root1)
} else if rank1 > rank2 {
(root1, root2)
} else {
self.rank.insert(root1, rank1 + 1);
(root1, root2)
};
self.parent.insert(child, new_root);
// Propagate canonical constant to new root
let canonical = canon1.or(canon2);
if let Some(c) = canonical {
self.canonical.insert(new_root, c);
}
Ok(())
}
// Variables shouldn't be merged (they shouldn't appear in instances)
(Term::Variable(_), _) | (_, Term::Variable(_)) => Ok(()),
}
}
/// Check if two terms are in the same equivalence class.
pub fn are_equivalent(&mut self, left: &Term, right: &Term) -> bool {
self.canonical_term(left) == self.canonical_term(right)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_merge_nulls() {
let mut uf = UnionFind::new();
// Merge two nulls
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
// They should now have the same canonical form
let canon0 = uf.canonical_term(&Term::Null(0));
let canon1 = uf.canonical_term(&Term::Null(1));
assert_eq!(canon0, canon1);
}
#[test]
fn test_merge_null_with_constant() {
let mut uf = UnionFind::new();
// Merge null with constant
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
// Null should now have the constant as canonical form
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("alice"));
}
#[test]
fn test_merge_transitive() {
let mut uf = UnionFind::new();
// Merge: null0 = null1, null1 = constant
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
// Both nulls should have the constant as canonical
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("bob"));
assert_eq!(uf.canonical_term(&Term::Null(1)), Term::constant("bob"));
}
#[test]
fn test_merge_different_constants_fails() {
let mut uf = UnionFind::new();
// Merging different constants should fail
let result = uf.merge(&Term::constant("alice"), &Term::constant("bob"));
assert!(result.is_err());
let err = result.unwrap_err();
assert_eq!(err.left, Term::constant("alice"));
assert_eq!(err.right, Term::constant("bob"));
}
#[test]
fn test_merge_same_constant_ok() {
let mut uf = UnionFind::new();
// Merging the same constant should be fine
uf.merge(&Term::constant("alice"), &Term::constant("alice"))
.unwrap();
}
#[test]
fn test_merge_nulls_with_conflicting_constants() {
let mut uf = UnionFind::new();
// null0 = alice, null1 = bob
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
// Merging null0 and null1 should fail (alice != bob)
let result = uf.merge(&Term::Null(0), &Term::Null(1));
assert!(result.is_err());
}
#[test]
fn test_canonicalize_atom() {
let mut uf = UnionFind::new();
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
let atom = Atom::new("R", vec![Term::Null(0), Term::Null(1), Term::constant("x")]);
let canonical = uf.canonicalize_atom(&atom);
assert_eq!(canonical.terms[0], Term::constant("alice"));
// Null(1) and Null(2) are equivalent but have no constant, so canonical is one of them
assert!(canonical.terms[1].is_null());
assert_eq!(canonical.terms[2], Term::constant("x"));
}
#[test]
fn test_path_compression() {
let mut uf = UnionFind::new();
// Create a chain: 0 -> 1 -> 2 -> 3
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
uf.merge(&Term::Null(2), &Term::Null(3)).unwrap();
// Finding from 0 should compress the path
let root = uf.find(0);
// All should point to the same root
assert_eq!(uf.find(1), root);
assert_eq!(uf.find(2), root);
assert_eq!(uf.find(3), root);
}
#[test]
fn test_are_equivalent() {
let mut uf = UnionFind::new();
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
assert!(uf.are_equivalent(&Term::Null(0), &Term::Null(1)));
assert!(!uf.are_equivalent(&Term::Null(0), &Term::Null(2)));
assert!(uf.are_equivalent(&Term::constant("a"), &Term::constant("a")));
assert!(!uf.are_equivalent(&Term::constant("a"), &Term::constant("b")));
}
}

View File

@ -86,6 +86,7 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
instance,
steps,
terminated: false,
error: None,
},
provenance,
};
@ -105,6 +106,7 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
instance,
steps,
terminated: true,
error: None,
},
provenance,
};

View File

@ -3,6 +3,7 @@ pub mod frontend;
// Re-export main types for convenience
pub use chase::{
Atom, ChaseConfig, ChaseResult, Instance, InstanceError, Rule, Substitution, Term, chase,
chase_with_config,
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Egd, EgdBuilder, Equality, Instance,
InstanceError, MergeError, Rule, RuleBuilder, Substitution, Term, UnionFind, chase, chase_full,
chase_with_config, chase_with_egds, standard_chase,
};