Compare commits
No commits in common. "ee21bee0391d931d35b44b93a5263967b324d9eb" and "fc66b8e39303d68859b491706296a00e77d39b90" have entirely different histories.
ee21bee039
...
fc66b8e393
@ -1,13 +1,14 @@
|
||||
## Chase-rs
|
||||
|
||||
An implementation of the chase algorithm in Rust for reasoning engines.
|
||||
An implementation of the chase algorithm in Rust for advanced reasoning engines.
|
||||
|
||||
### Overview
|
||||
|
||||
The chase algorithm is a technique in context of database theory and knowledge representation used for:
|
||||
The chase algorithm is a fundamental technique in context of database theory and knowledge representation used for:
|
||||
|
||||
- Query answering under tuple-generating dependencies (TGDs)
|
||||
- Computing universal models
|
||||
- Ontology-based data access (OBDA)
|
||||
- Datalog with existential rules
|
||||
|
||||
This implementation provides a restricted-chase style materialization with active-trigger checks.
|
||||
|
||||
@ -1,44 +1,12 @@
|
||||
//! Core chase algorithm implementation.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use super::atom::Atom;
|
||||
use super::instance::Instance;
|
||||
use super::rule::{Egd, Rule};
|
||||
use super::rule::Rule;
|
||||
use super::substitution::{Substitution, unify_atom};
|
||||
use super::term::Term;
|
||||
use super::union_find::{MergeError, UnionFind};
|
||||
|
||||
/// Error that can occur during chase with EGDs.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ChaseError {
|
||||
/// EGD tried to equate two different constants.
|
||||
EgdConflict(MergeError),
|
||||
}
|
||||
|
||||
impl fmt::Display for ChaseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
ChaseError::EgdConflict(e) => write!(f, "EGD conflict: {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for ChaseError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
match self {
|
||||
ChaseError::EgdConflict(e) => Some(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<MergeError> for ChaseError {
|
||||
fn from(e: MergeError) -> Self {
|
||||
ChaseError::EgdConflict(e)
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of running the chase algorithm.
|
||||
#[derive(Debug)]
|
||||
@ -49,20 +17,6 @@ pub struct ChaseResult {
|
||||
pub steps: usize,
|
||||
/// Whether the chase terminated (vs hitting a limit).
|
||||
pub terminated: bool,
|
||||
/// Error that occurred during chase (e.g., EGD conflict).
|
||||
pub error: Option<ChaseError>,
|
||||
}
|
||||
|
||||
/// The variant of chase algorithm to use.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
pub enum ChaseVariant {
|
||||
/// Standard chase: no trigger tracking, terminates only when no new facts.
|
||||
/// May re-apply rules with the same bindings if heads are satisfied.
|
||||
Standard,
|
||||
/// Restricted chase: uses trigger tracking to avoid re-applying the same
|
||||
/// rule with the same frontier variable bindings. This is the default.
|
||||
#[default]
|
||||
Restricted,
|
||||
}
|
||||
|
||||
/// Configuration for the chase algorithm.
|
||||
@ -70,16 +24,11 @@ pub enum ChaseVariant {
|
||||
pub struct ChaseConfig {
|
||||
/// Maximum number of chase steps before giving up.
|
||||
pub max_steps: usize,
|
||||
/// The chase variant to use.
|
||||
pub variant: ChaseVariant,
|
||||
}
|
||||
|
||||
impl Default for ChaseConfig {
|
||||
fn default() -> Self {
|
||||
ChaseConfig {
|
||||
max_steps: 10_000,
|
||||
variant: ChaseVariant::default(),
|
||||
}
|
||||
ChaseConfig { max_steps: 10_000 }
|
||||
}
|
||||
}
|
||||
|
||||
@ -130,7 +79,7 @@ impl Trigger {
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the restricted chase algorithm (default).
|
||||
/// Run the standard chase algorithm.
|
||||
///
|
||||
/// The chase repeatedly applies rules to derive new facts until no more
|
||||
/// facts can be derived (fixpoint) or a limit is reached.
|
||||
@ -142,22 +91,6 @@ pub fn chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
||||
chase_with_config(instance, rules, ChaseConfig::default())
|
||||
}
|
||||
|
||||
/// Run the standard chase algorithm (no trigger tracking).
|
||||
///
|
||||
/// The standard chase applies rules whenever the body matches, without
|
||||
/// tracking which rule applications have been performed. It terminates
|
||||
/// only when no new facts are derived.
|
||||
///
|
||||
/// This is simpler but may be less efficient for rules with existentials,
|
||||
/// as it doesn't prevent re-checking already-satisfied rule applications.
|
||||
pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
||||
let config = ChaseConfig {
|
||||
variant: ChaseVariant::Standard,
|
||||
..Default::default()
|
||||
};
|
||||
chase_with_config(instance, rules, config)
|
||||
}
|
||||
|
||||
/// Run the chase with custom configuration.
|
||||
pub fn chase_with_config(
|
||||
mut instance: Instance,
|
||||
@ -174,16 +107,10 @@ pub fn chase_with_config(
|
||||
instance,
|
||||
steps,
|
||||
terminated: false,
|
||||
error: None,
|
||||
};
|
||||
}
|
||||
|
||||
let new_facts = match config.variant {
|
||||
ChaseVariant::Standard => standard_chase_step(&instance, rules, &mut null_gen),
|
||||
ChaseVariant::Restricted => {
|
||||
restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers)
|
||||
}
|
||||
};
|
||||
let new_facts = chase_step(&instance, rules, &mut null_gen, &mut applied_triggers);
|
||||
|
||||
if new_facts.is_empty() {
|
||||
// Fixpoint reached
|
||||
@ -191,7 +118,6 @@ pub fn chase_with_config(
|
||||
instance,
|
||||
steps,
|
||||
terminated: true,
|
||||
error: None,
|
||||
};
|
||||
}
|
||||
|
||||
@ -202,40 +128,8 @@ pub fn chase_with_config(
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform a single standard chase step: apply rules without trigger tracking.
|
||||
fn standard_chase_step(
|
||||
instance: &Instance,
|
||||
rules: &[Rule],
|
||||
null_gen: &mut NullGenerator,
|
||||
) -> Vec<Atom> {
|
||||
let mut new_facts = Vec::new();
|
||||
|
||||
for rule in rules {
|
||||
// Find all ways to match the rule body against the instance
|
||||
let matches = find_body_matches(instance, &rule.body);
|
||||
|
||||
for subst in matches {
|
||||
// In standard chase, we only check if head is satisfied
|
||||
if head_is_satisfied(instance, rule, &subst) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Generate head atoms with this substitution
|
||||
let derived = apply_rule_head(rule, &subst, null_gen);
|
||||
|
||||
for fact in derived {
|
||||
if !instance.contains(&fact) {
|
||||
new_facts.push(fact);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_facts
|
||||
}
|
||||
|
||||
/// Perform a single restricted chase step: use trigger tracking to avoid redundant applications.
|
||||
fn restricted_chase_step(
|
||||
/// Perform a single chase step: find all applicable rule instances and derive new facts.
|
||||
fn chase_step(
|
||||
instance: &Instance,
|
||||
rules: &[Rule],
|
||||
null_gen: &mut NullGenerator,
|
||||
@ -292,8 +186,8 @@ fn find_body_matches(instance: &Instance, body: &[Atom]) -> Vec<Substitution> {
|
||||
// Apply current substitution to the body atom
|
||||
let pattern = subst.apply_atom(body_atom);
|
||||
|
||||
// Find all facts that match this pattern using indexed lookup
|
||||
for fact in instance.facts_matching_pattern(&pattern) {
|
||||
// Find all facts that match this pattern
|
||||
for fact in instance.facts_for_predicate(&pattern.predicate) {
|
||||
if let Some(new_subst) = unify_atom(&pattern, fact) {
|
||||
// Combine substitutions
|
||||
let mut combined = subst.clone();
|
||||
@ -365,145 +259,6 @@ fn term_null_id(term: &Term) -> Option<usize> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A trigger for EGD applications, tracking which EGD was applied with which body bindings.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
struct EgdTrigger {
|
||||
egd_index: usize,
|
||||
/// All body variable bindings for the EGD.
|
||||
body_bindings: Vec<(String, Term)>,
|
||||
}
|
||||
|
||||
impl EgdTrigger {
|
||||
fn new(egd_index: usize, egd: &Egd, subst: &Substitution) -> Self {
|
||||
let body_vars = egd.body_variables();
|
||||
let mut bindings: Vec<_> = body_vars
|
||||
.into_iter()
|
||||
.filter_map(|v| subst.get(&v).map(|t| (v, t.clone())))
|
||||
.collect();
|
||||
bindings.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
|
||||
EgdTrigger {
|
||||
egd_index,
|
||||
body_bindings: bindings,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply EGDs to the instance, returning any merge error that occurs.
|
||||
fn apply_egds(
|
||||
instance: &Instance,
|
||||
egds: &[Egd],
|
||||
uf: &mut UnionFind,
|
||||
applied_egd_triggers: &mut HashSet<EgdTrigger>,
|
||||
) -> Result<bool, MergeError> {
|
||||
let mut made_changes = false;
|
||||
|
||||
for (egd_idx, egd) in egds.iter().enumerate() {
|
||||
let matches = find_body_matches(instance, &egd.body);
|
||||
|
||||
for subst in matches {
|
||||
let trigger = EgdTrigger::new(egd_idx, egd, &subst);
|
||||
|
||||
if applied_egd_triggers.contains(&trigger) {
|
||||
continue;
|
||||
}
|
||||
applied_egd_triggers.insert(trigger);
|
||||
|
||||
// Get the terms that should be equal
|
||||
let left = subst.apply_term(&egd.equality.left);
|
||||
let right = subst.apply_term(&egd.equality.right);
|
||||
|
||||
// Merge them in the union-find
|
||||
uf.merge(&left, &right)?;
|
||||
made_changes = true;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(made_changes)
|
||||
}
|
||||
|
||||
/// Run chase with both TGDs and EGDs.
|
||||
///
|
||||
/// The chase alternates between:
|
||||
/// 1. Applying TGDs to derive new facts
|
||||
/// 2. Applying EGDs to enforce equalities
|
||||
/// 3. Canonicalizing the instance to reflect merged nulls
|
||||
///
|
||||
/// Terminates when fixpoint is reached or an error occurs (EGD conflict).
|
||||
pub fn chase_with_egds(instance: Instance, tgds: &[Rule], egds: &[Egd]) -> ChaseResult {
|
||||
chase_full(instance, tgds, egds, ChaseConfig::default())
|
||||
}
|
||||
|
||||
/// Run the full chase with TGDs, EGDs, and custom configuration.
|
||||
pub fn chase_full(
|
||||
mut instance: Instance,
|
||||
tgds: &[Rule],
|
||||
egds: &[Egd],
|
||||
config: ChaseConfig,
|
||||
) -> ChaseResult {
|
||||
let mut null_gen = NullGenerator::seeded_from(&instance, tgds);
|
||||
let mut applied_triggers: HashSet<Trigger> = HashSet::new();
|
||||
let mut applied_egd_triggers: HashSet<EgdTrigger> = HashSet::new();
|
||||
let mut uf = UnionFind::new();
|
||||
let mut steps = 0;
|
||||
|
||||
loop {
|
||||
if steps >= config.max_steps {
|
||||
return ChaseResult {
|
||||
instance,
|
||||
steps,
|
||||
terminated: false,
|
||||
error: None,
|
||||
};
|
||||
}
|
||||
|
||||
// Apply TGDs
|
||||
let new_facts = match config.variant {
|
||||
ChaseVariant::Standard => standard_chase_step(&instance, tgds, &mut null_gen),
|
||||
ChaseVariant::Restricted => {
|
||||
restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers)
|
||||
}
|
||||
};
|
||||
|
||||
let tgd_changes = !new_facts.is_empty();
|
||||
for fact in new_facts {
|
||||
instance.add(fact);
|
||||
}
|
||||
|
||||
// Apply EGDs
|
||||
let egd_result = apply_egds(&instance, egds, &mut uf, &mut applied_egd_triggers);
|
||||
|
||||
match egd_result {
|
||||
Err(e) => {
|
||||
return ChaseResult {
|
||||
instance,
|
||||
steps,
|
||||
terminated: false,
|
||||
error: Some(ChaseError::EgdConflict(e)),
|
||||
};
|
||||
}
|
||||
Ok(egd_changes) => {
|
||||
// Canonicalize instance if EGDs made changes
|
||||
if egd_changes {
|
||||
instance = instance.canonicalize(&mut uf);
|
||||
}
|
||||
|
||||
// Check for fixpoint
|
||||
if !tgd_changes && !egd_changes {
|
||||
return ChaseResult {
|
||||
instance,
|
||||
steps,
|
||||
terminated: true,
|
||||
error: None,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
steps += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -644,286 +399,4 @@ mod tests {
|
||||
assert!(result.terminated);
|
||||
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_standard_chase_basic() {
|
||||
// Test standard chase with datalog rules
|
||||
let instance: Instance = vec![
|
||||
Atom::new(
|
||||
"Parent",
|
||||
vec![Term::constant("alice"), Term::constant("bob")],
|
||||
),
|
||||
Atom::new(
|
||||
"Parent",
|
||||
vec![Term::constant("bob"), Term::constant("carol")],
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let rule1 = RuleBuilder::new()
|
||||
.when("Parent", vec![Term::var("X"), Term::var("Y")])
|
||||
.then("Ancestor", vec![Term::var("X"), Term::var("Y")])
|
||||
.build();
|
||||
|
||||
let rule2 = RuleBuilder::new()
|
||||
.when("Ancestor", vec![Term::var("X"), Term::var("Y")])
|
||||
.when("Parent", vec![Term::var("Y"), Term::var("Z")])
|
||||
.then("Ancestor", vec![Term::var("X"), Term::var("Z")])
|
||||
.build();
|
||||
|
||||
let result = standard_chase(instance, &[rule1, rule2]);
|
||||
|
||||
assert!(result.terminated);
|
||||
let ancestors = result.instance.facts_for_predicate("Ancestor");
|
||||
assert_eq!(ancestors.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_standard_and_restricted_reach_same_fixpoint() {
|
||||
// For datalog rules (no existentials), both should reach the same result
|
||||
let instance: Instance = vec![
|
||||
Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
|
||||
Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]),
|
||||
Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let rule1 = RuleBuilder::new()
|
||||
.when("Edge", vec![Term::var("X"), Term::var("Y")])
|
||||
.then("Path", vec![Term::var("X"), Term::var("Y")])
|
||||
.build();
|
||||
|
||||
let rule2 = RuleBuilder::new()
|
||||
.when("Path", vec![Term::var("X"), Term::var("Y")])
|
||||
.when("Edge", vec![Term::var("Y"), Term::var("Z")])
|
||||
.then("Path", vec![Term::var("X"), Term::var("Z")])
|
||||
.build();
|
||||
|
||||
let rules = vec![rule1, rule2];
|
||||
|
||||
let standard_result = standard_chase(instance.clone(), &rules);
|
||||
let restricted_result = chase(instance, &rules);
|
||||
|
||||
assert!(standard_result.terminated);
|
||||
assert!(restricted_result.terminated);
|
||||
|
||||
// Both should derive the same paths
|
||||
let standard_paths = standard_result.instance.facts_for_predicate("Path");
|
||||
let restricted_paths = restricted_result.instance.facts_for_predicate("Path");
|
||||
assert_eq!(standard_paths.len(), restricted_paths.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_standard_chase_with_existentials() {
|
||||
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let rule = RuleBuilder::new()
|
||||
.when("Person", vec![Term::var("X")])
|
||||
.then("HasSSN", vec![Term::var("X"), Term::var("Y")])
|
||||
.build();
|
||||
|
||||
let result = standard_chase(instance, &[rule]);
|
||||
|
||||
assert!(result.terminated);
|
||||
let has_ssn = result.instance.facts_for_predicate("HasSSN");
|
||||
assert_eq!(has_ssn.len(), 1);
|
||||
assert!(matches!(has_ssn[0].terms[1], Term::Null(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_chase_variant_config() {
|
||||
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let rule = RuleBuilder::new()
|
||||
.when("A", vec![Term::var("X")])
|
||||
.then("B", vec![Term::var("X")])
|
||||
.build();
|
||||
|
||||
// Test standard variant via config
|
||||
let standard_config = ChaseConfig {
|
||||
variant: ChaseVariant::Standard,
|
||||
..Default::default()
|
||||
};
|
||||
let standard_result = chase_with_config(instance.clone(), &[rule.clone()], standard_config);
|
||||
|
||||
// Test restricted variant via config
|
||||
let restricted_config = ChaseConfig {
|
||||
variant: ChaseVariant::Restricted,
|
||||
..Default::default()
|
||||
};
|
||||
let restricted_result = chase_with_config(instance, &[rule], restricted_config);
|
||||
|
||||
assert!(standard_result.terminated);
|
||||
assert!(restricted_result.terminated);
|
||||
assert_eq!(
|
||||
standard_result.instance.facts_for_predicate("B").len(),
|
||||
restricted_result.instance.facts_for_predicate("B").len()
|
||||
);
|
||||
}
|
||||
|
||||
// EGD tests
|
||||
use crate::chase::rule::EgdBuilder;
|
||||
|
||||
#[test]
|
||||
fn test_egd_functional_dependency_merges_nulls() {
|
||||
// Functional dependency: R(X, Y), R(X, Z) -> Y = Z
|
||||
// This should merge the nulls created for the same X value
|
||||
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// TGD: Person(X) -> R(X, Y)
|
||||
let tgd = RuleBuilder::new()
|
||||
.when("Person", vec![Term::var("X")])
|
||||
.then("R", vec![Term::var("X"), Term::var("Y")])
|
||||
.build();
|
||||
|
||||
// First run without EGD to create two separate facts with nulls
|
||||
// We simulate this by having two different TGDs create facts
|
||||
let tgd2 = RuleBuilder::new()
|
||||
.when("Person", vec![Term::var("X")])
|
||||
.then("S", vec![Term::var("X"), Term::var("Z")])
|
||||
.build();
|
||||
|
||||
// EGD: R(X, Y), S(X, Z) -> Y = Z
|
||||
let egd = EgdBuilder::new()
|
||||
.when("R", vec![Term::var("X"), Term::var("Y")])
|
||||
.when("S", vec![Term::var("X"), Term::var("Z")])
|
||||
.then_equal(Term::var("Y"), Term::var("Z"))
|
||||
.build();
|
||||
|
||||
let result = chase_with_egds(instance, &[tgd, tgd2], &[egd]);
|
||||
|
||||
assert!(result.terminated);
|
||||
assert!(result.error.is_none());
|
||||
|
||||
// After EGD application, the nulls should be merged
|
||||
let r_facts = result.instance.facts_for_predicate("R");
|
||||
let s_facts = result.instance.facts_for_predicate("S");
|
||||
assert_eq!(r_facts.len(), 1);
|
||||
assert_eq!(s_facts.len(), 1);
|
||||
|
||||
// The second argument of R and S should be the same (merged)
|
||||
assert_eq!(r_facts[0].terms[1], s_facts[0].terms[1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_egd_conflict_different_constants() {
|
||||
// EGD that tries to equate different constants should fail
|
||||
let instance: Instance = vec![
|
||||
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
|
||||
Atom::new("R", vec![Term::constant("x"), Term::constant("bob")]),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// EGD: R(X, Y), R(X, Z) -> Y = Z
|
||||
// This should fail because alice != bob
|
||||
let egd = EgdBuilder::new()
|
||||
.when("R", vec![Term::var("X"), Term::var("Y")])
|
||||
.when("R", vec![Term::var("X"), Term::var("Z")])
|
||||
.then_equal(Term::var("Y"), Term::var("Z"))
|
||||
.build();
|
||||
|
||||
let result = chase_with_egds(instance, &[], &[egd]);
|
||||
|
||||
assert!(!result.terminated);
|
||||
assert!(result.error.is_some());
|
||||
assert!(matches!(result.error, Some(ChaseError::EgdConflict(_))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_egd_null_merged_with_constant() {
|
||||
// EGD that merges a null with a constant
|
||||
let instance: Instance = vec![
|
||||
Atom::new("R", vec![Term::constant("x"), Term::Null(0)]),
|
||||
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// EGD: R(X, Y), R(X, Z) -> Y = Z
|
||||
let egd = EgdBuilder::new()
|
||||
.when("R", vec![Term::var("X"), Term::var("Y")])
|
||||
.when("R", vec![Term::var("X"), Term::var("Z")])
|
||||
.then_equal(Term::var("Y"), Term::var("Z"))
|
||||
.build();
|
||||
|
||||
let result = chase_with_egds(instance, &[], &[egd]);
|
||||
|
||||
assert!(result.terminated);
|
||||
assert!(result.error.is_none());
|
||||
|
||||
// After canonicalization, there should be only one R fact
|
||||
// with "alice" as the second argument
|
||||
let r_facts = result.instance.facts_for_predicate("R");
|
||||
assert_eq!(r_facts.len(), 1);
|
||||
assert_eq!(r_facts[0].terms[1], Term::constant("alice"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tgd_and_egd_interaction() {
|
||||
// TGD creates facts, EGD enforces constraints
|
||||
let instance: Instance = vec![
|
||||
Atom::new("Person", vec![Term::constant("alice")]),
|
||||
Atom::new("Person", vec![Term::constant("bob")]),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// TGD: Person(X) -> HasManager(X, Y)
|
||||
let tgd = RuleBuilder::new()
|
||||
.when("Person", vec![Term::var("X")])
|
||||
.then("HasManager", vec![Term::var("X"), Term::var("Y")])
|
||||
.build();
|
||||
|
||||
// EGD: HasManager(X, Y), HasManager(Z, W) -> Y = W
|
||||
// Everyone has the same manager
|
||||
let egd = EgdBuilder::new()
|
||||
.when("HasManager", vec![Term::var("X"), Term::var("Y")])
|
||||
.when("HasManager", vec![Term::var("Z"), Term::var("W")])
|
||||
.then_equal(Term::var("Y"), Term::var("W"))
|
||||
.build();
|
||||
|
||||
let result = chase_with_egds(instance, &[tgd], &[egd]);
|
||||
|
||||
assert!(result.terminated);
|
||||
assert!(result.error.is_none());
|
||||
|
||||
let manager_facts = result.instance.facts_for_predicate("HasManager");
|
||||
assert_eq!(manager_facts.len(), 2);
|
||||
|
||||
// Both should have the same manager (second argument)
|
||||
let managers: Vec<_> = manager_facts.iter().map(|f| &f.terms[1]).collect();
|
||||
assert_eq!(managers[0], managers[1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_chase_full_with_custom_config() {
|
||||
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let tgd = RuleBuilder::new()
|
||||
.when("A", vec![Term::var("X")])
|
||||
.then("B", vec![Term::var("X"), Term::var("Y")])
|
||||
.build();
|
||||
|
||||
let config = ChaseConfig {
|
||||
max_steps: 100,
|
||||
variant: ChaseVariant::Standard,
|
||||
};
|
||||
|
||||
let result = chase_full(instance, &[tgd], &[], config);
|
||||
|
||||
assert!(result.terminated);
|
||||
assert!(result.error.is_none());
|
||||
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,8 +5,6 @@ use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use super::atom::Atom;
|
||||
use super::term::Term;
|
||||
use super::union_find::UnionFind;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum InstanceError {
|
||||
@ -25,30 +23,10 @@ impl fmt::Display for InstanceError {
|
||||
|
||||
impl Error for InstanceError {}
|
||||
|
||||
/// A key for argument-position indexing: (predicate, position, value).
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
struct IndexKey {
|
||||
predicate: String,
|
||||
position: usize,
|
||||
value: Term,
|
||||
}
|
||||
|
||||
impl IndexKey {
|
||||
fn new(predicate: &str, position: usize, value: &Term) -> Self {
|
||||
IndexKey {
|
||||
predicate: predicate.to_string(),
|
||||
position,
|
||||
value: value.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A database instance containing ground atoms.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct Instance {
|
||||
facts_by_predicate: HashMap<String, HashSet<Atom>>,
|
||||
/// Secondary index: maps (predicate, position, value) to atoms with that value at that position.
|
||||
argument_index: HashMap<IndexKey, HashSet<Atom>>,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
@ -57,7 +35,6 @@ impl Instance {
|
||||
pub fn new() -> Self {
|
||||
Instance {
|
||||
facts_by_predicate: HashMap::new(),
|
||||
argument_index: HashMap::new(),
|
||||
len: 0,
|
||||
}
|
||||
}
|
||||
@ -72,16 +49,8 @@ impl Instance {
|
||||
.facts_by_predicate
|
||||
.entry(fact.predicate.clone())
|
||||
.or_default();
|
||||
let inserted = bucket.insert(fact.clone());
|
||||
let inserted = bucket.insert(fact);
|
||||
if inserted {
|
||||
// Populate argument indexes
|
||||
for (position, term) in fact.terms.iter().enumerate() {
|
||||
let key = IndexKey::new(&fact.predicate, position, term);
|
||||
self.argument_index
|
||||
.entry(key)
|
||||
.or_default()
|
||||
.insert(fact.clone());
|
||||
}
|
||||
self.len += 1;
|
||||
}
|
||||
|
||||
@ -90,9 +59,7 @@ impl Instance {
|
||||
|
||||
/// Add a fact to the instance. Returns true if the fact was new.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the atom is not ground.
|
||||
#[allow(clippy::expect_used)]
|
||||
pub fn add(&mut self, fact: Atom) -> bool {
|
||||
self.try_add(fact).expect("facts must be ground atoms")
|
||||
}
|
||||
@ -129,75 +96,6 @@ impl Instance {
|
||||
.flat_map(|facts| facts.iter())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get facts matching a pattern, using argument indexes for efficient lookup.
|
||||
///
|
||||
/// The pattern may contain variables (which match anything) and ground terms
|
||||
/// (which must match exactly). This method uses a selectivity heuristic to
|
||||
/// choose the most selective index for lookup.
|
||||
pub fn facts_matching_pattern(&self, pattern: &Atom) -> Vec<&Atom> {
|
||||
// Find bound (ground) positions and their selectivity
|
||||
let bound_positions: Vec<(usize, &Term)> = pattern
|
||||
.terms
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, term)| term.is_ground())
|
||||
.collect();
|
||||
|
||||
if bound_positions.is_empty() {
|
||||
// No bound arguments, fall back to predicate lookup
|
||||
return self.facts_for_predicate(&pattern.predicate);
|
||||
}
|
||||
|
||||
// Use selectivity heuristic: pick the bound position with the smallest bucket
|
||||
let mut best_key: Option<IndexKey> = None;
|
||||
let mut best_size = usize::MAX;
|
||||
|
||||
for (position, term) in &bound_positions {
|
||||
let key = IndexKey::new(&pattern.predicate, *position, term);
|
||||
let size = self.argument_index.get(&key).map(|s| s.len()).unwrap_or(0);
|
||||
|
||||
if size < best_size {
|
||||
best_size = size;
|
||||
best_key = Some(key);
|
||||
}
|
||||
}
|
||||
|
||||
// If best bucket is empty, return empty
|
||||
let Some(key) = best_key else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let Some(candidates) = self.argument_index.get(&key) else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
// Filter candidates by arity and all other bound positions
|
||||
let pattern_arity = pattern.arity();
|
||||
candidates
|
||||
.iter()
|
||||
.filter(|fact| {
|
||||
fact.arity() == pattern_arity
|
||||
&& bound_positions
|
||||
.iter()
|
||||
.all(|(pos, term)| &fact.terms[*pos] == *term)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Create a new instance with all atoms canonicalized according to the union-find.
|
||||
///
|
||||
/// This replaces all nulls with their canonical representatives and removes
|
||||
/// duplicate atoms that result from canonicalization.
|
||||
pub fn canonicalize(&self, uf: &mut UnionFind) -> Instance {
|
||||
let mut new_instance = Instance::new();
|
||||
for atom in self.iter() {
|
||||
let canonical = uf.canonicalize_atom(atom);
|
||||
// try_add handles deduplication
|
||||
let _ = new_instance.try_add(canonical);
|
||||
}
|
||||
new_instance
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Instance {
|
||||
@ -273,77 +171,4 @@ mod tests {
|
||||
assert!(matches!(error, InstanceError::NonGroundFact(_)));
|
||||
assert!(instance.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_facts_matching_pattern_with_bound_arg() {
|
||||
let instance: Instance = vec![
|
||||
Atom::new(
|
||||
"Parent",
|
||||
vec![Term::constant("alice"), Term::constant("bob")],
|
||||
),
|
||||
Atom::new(
|
||||
"Parent",
|
||||
vec![Term::constant("alice"), Term::constant("carol")],
|
||||
),
|
||||
Atom::new(
|
||||
"Parent",
|
||||
vec![Term::constant("bob"), Term::constant("carol")],
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// Pattern with first argument bound to "alice"
|
||||
let pattern = Atom::new("Parent", vec![Term::constant("alice"), Term::var("Y")]);
|
||||
let matches = instance.facts_matching_pattern(&pattern);
|
||||
assert_eq!(matches.len(), 2);
|
||||
|
||||
// Pattern with second argument bound to "carol"
|
||||
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::constant("carol")]);
|
||||
let matches = instance.facts_matching_pattern(&pattern);
|
||||
assert_eq!(matches.len(), 2);
|
||||
|
||||
// Pattern with both arguments bound
|
||||
let pattern = Atom::new(
|
||||
"Parent",
|
||||
vec![Term::constant("alice"), Term::constant("bob")],
|
||||
);
|
||||
let matches = instance.facts_matching_pattern(&pattern);
|
||||
assert_eq!(matches.len(), 1);
|
||||
|
||||
// Pattern with no bound arguments (all variables)
|
||||
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::var("Y")]);
|
||||
let matches = instance.facts_matching_pattern(&pattern);
|
||||
assert_eq!(matches.len(), 3);
|
||||
|
||||
// Pattern with non-matching bound argument
|
||||
let pattern = Atom::new("Parent", vec![Term::constant("dave"), Term::var("Y")]);
|
||||
let matches = instance.facts_matching_pattern(&pattern);
|
||||
assert_eq!(matches.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_facts_matching_pattern_selectivity() {
|
||||
// Create instance with skewed distribution
|
||||
let mut instance = Instance::new();
|
||||
|
||||
// Many facts with first arg "common"
|
||||
for i in 0..100 {
|
||||
instance.add(Atom::new(
|
||||
"R",
|
||||
vec![Term::constant("common"), Term::constant(format!("v{}", i))],
|
||||
));
|
||||
}
|
||||
// One fact with first arg "rare"
|
||||
instance.add(Atom::new(
|
||||
"R",
|
||||
vec![Term::constant("rare"), Term::constant("x")],
|
||||
));
|
||||
|
||||
// Pattern matching on "rare" should use more selective index
|
||||
let pattern = Atom::new("R", vec![Term::constant("rare"), Term::var("Y")]);
|
||||
let matches = instance.facts_matching_pattern(&pattern);
|
||||
assert_eq!(matches.len(), 1);
|
||||
assert_eq!(matches[0].terms[0], Term::constant("rare"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,17 +5,12 @@ pub mod instance;
|
||||
pub mod rule;
|
||||
pub mod substitution;
|
||||
pub mod term;
|
||||
pub mod union_find;
|
||||
|
||||
mod engine;
|
||||
|
||||
pub use atom::Atom;
|
||||
pub use engine::{
|
||||
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
|
||||
chase_with_egds, standard_chase,
|
||||
};
|
||||
pub use engine::{ChaseConfig, ChaseResult, chase, chase_with_config};
|
||||
pub use instance::{Instance, InstanceError};
|
||||
pub use rule::{Egd, EgdBuilder, Equality, Rule, RuleBuilder};
|
||||
pub use rule::Rule;
|
||||
pub use substitution::Substitution;
|
||||
pub use term::Term;
|
||||
pub use union_find::{MergeError, UnionFind};
|
||||
|
||||
@ -121,125 +121,6 @@ impl Default for RuleBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
/// An equality constraint between two terms.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Equality {
|
||||
pub left: Term,
|
||||
pub right: Term,
|
||||
}
|
||||
|
||||
impl Equality {
|
||||
/// Create a new equality constraint.
|
||||
pub fn new(left: Term, right: Term) -> Self {
|
||||
Equality { left, right }
|
||||
}
|
||||
|
||||
/// Get all variables in this equality.
|
||||
pub fn variables(&self) -> Vec<&String> {
|
||||
let mut vars = Vec::new();
|
||||
if let Term::Variable(v) = &self.left {
|
||||
vars.push(v);
|
||||
}
|
||||
if let Term::Variable(v) = &self.right {
|
||||
vars.push(v);
|
||||
}
|
||||
vars
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Equality {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{} = {}", self.left, self.right)
|
||||
}
|
||||
}
|
||||
|
||||
/// An equality-generating dependency (EGD).
|
||||
///
|
||||
/// EGDs have the form: body -> X = Y
|
||||
/// When the body matches, the equality X = Y is enforced.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Egd {
|
||||
/// The body atoms (conjunction).
|
||||
pub body: Vec<Atom>,
|
||||
/// The equality to enforce.
|
||||
pub equality: Equality,
|
||||
}
|
||||
|
||||
impl Egd {
|
||||
/// Create a new EGD.
|
||||
pub fn new(body: Vec<Atom>, equality: Equality) -> Self {
|
||||
Egd { body, equality }
|
||||
}
|
||||
|
||||
/// Get all variables appearing in the body.
|
||||
pub fn body_variables(&self) -> HashSet<String> {
|
||||
self.body
|
||||
.iter()
|
||||
.flat_map(|a| a.variables())
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get all variables appearing in the equality.
|
||||
pub fn equality_variables(&self) -> HashSet<String> {
|
||||
self.equality.variables().into_iter().cloned().collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Egd {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
for (i, atom) in self.body.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "{}", atom)?;
|
||||
}
|
||||
write!(f, " → {}", self.equality)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for creating EGDs with a fluent API.
|
||||
pub struct EgdBuilder {
|
||||
body: Vec<Atom>,
|
||||
equality: Option<Equality>,
|
||||
}
|
||||
|
||||
impl EgdBuilder {
|
||||
pub fn new() -> Self {
|
||||
EgdBuilder {
|
||||
body: Vec::new(),
|
||||
equality: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an atom to the body.
|
||||
pub fn when(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
||||
self.body.push(Atom::new(predicate, terms));
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the equality to enforce.
|
||||
pub fn then_equal(mut self, left: Term, right: Term) -> Self {
|
||||
self.equality = Some(Equality::new(left, right));
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the EGD.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `then_equal()` was not called.
|
||||
#[allow(clippy::expect_used)]
|
||||
pub fn build(self) -> Egd {
|
||||
Egd::new(self.body, self.equality.expect("EGD must have an equality"))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for EgdBuilder {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@ -39,16 +39,6 @@ impl Term {
|
||||
pub fn is_ground(&self) -> bool {
|
||||
!self.is_variable()
|
||||
}
|
||||
|
||||
/// Returns true if this term is a null.
|
||||
pub fn is_null(&self) -> bool {
|
||||
matches!(self, Term::Null(_))
|
||||
}
|
||||
|
||||
/// Returns true if this term is a constant.
|
||||
pub fn is_constant(&self) -> bool {
|
||||
matches!(self, Term::Constant(_))
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Term {
|
||||
@ -86,20 +76,4 @@ mod tests {
|
||||
assert!(!v.is_ground());
|
||||
assert!(v.is_variable());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_null_and_is_constant() {
|
||||
let c = Term::constant("alice");
|
||||
let n = Term::null(1);
|
||||
let v = Term::var("X");
|
||||
|
||||
assert!(c.is_constant());
|
||||
assert!(!c.is_null());
|
||||
|
||||
assert!(n.is_null());
|
||||
assert!(!n.is_constant());
|
||||
|
||||
assert!(!v.is_constant());
|
||||
assert!(!v.is_null());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,323 +0,0 @@
|
||||
//! Union-Find data structure for managing null equivalence classes in EGDs.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use super::atom::Atom;
|
||||
use super::term::Term;
|
||||
|
||||
/// Error that occurs when merging two different constants.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct MergeError {
|
||||
pub left: Term,
|
||||
pub right: Term,
|
||||
}
|
||||
|
||||
impl fmt::Display for MergeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"cannot merge different constants: {} and {}",
|
||||
self.left, self.right
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for MergeError {}
|
||||
|
||||
/// Union-Find data structure for managing equivalence classes of terms.
|
||||
///
|
||||
/// Used by EGDs to track which nulls have been merged and what their
|
||||
/// canonical representatives are. Constants are always their own canonical
|
||||
/// form, and merging a null with a constant makes the constant the canonical
|
||||
/// form for that equivalence class.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct UnionFind {
|
||||
/// Maps null IDs to their parent in the union-find tree.
|
||||
parent: HashMap<usize, usize>,
|
||||
/// Rank for union-by-rank optimization.
|
||||
rank: HashMap<usize, usize>,
|
||||
/// Maps null IDs to their canonical term (constant if merged with one).
|
||||
canonical: HashMap<usize, Term>,
|
||||
}
|
||||
|
||||
impl UnionFind {
|
||||
/// Create a new empty union-find structure.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Find the representative for a null ID, with path compression.
|
||||
pub fn find(&mut self, null_id: usize) -> usize {
|
||||
// Ensure the null exists in our structure and get the parent
|
||||
let parent = *self.parent.entry(null_id).or_insert_with(|| {
|
||||
self.rank.insert(null_id, 0);
|
||||
null_id
|
||||
});
|
||||
|
||||
if parent != null_id {
|
||||
// Path compression: make this node point directly to the root
|
||||
let root = self.find(parent);
|
||||
self.parent.insert(null_id, root);
|
||||
root
|
||||
} else {
|
||||
null_id
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the canonical term for a given term.
|
||||
///
|
||||
/// - Constants are always their own canonical form.
|
||||
/// - Variables are always their own canonical form (shouldn't appear in instances).
|
||||
/// - Nulls are mapped to their canonical representative (which may be a constant
|
||||
/// if they were merged with one).
|
||||
pub fn canonical_term(&mut self, term: &Term) -> Term {
|
||||
match term {
|
||||
Term::Constant(_) | Term::Variable(_) => term.clone(),
|
||||
Term::Null(id) => {
|
||||
let root = self.find(*id);
|
||||
// Check if this equivalence class has a canonical constant
|
||||
if let Some(canonical) = self.canonical.get(&root) {
|
||||
canonical.clone()
|
||||
} else {
|
||||
Term::Null(root)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Canonicalize an atom by replacing all terms with their canonical forms.
|
||||
pub fn canonicalize_atom(&mut self, atom: &Atom) -> Atom {
|
||||
Atom::new(
|
||||
atom.predicate.clone(),
|
||||
atom.terms.iter().map(|t| self.canonical_term(t)).collect(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Merge two terms, returning an error if they are different constants.
|
||||
///
|
||||
/// The merge rules are:
|
||||
/// - Null + Null: union the equivalence classes
|
||||
/// - Null + Constant: the constant becomes the canonical form for the null's class
|
||||
/// - Constant + Constant: error if different, no-op if same
|
||||
pub fn merge(&mut self, left: &Term, right: &Term) -> Result<(), MergeError> {
|
||||
match (left, right) {
|
||||
(Term::Constant(c1), Term::Constant(c2)) => {
|
||||
if c1 != c2 {
|
||||
Err(MergeError {
|
||||
left: left.clone(),
|
||||
right: right.clone(),
|
||||
})
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
(Term::Null(id), Term::Constant(_)) => {
|
||||
let root = self.find(*id);
|
||||
// Check if this class already has a canonical constant
|
||||
if let Some(existing) = self.canonical.get(&root) {
|
||||
if existing != right {
|
||||
return Err(MergeError {
|
||||
left: existing.clone(),
|
||||
right: right.clone(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
self.canonical.insert(root, right.clone());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
(Term::Constant(_), Term::Null(id)) => {
|
||||
let root = self.find(*id);
|
||||
if let Some(existing) = self.canonical.get(&root) {
|
||||
if existing != left {
|
||||
return Err(MergeError {
|
||||
left: left.clone(),
|
||||
right: existing.clone(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
self.canonical.insert(root, left.clone());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
(Term::Null(id1), Term::Null(id2)) => {
|
||||
let root1 = self.find(*id1);
|
||||
let root2 = self.find(*id2);
|
||||
|
||||
if root1 == root2 {
|
||||
return Ok(()); // Already in the same class
|
||||
}
|
||||
|
||||
// Check if both have canonical constants
|
||||
let canon1 = self.canonical.get(&root1).cloned();
|
||||
let canon2 = self.canonical.get(&root2).cloned();
|
||||
|
||||
match (&canon1, &canon2) {
|
||||
(Some(c1), Some(c2)) if c1 != c2 => {
|
||||
return Err(MergeError {
|
||||
left: c1.clone(),
|
||||
right: c2.clone(),
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Union by rank
|
||||
let rank1 = *self.rank.get(&root1).unwrap_or(&0);
|
||||
let rank2 = *self.rank.get(&root2).unwrap_or(&0);
|
||||
|
||||
let (new_root, child) = if rank1 < rank2 {
|
||||
(root2, root1)
|
||||
} else if rank1 > rank2 {
|
||||
(root1, root2)
|
||||
} else {
|
||||
self.rank.insert(root1, rank1 + 1);
|
||||
(root1, root2)
|
||||
};
|
||||
|
||||
self.parent.insert(child, new_root);
|
||||
|
||||
// Propagate canonical constant to new root
|
||||
let canonical = canon1.or(canon2);
|
||||
if let Some(c) = canonical {
|
||||
self.canonical.insert(new_root, c);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
// Variables shouldn't be merged (they shouldn't appear in instances)
|
||||
(Term::Variable(_), _) | (_, Term::Variable(_)) => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if two terms are in the same equivalence class.
|
||||
pub fn are_equivalent(&mut self, left: &Term, right: &Term) -> bool {
|
||||
self.canonical_term(left) == self.canonical_term(right)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_merge_nulls() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// Merge two nulls
|
||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||
|
||||
// They should now have the same canonical form
|
||||
let canon0 = uf.canonical_term(&Term::Null(0));
|
||||
let canon1 = uf.canonical_term(&Term::Null(1));
|
||||
assert_eq!(canon0, canon1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_null_with_constant() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// Merge null with constant
|
||||
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
||||
|
||||
// Null should now have the constant as canonical form
|
||||
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("alice"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_transitive() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// Merge: null0 = null1, null1 = constant
|
||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
|
||||
|
||||
// Both nulls should have the constant as canonical
|
||||
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("bob"));
|
||||
assert_eq!(uf.canonical_term(&Term::Null(1)), Term::constant("bob"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_different_constants_fails() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// Merging different constants should fail
|
||||
let result = uf.merge(&Term::constant("alice"), &Term::constant("bob"));
|
||||
assert!(result.is_err());
|
||||
|
||||
let err = result.unwrap_err();
|
||||
assert_eq!(err.left, Term::constant("alice"));
|
||||
assert_eq!(err.right, Term::constant("bob"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_same_constant_ok() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// Merging the same constant should be fine
|
||||
uf.merge(&Term::constant("alice"), &Term::constant("alice"))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_nulls_with_conflicting_constants() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// null0 = alice, null1 = bob
|
||||
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
||||
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
|
||||
|
||||
// Merging null0 and null1 should fail (alice != bob)
|
||||
let result = uf.merge(&Term::Null(0), &Term::Null(1));
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_atom() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
||||
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
|
||||
|
||||
let atom = Atom::new("R", vec![Term::Null(0), Term::Null(1), Term::constant("x")]);
|
||||
|
||||
let canonical = uf.canonicalize_atom(&atom);
|
||||
|
||||
assert_eq!(canonical.terms[0], Term::constant("alice"));
|
||||
// Null(1) and Null(2) are equivalent but have no constant, so canonical is one of them
|
||||
assert!(canonical.terms[1].is_null());
|
||||
assert_eq!(canonical.terms[2], Term::constant("x"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_path_compression() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
// Create a chain: 0 -> 1 -> 2 -> 3
|
||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
|
||||
uf.merge(&Term::Null(2), &Term::Null(3)).unwrap();
|
||||
|
||||
// Finding from 0 should compress the path
|
||||
let root = uf.find(0);
|
||||
|
||||
// All should point to the same root
|
||||
assert_eq!(uf.find(1), root);
|
||||
assert_eq!(uf.find(2), root);
|
||||
assert_eq!(uf.find(3), root);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_are_equivalent() {
|
||||
let mut uf = UnionFind::new();
|
||||
|
||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||
|
||||
assert!(uf.are_equivalent(&Term::Null(0), &Term::Null(1)));
|
||||
assert!(!uf.are_equivalent(&Term::Null(0), &Term::Null(2)));
|
||||
assert!(uf.are_equivalent(&Term::constant("a"), &Term::constant("a")));
|
||||
assert!(!uf.are_equivalent(&Term::constant("a"), &Term::constant("b")));
|
||||
}
|
||||
}
|
||||
@ -86,7 +86,6 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
|
||||
instance,
|
||||
steps,
|
||||
terminated: false,
|
||||
error: None,
|
||||
},
|
||||
provenance,
|
||||
};
|
||||
@ -106,7 +105,6 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
|
||||
instance,
|
||||
steps,
|
||||
terminated: true,
|
||||
error: None,
|
||||
},
|
||||
provenance,
|
||||
};
|
||||
|
||||
@ -3,7 +3,6 @@ pub mod frontend;
|
||||
|
||||
// Re-export main types for convenience
|
||||
pub use chase::{
|
||||
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Egd, EgdBuilder, Equality, Instance,
|
||||
InstanceError, MergeError, Rule, RuleBuilder, Substitution, Term, UnionFind, chase, chase_full,
|
||||
chase_with_config, chase_with_egds, standard_chase,
|
||||
Atom, ChaseConfig, ChaseResult, Instance, InstanceError, Rule, Substitution, Term, chase,
|
||||
chase_with_config,
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user