Compare commits
2 Commits
fc66b8e393
...
ee21bee039
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ee21bee039 | ||
|
|
0e2db4e07b |
@ -1,14 +1,13 @@
|
|||||||
## Chase-rs
|
## Chase-rs
|
||||||
|
|
||||||
An implementation of the chase algorithm in Rust for advanced reasoning engines.
|
An implementation of the chase algorithm in Rust for reasoning engines.
|
||||||
|
|
||||||
### Overview
|
### Overview
|
||||||
|
|
||||||
The chase algorithm is a fundamental technique in context of database theory and knowledge representation used for:
|
The chase algorithm is a technique in context of database theory and knowledge representation used for:
|
||||||
|
|
||||||
- Query answering under tuple-generating dependencies (TGDs)
|
- Query answering under tuple-generating dependencies (TGDs)
|
||||||
- Computing universal models
|
- Computing universal models
|
||||||
- Ontology-based data access (OBDA)
|
|
||||||
- Datalog with existential rules
|
- Datalog with existential rules
|
||||||
|
|
||||||
This implementation provides a restricted-chase style materialization with active-trigger checks.
|
This implementation provides a restricted-chase style materialization with active-trigger checks.
|
||||||
|
|||||||
@ -1,12 +1,44 @@
|
|||||||
//! Core chase algorithm implementation.
|
//! Core chase algorithm implementation.
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
use super::atom::Atom;
|
use super::atom::Atom;
|
||||||
use super::instance::Instance;
|
use super::instance::Instance;
|
||||||
use super::rule::Rule;
|
use super::rule::{Egd, Rule};
|
||||||
use super::substitution::{Substitution, unify_atom};
|
use super::substitution::{Substitution, unify_atom};
|
||||||
use super::term::Term;
|
use super::term::Term;
|
||||||
|
use super::union_find::{MergeError, UnionFind};
|
||||||
|
|
||||||
|
/// Error that can occur during chase with EGDs.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum ChaseError {
|
||||||
|
/// EGD tried to equate two different constants.
|
||||||
|
EgdConflict(MergeError),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for ChaseError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
ChaseError::EgdConflict(e) => write!(f, "EGD conflict: {}", e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for ChaseError {
|
||||||
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||||
|
match self {
|
||||||
|
ChaseError::EgdConflict(e) => Some(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<MergeError> for ChaseError {
|
||||||
|
fn from(e: MergeError) -> Self {
|
||||||
|
ChaseError::EgdConflict(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Result of running the chase algorithm.
|
/// Result of running the chase algorithm.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -17,6 +49,20 @@ pub struct ChaseResult {
|
|||||||
pub steps: usize,
|
pub steps: usize,
|
||||||
/// Whether the chase terminated (vs hitting a limit).
|
/// Whether the chase terminated (vs hitting a limit).
|
||||||
pub terminated: bool,
|
pub terminated: bool,
|
||||||
|
/// Error that occurred during chase (e.g., EGD conflict).
|
||||||
|
pub error: Option<ChaseError>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The variant of chase algorithm to use.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||||
|
pub enum ChaseVariant {
|
||||||
|
/// Standard chase: no trigger tracking, terminates only when no new facts.
|
||||||
|
/// May re-apply rules with the same bindings if heads are satisfied.
|
||||||
|
Standard,
|
||||||
|
/// Restricted chase: uses trigger tracking to avoid re-applying the same
|
||||||
|
/// rule with the same frontier variable bindings. This is the default.
|
||||||
|
#[default]
|
||||||
|
Restricted,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Configuration for the chase algorithm.
|
/// Configuration for the chase algorithm.
|
||||||
@ -24,11 +70,16 @@ pub struct ChaseResult {
|
|||||||
pub struct ChaseConfig {
|
pub struct ChaseConfig {
|
||||||
/// Maximum number of chase steps before giving up.
|
/// Maximum number of chase steps before giving up.
|
||||||
pub max_steps: usize,
|
pub max_steps: usize,
|
||||||
|
/// The chase variant to use.
|
||||||
|
pub variant: ChaseVariant,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for ChaseConfig {
|
impl Default for ChaseConfig {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
ChaseConfig { max_steps: 10_000 }
|
ChaseConfig {
|
||||||
|
max_steps: 10_000,
|
||||||
|
variant: ChaseVariant::default(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -79,7 +130,7 @@ impl Trigger {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run the standard chase algorithm.
|
/// Run the restricted chase algorithm (default).
|
||||||
///
|
///
|
||||||
/// The chase repeatedly applies rules to derive new facts until no more
|
/// The chase repeatedly applies rules to derive new facts until no more
|
||||||
/// facts can be derived (fixpoint) or a limit is reached.
|
/// facts can be derived (fixpoint) or a limit is reached.
|
||||||
@ -91,6 +142,22 @@ pub fn chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
|||||||
chase_with_config(instance, rules, ChaseConfig::default())
|
chase_with_config(instance, rules, ChaseConfig::default())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Run the standard chase algorithm (no trigger tracking).
|
||||||
|
///
|
||||||
|
/// The standard chase applies rules whenever the body matches, without
|
||||||
|
/// tracking which rule applications have been performed. It terminates
|
||||||
|
/// only when no new facts are derived.
|
||||||
|
///
|
||||||
|
/// This is simpler but may be less efficient for rules with existentials,
|
||||||
|
/// as it doesn't prevent re-checking already-satisfied rule applications.
|
||||||
|
pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
||||||
|
let config = ChaseConfig {
|
||||||
|
variant: ChaseVariant::Standard,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
chase_with_config(instance, rules, config)
|
||||||
|
}
|
||||||
|
|
||||||
/// Run the chase with custom configuration.
|
/// Run the chase with custom configuration.
|
||||||
pub fn chase_with_config(
|
pub fn chase_with_config(
|
||||||
mut instance: Instance,
|
mut instance: Instance,
|
||||||
@ -107,10 +174,16 @@ pub fn chase_with_config(
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: false,
|
terminated: false,
|
||||||
|
error: None,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let new_facts = chase_step(&instance, rules, &mut null_gen, &mut applied_triggers);
|
let new_facts = match config.variant {
|
||||||
|
ChaseVariant::Standard => standard_chase_step(&instance, rules, &mut null_gen),
|
||||||
|
ChaseVariant::Restricted => {
|
||||||
|
restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if new_facts.is_empty() {
|
if new_facts.is_empty() {
|
||||||
// Fixpoint reached
|
// Fixpoint reached
|
||||||
@ -118,6 +191,7 @@ pub fn chase_with_config(
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: true,
|
terminated: true,
|
||||||
|
error: None,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,8 +202,40 @@ pub fn chase_with_config(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Perform a single chase step: find all applicable rule instances and derive new facts.
|
/// Perform a single standard chase step: apply rules without trigger tracking.
|
||||||
fn chase_step(
|
fn standard_chase_step(
|
||||||
|
instance: &Instance,
|
||||||
|
rules: &[Rule],
|
||||||
|
null_gen: &mut NullGenerator,
|
||||||
|
) -> Vec<Atom> {
|
||||||
|
let mut new_facts = Vec::new();
|
||||||
|
|
||||||
|
for rule in rules {
|
||||||
|
// Find all ways to match the rule body against the instance
|
||||||
|
let matches = find_body_matches(instance, &rule.body);
|
||||||
|
|
||||||
|
for subst in matches {
|
||||||
|
// In standard chase, we only check if head is satisfied
|
||||||
|
if head_is_satisfied(instance, rule, &subst) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate head atoms with this substitution
|
||||||
|
let derived = apply_rule_head(rule, &subst, null_gen);
|
||||||
|
|
||||||
|
for fact in derived {
|
||||||
|
if !instance.contains(&fact) {
|
||||||
|
new_facts.push(fact);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
new_facts
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Perform a single restricted chase step: use trigger tracking to avoid redundant applications.
|
||||||
|
fn restricted_chase_step(
|
||||||
instance: &Instance,
|
instance: &Instance,
|
||||||
rules: &[Rule],
|
rules: &[Rule],
|
||||||
null_gen: &mut NullGenerator,
|
null_gen: &mut NullGenerator,
|
||||||
@ -186,8 +292,8 @@ fn find_body_matches(instance: &Instance, body: &[Atom]) -> Vec<Substitution> {
|
|||||||
// Apply current substitution to the body atom
|
// Apply current substitution to the body atom
|
||||||
let pattern = subst.apply_atom(body_atom);
|
let pattern = subst.apply_atom(body_atom);
|
||||||
|
|
||||||
// Find all facts that match this pattern
|
// Find all facts that match this pattern using indexed lookup
|
||||||
for fact in instance.facts_for_predicate(&pattern.predicate) {
|
for fact in instance.facts_matching_pattern(&pattern) {
|
||||||
if let Some(new_subst) = unify_atom(&pattern, fact) {
|
if let Some(new_subst) = unify_atom(&pattern, fact) {
|
||||||
// Combine substitutions
|
// Combine substitutions
|
||||||
let mut combined = subst.clone();
|
let mut combined = subst.clone();
|
||||||
@ -259,6 +365,145 @@ fn term_null_id(term: &Term) -> Option<usize> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A trigger for EGD applications, tracking which EGD was applied with which body bindings.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
struct EgdTrigger {
|
||||||
|
egd_index: usize,
|
||||||
|
/// All body variable bindings for the EGD.
|
||||||
|
body_bindings: Vec<(String, Term)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EgdTrigger {
|
||||||
|
fn new(egd_index: usize, egd: &Egd, subst: &Substitution) -> Self {
|
||||||
|
let body_vars = egd.body_variables();
|
||||||
|
let mut bindings: Vec<_> = body_vars
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|v| subst.get(&v).map(|t| (v, t.clone())))
|
||||||
|
.collect();
|
||||||
|
bindings.sort_by(|a, b| a.0.cmp(&b.0));
|
||||||
|
|
||||||
|
EgdTrigger {
|
||||||
|
egd_index,
|
||||||
|
body_bindings: bindings,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply EGDs to the instance, returning any merge error that occurs.
|
||||||
|
fn apply_egds(
|
||||||
|
instance: &Instance,
|
||||||
|
egds: &[Egd],
|
||||||
|
uf: &mut UnionFind,
|
||||||
|
applied_egd_triggers: &mut HashSet<EgdTrigger>,
|
||||||
|
) -> Result<bool, MergeError> {
|
||||||
|
let mut made_changes = false;
|
||||||
|
|
||||||
|
for (egd_idx, egd) in egds.iter().enumerate() {
|
||||||
|
let matches = find_body_matches(instance, &egd.body);
|
||||||
|
|
||||||
|
for subst in matches {
|
||||||
|
let trigger = EgdTrigger::new(egd_idx, egd, &subst);
|
||||||
|
|
||||||
|
if applied_egd_triggers.contains(&trigger) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
applied_egd_triggers.insert(trigger);
|
||||||
|
|
||||||
|
// Get the terms that should be equal
|
||||||
|
let left = subst.apply_term(&egd.equality.left);
|
||||||
|
let right = subst.apply_term(&egd.equality.right);
|
||||||
|
|
||||||
|
// Merge them in the union-find
|
||||||
|
uf.merge(&left, &right)?;
|
||||||
|
made_changes = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(made_changes)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run chase with both TGDs and EGDs.
|
||||||
|
///
|
||||||
|
/// The chase alternates between:
|
||||||
|
/// 1. Applying TGDs to derive new facts
|
||||||
|
/// 2. Applying EGDs to enforce equalities
|
||||||
|
/// 3. Canonicalizing the instance to reflect merged nulls
|
||||||
|
///
|
||||||
|
/// Terminates when fixpoint is reached or an error occurs (EGD conflict).
|
||||||
|
pub fn chase_with_egds(instance: Instance, tgds: &[Rule], egds: &[Egd]) -> ChaseResult {
|
||||||
|
chase_full(instance, tgds, egds, ChaseConfig::default())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run the full chase with TGDs, EGDs, and custom configuration.
|
||||||
|
pub fn chase_full(
|
||||||
|
mut instance: Instance,
|
||||||
|
tgds: &[Rule],
|
||||||
|
egds: &[Egd],
|
||||||
|
config: ChaseConfig,
|
||||||
|
) -> ChaseResult {
|
||||||
|
let mut null_gen = NullGenerator::seeded_from(&instance, tgds);
|
||||||
|
let mut applied_triggers: HashSet<Trigger> = HashSet::new();
|
||||||
|
let mut applied_egd_triggers: HashSet<EgdTrigger> = HashSet::new();
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
let mut steps = 0;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if steps >= config.max_steps {
|
||||||
|
return ChaseResult {
|
||||||
|
instance,
|
||||||
|
steps,
|
||||||
|
terminated: false,
|
||||||
|
error: None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply TGDs
|
||||||
|
let new_facts = match config.variant {
|
||||||
|
ChaseVariant::Standard => standard_chase_step(&instance, tgds, &mut null_gen),
|
||||||
|
ChaseVariant::Restricted => {
|
||||||
|
restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let tgd_changes = !new_facts.is_empty();
|
||||||
|
for fact in new_facts {
|
||||||
|
instance.add(fact);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply EGDs
|
||||||
|
let egd_result = apply_egds(&instance, egds, &mut uf, &mut applied_egd_triggers);
|
||||||
|
|
||||||
|
match egd_result {
|
||||||
|
Err(e) => {
|
||||||
|
return ChaseResult {
|
||||||
|
instance,
|
||||||
|
steps,
|
||||||
|
terminated: false,
|
||||||
|
error: Some(ChaseError::EgdConflict(e)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(egd_changes) => {
|
||||||
|
// Canonicalize instance if EGDs made changes
|
||||||
|
if egd_changes {
|
||||||
|
instance = instance.canonicalize(&mut uf);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for fixpoint
|
||||||
|
if !tgd_changes && !egd_changes {
|
||||||
|
return ChaseResult {
|
||||||
|
instance,
|
||||||
|
steps,
|
||||||
|
terminated: true,
|
||||||
|
error: None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
steps += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -399,4 +644,286 @@ mod tests {
|
|||||||
assert!(result.terminated);
|
assert!(result.terminated);
|
||||||
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_standard_chase_basic() {
|
||||||
|
// Test standard chase with datalog rules
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("bob"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let rule1 = RuleBuilder::new()
|
||||||
|
.when("Parent", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.then("Ancestor", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let rule2 = RuleBuilder::new()
|
||||||
|
.when("Ancestor", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.when("Parent", vec![Term::var("Y"), Term::var("Z")])
|
||||||
|
.then("Ancestor", vec![Term::var("X"), Term::var("Z")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = standard_chase(instance, &[rule1, rule2]);
|
||||||
|
|
||||||
|
assert!(result.terminated);
|
||||||
|
let ancestors = result.instance.facts_for_predicate("Ancestor");
|
||||||
|
assert_eq!(ancestors.len(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_standard_and_restricted_reach_same_fixpoint() {
|
||||||
|
// For datalog rules (no existentials), both should reach the same result
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
|
||||||
|
Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]),
|
||||||
|
Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let rule1 = RuleBuilder::new()
|
||||||
|
.when("Edge", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.then("Path", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let rule2 = RuleBuilder::new()
|
||||||
|
.when("Path", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.when("Edge", vec![Term::var("Y"), Term::var("Z")])
|
||||||
|
.then("Path", vec![Term::var("X"), Term::var("Z")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let rules = vec![rule1, rule2];
|
||||||
|
|
||||||
|
let standard_result = standard_chase(instance.clone(), &rules);
|
||||||
|
let restricted_result = chase(instance, &rules);
|
||||||
|
|
||||||
|
assert!(standard_result.terminated);
|
||||||
|
assert!(restricted_result.terminated);
|
||||||
|
|
||||||
|
// Both should derive the same paths
|
||||||
|
let standard_paths = standard_result.instance.facts_for_predicate("Path");
|
||||||
|
let restricted_paths = restricted_result.instance.facts_for_predicate("Path");
|
||||||
|
assert_eq!(standard_paths.len(), restricted_paths.len());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_standard_chase_with_existentials() {
|
||||||
|
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let rule = RuleBuilder::new()
|
||||||
|
.when("Person", vec![Term::var("X")])
|
||||||
|
.then("HasSSN", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = standard_chase(instance, &[rule]);
|
||||||
|
|
||||||
|
assert!(result.terminated);
|
||||||
|
let has_ssn = result.instance.facts_for_predicate("HasSSN");
|
||||||
|
assert_eq!(has_ssn.len(), 1);
|
||||||
|
assert!(matches!(has_ssn[0].terms[1], Term::Null(_)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_chase_variant_config() {
|
||||||
|
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let rule = RuleBuilder::new()
|
||||||
|
.when("A", vec![Term::var("X")])
|
||||||
|
.then("B", vec![Term::var("X")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// Test standard variant via config
|
||||||
|
let standard_config = ChaseConfig {
|
||||||
|
variant: ChaseVariant::Standard,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let standard_result = chase_with_config(instance.clone(), &[rule.clone()], standard_config);
|
||||||
|
|
||||||
|
// Test restricted variant via config
|
||||||
|
let restricted_config = ChaseConfig {
|
||||||
|
variant: ChaseVariant::Restricted,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let restricted_result = chase_with_config(instance, &[rule], restricted_config);
|
||||||
|
|
||||||
|
assert!(standard_result.terminated);
|
||||||
|
assert!(restricted_result.terminated);
|
||||||
|
assert_eq!(
|
||||||
|
standard_result.instance.facts_for_predicate("B").len(),
|
||||||
|
restricted_result.instance.facts_for_predicate("B").len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// EGD tests
|
||||||
|
use crate::chase::rule::EgdBuilder;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_egd_functional_dependency_merges_nulls() {
|
||||||
|
// Functional dependency: R(X, Y), R(X, Z) -> Y = Z
|
||||||
|
// This should merge the nulls created for the same X value
|
||||||
|
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// TGD: Person(X) -> R(X, Y)
|
||||||
|
let tgd = RuleBuilder::new()
|
||||||
|
.when("Person", vec![Term::var("X")])
|
||||||
|
.then("R", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// First run without EGD to create two separate facts with nulls
|
||||||
|
// We simulate this by having two different TGDs create facts
|
||||||
|
let tgd2 = RuleBuilder::new()
|
||||||
|
.when("Person", vec![Term::var("X")])
|
||||||
|
.then("S", vec![Term::var("X"), Term::var("Z")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// EGD: R(X, Y), S(X, Z) -> Y = Z
|
||||||
|
let egd = EgdBuilder::new()
|
||||||
|
.when("R", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.when("S", vec![Term::var("X"), Term::var("Z")])
|
||||||
|
.then_equal(Term::var("Y"), Term::var("Z"))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_with_egds(instance, &[tgd, tgd2], &[egd]);
|
||||||
|
|
||||||
|
assert!(result.terminated);
|
||||||
|
assert!(result.error.is_none());
|
||||||
|
|
||||||
|
// After EGD application, the nulls should be merged
|
||||||
|
let r_facts = result.instance.facts_for_predicate("R");
|
||||||
|
let s_facts = result.instance.facts_for_predicate("S");
|
||||||
|
assert_eq!(r_facts.len(), 1);
|
||||||
|
assert_eq!(s_facts.len(), 1);
|
||||||
|
|
||||||
|
// The second argument of R and S should be the same (merged)
|
||||||
|
assert_eq!(r_facts[0].terms[1], s_facts[0].terms[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_egd_conflict_different_constants() {
|
||||||
|
// EGD that tries to equate different constants should fail
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
|
||||||
|
Atom::new("R", vec![Term::constant("x"), Term::constant("bob")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// EGD: R(X, Y), R(X, Z) -> Y = Z
|
||||||
|
// This should fail because alice != bob
|
||||||
|
let egd = EgdBuilder::new()
|
||||||
|
.when("R", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.when("R", vec![Term::var("X"), Term::var("Z")])
|
||||||
|
.then_equal(Term::var("Y"), Term::var("Z"))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_with_egds(instance, &[], &[egd]);
|
||||||
|
|
||||||
|
assert!(!result.terminated);
|
||||||
|
assert!(result.error.is_some());
|
||||||
|
assert!(matches!(result.error, Some(ChaseError::EgdConflict(_))));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_egd_null_merged_with_constant() {
|
||||||
|
// EGD that merges a null with a constant
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new("R", vec![Term::constant("x"), Term::Null(0)]),
|
||||||
|
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// EGD: R(X, Y), R(X, Z) -> Y = Z
|
||||||
|
let egd = EgdBuilder::new()
|
||||||
|
.when("R", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.when("R", vec![Term::var("X"), Term::var("Z")])
|
||||||
|
.then_equal(Term::var("Y"), Term::var("Z"))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_with_egds(instance, &[], &[egd]);
|
||||||
|
|
||||||
|
assert!(result.terminated);
|
||||||
|
assert!(result.error.is_none());
|
||||||
|
|
||||||
|
// After canonicalization, there should be only one R fact
|
||||||
|
// with "alice" as the second argument
|
||||||
|
let r_facts = result.instance.facts_for_predicate("R");
|
||||||
|
assert_eq!(r_facts.len(), 1);
|
||||||
|
assert_eq!(r_facts[0].terms[1], Term::constant("alice"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tgd_and_egd_interaction() {
|
||||||
|
// TGD creates facts, EGD enforces constraints
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new("Person", vec![Term::constant("alice")]),
|
||||||
|
Atom::new("Person", vec![Term::constant("bob")]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// TGD: Person(X) -> HasManager(X, Y)
|
||||||
|
let tgd = RuleBuilder::new()
|
||||||
|
.when("Person", vec![Term::var("X")])
|
||||||
|
.then("HasManager", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// EGD: HasManager(X, Y), HasManager(Z, W) -> Y = W
|
||||||
|
// Everyone has the same manager
|
||||||
|
let egd = EgdBuilder::new()
|
||||||
|
.when("HasManager", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.when("HasManager", vec![Term::var("Z"), Term::var("W")])
|
||||||
|
.then_equal(Term::var("Y"), Term::var("W"))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let result = chase_with_egds(instance, &[tgd], &[egd]);
|
||||||
|
|
||||||
|
assert!(result.terminated);
|
||||||
|
assert!(result.error.is_none());
|
||||||
|
|
||||||
|
let manager_facts = result.instance.facts_for_predicate("HasManager");
|
||||||
|
assert_eq!(manager_facts.len(), 2);
|
||||||
|
|
||||||
|
// Both should have the same manager (second argument)
|
||||||
|
let managers: Vec<_> = manager_facts.iter().map(|f| &f.terms[1]).collect();
|
||||||
|
assert_eq!(managers[0], managers[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_chase_full_with_custom_config() {
|
||||||
|
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let tgd = RuleBuilder::new()
|
||||||
|
.when("A", vec![Term::var("X")])
|
||||||
|
.then("B", vec![Term::var("X"), Term::var("Y")])
|
||||||
|
.build();
|
||||||
|
|
||||||
|
let config = ChaseConfig {
|
||||||
|
max_steps: 100,
|
||||||
|
variant: ChaseVariant::Standard,
|
||||||
|
};
|
||||||
|
|
||||||
|
let result = chase_full(instance, &[tgd], &[], config);
|
||||||
|
|
||||||
|
assert!(result.terminated);
|
||||||
|
assert!(result.error.is_none());
|
||||||
|
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,6 +5,8 @@ use std::error::Error;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use super::atom::Atom;
|
use super::atom::Atom;
|
||||||
|
use super::term::Term;
|
||||||
|
use super::union_find::UnionFind;
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum InstanceError {
|
pub enum InstanceError {
|
||||||
@ -23,10 +25,30 @@ impl fmt::Display for InstanceError {
|
|||||||
|
|
||||||
impl Error for InstanceError {}
|
impl Error for InstanceError {}
|
||||||
|
|
||||||
|
/// A key for argument-position indexing: (predicate, position, value).
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
struct IndexKey {
|
||||||
|
predicate: String,
|
||||||
|
position: usize,
|
||||||
|
value: Term,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IndexKey {
|
||||||
|
fn new(predicate: &str, position: usize, value: &Term) -> Self {
|
||||||
|
IndexKey {
|
||||||
|
predicate: predicate.to_string(),
|
||||||
|
position,
|
||||||
|
value: value.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A database instance containing ground atoms.
|
/// A database instance containing ground atoms.
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub struct Instance {
|
pub struct Instance {
|
||||||
facts_by_predicate: HashMap<String, HashSet<Atom>>,
|
facts_by_predicate: HashMap<String, HashSet<Atom>>,
|
||||||
|
/// Secondary index: maps (predicate, position, value) to atoms with that value at that position.
|
||||||
|
argument_index: HashMap<IndexKey, HashSet<Atom>>,
|
||||||
len: usize,
|
len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,6 +57,7 @@ impl Instance {
|
|||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Instance {
|
Instance {
|
||||||
facts_by_predicate: HashMap::new(),
|
facts_by_predicate: HashMap::new(),
|
||||||
|
argument_index: HashMap::new(),
|
||||||
len: 0,
|
len: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -49,8 +72,16 @@ impl Instance {
|
|||||||
.facts_by_predicate
|
.facts_by_predicate
|
||||||
.entry(fact.predicate.clone())
|
.entry(fact.predicate.clone())
|
||||||
.or_default();
|
.or_default();
|
||||||
let inserted = bucket.insert(fact);
|
let inserted = bucket.insert(fact.clone());
|
||||||
if inserted {
|
if inserted {
|
||||||
|
// Populate argument indexes
|
||||||
|
for (position, term) in fact.terms.iter().enumerate() {
|
||||||
|
let key = IndexKey::new(&fact.predicate, position, term);
|
||||||
|
self.argument_index
|
||||||
|
.entry(key)
|
||||||
|
.or_default()
|
||||||
|
.insert(fact.clone());
|
||||||
|
}
|
||||||
self.len += 1;
|
self.len += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,7 +90,9 @@ impl Instance {
|
|||||||
|
|
||||||
/// Add a fact to the instance. Returns true if the fact was new.
|
/// Add a fact to the instance. Returns true if the fact was new.
|
||||||
///
|
///
|
||||||
|
/// # Panics
|
||||||
/// Panics if the atom is not ground.
|
/// Panics if the atom is not ground.
|
||||||
|
#[allow(clippy::expect_used)]
|
||||||
pub fn add(&mut self, fact: Atom) -> bool {
|
pub fn add(&mut self, fact: Atom) -> bool {
|
||||||
self.try_add(fact).expect("facts must be ground atoms")
|
self.try_add(fact).expect("facts must be ground atoms")
|
||||||
}
|
}
|
||||||
@ -96,6 +129,75 @@ impl Instance {
|
|||||||
.flat_map(|facts| facts.iter())
|
.flat_map(|facts| facts.iter())
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get facts matching a pattern, using argument indexes for efficient lookup.
|
||||||
|
///
|
||||||
|
/// The pattern may contain variables (which match anything) and ground terms
|
||||||
|
/// (which must match exactly). This method uses a selectivity heuristic to
|
||||||
|
/// choose the most selective index for lookup.
|
||||||
|
pub fn facts_matching_pattern(&self, pattern: &Atom) -> Vec<&Atom> {
|
||||||
|
// Find bound (ground) positions and their selectivity
|
||||||
|
let bound_positions: Vec<(usize, &Term)> = pattern
|
||||||
|
.terms
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter(|(_, term)| term.is_ground())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if bound_positions.is_empty() {
|
||||||
|
// No bound arguments, fall back to predicate lookup
|
||||||
|
return self.facts_for_predicate(&pattern.predicate);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use selectivity heuristic: pick the bound position with the smallest bucket
|
||||||
|
let mut best_key: Option<IndexKey> = None;
|
||||||
|
let mut best_size = usize::MAX;
|
||||||
|
|
||||||
|
for (position, term) in &bound_positions {
|
||||||
|
let key = IndexKey::new(&pattern.predicate, *position, term);
|
||||||
|
let size = self.argument_index.get(&key).map(|s| s.len()).unwrap_or(0);
|
||||||
|
|
||||||
|
if size < best_size {
|
||||||
|
best_size = size;
|
||||||
|
best_key = Some(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If best bucket is empty, return empty
|
||||||
|
let Some(key) = best_key else {
|
||||||
|
return Vec::new();
|
||||||
|
};
|
||||||
|
|
||||||
|
let Some(candidates) = self.argument_index.get(&key) else {
|
||||||
|
return Vec::new();
|
||||||
|
};
|
||||||
|
|
||||||
|
// Filter candidates by arity and all other bound positions
|
||||||
|
let pattern_arity = pattern.arity();
|
||||||
|
candidates
|
||||||
|
.iter()
|
||||||
|
.filter(|fact| {
|
||||||
|
fact.arity() == pattern_arity
|
||||||
|
&& bound_positions
|
||||||
|
.iter()
|
||||||
|
.all(|(pos, term)| &fact.terms[*pos] == *term)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new instance with all atoms canonicalized according to the union-find.
|
||||||
|
///
|
||||||
|
/// This replaces all nulls with their canonical representatives and removes
|
||||||
|
/// duplicate atoms that result from canonicalization.
|
||||||
|
pub fn canonicalize(&self, uf: &mut UnionFind) -> Instance {
|
||||||
|
let mut new_instance = Instance::new();
|
||||||
|
for atom in self.iter() {
|
||||||
|
let canonical = uf.canonicalize_atom(atom);
|
||||||
|
// try_add handles deduplication
|
||||||
|
let _ = new_instance.try_add(canonical);
|
||||||
|
}
|
||||||
|
new_instance
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Instance {
|
impl fmt::Display for Instance {
|
||||||
@ -171,4 +273,77 @@ mod tests {
|
|||||||
assert!(matches!(error, InstanceError::NonGroundFact(_)));
|
assert!(matches!(error, InstanceError::NonGroundFact(_)));
|
||||||
assert!(instance.is_empty());
|
assert!(instance.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_facts_matching_pattern_with_bound_arg() {
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("bob"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Pattern with first argument bound to "alice"
|
||||||
|
let pattern = Atom::new("Parent", vec![Term::constant("alice"), Term::var("Y")]);
|
||||||
|
let matches = instance.facts_matching_pattern(&pattern);
|
||||||
|
assert_eq!(matches.len(), 2);
|
||||||
|
|
||||||
|
// Pattern with second argument bound to "carol"
|
||||||
|
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::constant("carol")]);
|
||||||
|
let matches = instance.facts_matching_pattern(&pattern);
|
||||||
|
assert_eq!(matches.len(), 2);
|
||||||
|
|
||||||
|
// Pattern with both arguments bound
|
||||||
|
let pattern = Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
);
|
||||||
|
let matches = instance.facts_matching_pattern(&pattern);
|
||||||
|
assert_eq!(matches.len(), 1);
|
||||||
|
|
||||||
|
// Pattern with no bound arguments (all variables)
|
||||||
|
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::var("Y")]);
|
||||||
|
let matches = instance.facts_matching_pattern(&pattern);
|
||||||
|
assert_eq!(matches.len(), 3);
|
||||||
|
|
||||||
|
// Pattern with non-matching bound argument
|
||||||
|
let pattern = Atom::new("Parent", vec![Term::constant("dave"), Term::var("Y")]);
|
||||||
|
let matches = instance.facts_matching_pattern(&pattern);
|
||||||
|
assert_eq!(matches.len(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_facts_matching_pattern_selectivity() {
|
||||||
|
// Create instance with skewed distribution
|
||||||
|
let mut instance = Instance::new();
|
||||||
|
|
||||||
|
// Many facts with first arg "common"
|
||||||
|
for i in 0..100 {
|
||||||
|
instance.add(Atom::new(
|
||||||
|
"R",
|
||||||
|
vec![Term::constant("common"), Term::constant(format!("v{}", i))],
|
||||||
|
));
|
||||||
|
}
|
||||||
|
// One fact with first arg "rare"
|
||||||
|
instance.add(Atom::new(
|
||||||
|
"R",
|
||||||
|
vec![Term::constant("rare"), Term::constant("x")],
|
||||||
|
));
|
||||||
|
|
||||||
|
// Pattern matching on "rare" should use more selective index
|
||||||
|
let pattern = Atom::new("R", vec![Term::constant("rare"), Term::var("Y")]);
|
||||||
|
let matches = instance.facts_matching_pattern(&pattern);
|
||||||
|
assert_eq!(matches.len(), 1);
|
||||||
|
assert_eq!(matches[0].terms[0], Term::constant("rare"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,12 +5,17 @@ pub mod instance;
|
|||||||
pub mod rule;
|
pub mod rule;
|
||||||
pub mod substitution;
|
pub mod substitution;
|
||||||
pub mod term;
|
pub mod term;
|
||||||
|
pub mod union_find;
|
||||||
|
|
||||||
mod engine;
|
mod engine;
|
||||||
|
|
||||||
pub use atom::Atom;
|
pub use atom::Atom;
|
||||||
pub use engine::{ChaseConfig, ChaseResult, chase, chase_with_config};
|
pub use engine::{
|
||||||
|
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
|
||||||
|
chase_with_egds, standard_chase,
|
||||||
|
};
|
||||||
pub use instance::{Instance, InstanceError};
|
pub use instance::{Instance, InstanceError};
|
||||||
pub use rule::Rule;
|
pub use rule::{Egd, EgdBuilder, Equality, Rule, RuleBuilder};
|
||||||
pub use substitution::Substitution;
|
pub use substitution::Substitution;
|
||||||
pub use term::Term;
|
pub use term::Term;
|
||||||
|
pub use union_find::{MergeError, UnionFind};
|
||||||
|
|||||||
@ -121,6 +121,125 @@ impl Default for RuleBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An equality constraint between two terms.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub struct Equality {
|
||||||
|
pub left: Term,
|
||||||
|
pub right: Term,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Equality {
|
||||||
|
/// Create a new equality constraint.
|
||||||
|
pub fn new(left: Term, right: Term) -> Self {
|
||||||
|
Equality { left, right }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get all variables in this equality.
|
||||||
|
pub fn variables(&self) -> Vec<&String> {
|
||||||
|
let mut vars = Vec::new();
|
||||||
|
if let Term::Variable(v) = &self.left {
|
||||||
|
vars.push(v);
|
||||||
|
}
|
||||||
|
if let Term::Variable(v) = &self.right {
|
||||||
|
vars.push(v);
|
||||||
|
}
|
||||||
|
vars
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Equality {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(f, "{} = {}", self.left, self.right)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An equality-generating dependency (EGD).
|
||||||
|
///
|
||||||
|
/// EGDs have the form: body -> X = Y
|
||||||
|
/// When the body matches, the equality X = Y is enforced.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Egd {
|
||||||
|
/// The body atoms (conjunction).
|
||||||
|
pub body: Vec<Atom>,
|
||||||
|
/// The equality to enforce.
|
||||||
|
pub equality: Equality,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Egd {
|
||||||
|
/// Create a new EGD.
|
||||||
|
pub fn new(body: Vec<Atom>, equality: Equality) -> Self {
|
||||||
|
Egd { body, equality }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get all variables appearing in the body.
|
||||||
|
pub fn body_variables(&self) -> HashSet<String> {
|
||||||
|
self.body
|
||||||
|
.iter()
|
||||||
|
.flat_map(|a| a.variables())
|
||||||
|
.cloned()
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get all variables appearing in the equality.
|
||||||
|
pub fn equality_variables(&self) -> HashSet<String> {
|
||||||
|
self.equality.variables().into_iter().cloned().collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Egd {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
for (i, atom) in self.body.iter().enumerate() {
|
||||||
|
if i > 0 {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
write!(f, "{}", atom)?;
|
||||||
|
}
|
||||||
|
write!(f, " → {}", self.equality)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builder for creating EGDs with a fluent API.
|
||||||
|
pub struct EgdBuilder {
|
||||||
|
body: Vec<Atom>,
|
||||||
|
equality: Option<Equality>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EgdBuilder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
EgdBuilder {
|
||||||
|
body: Vec::new(),
|
||||||
|
equality: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an atom to the body.
|
||||||
|
pub fn when(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
||||||
|
self.body.push(Atom::new(predicate, terms));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the equality to enforce.
|
||||||
|
pub fn then_equal(mut self, left: Term, right: Term) -> Self {
|
||||||
|
self.equality = Some(Equality::new(left, right));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the EGD.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if `then_equal()` was not called.
|
||||||
|
#[allow(clippy::expect_used)]
|
||||||
|
pub fn build(self) -> Egd {
|
||||||
|
Egd::new(self.body, self.equality.expect("EGD must have an equality"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for EgdBuilder {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@ -39,6 +39,16 @@ impl Term {
|
|||||||
pub fn is_ground(&self) -> bool {
|
pub fn is_ground(&self) -> bool {
|
||||||
!self.is_variable()
|
!self.is_variable()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if this term is a null.
|
||||||
|
pub fn is_null(&self) -> bool {
|
||||||
|
matches!(self, Term::Null(_))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if this term is a constant.
|
||||||
|
pub fn is_constant(&self) -> bool {
|
||||||
|
matches!(self, Term::Constant(_))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Term {
|
impl fmt::Display for Term {
|
||||||
@ -76,4 +86,20 @@ mod tests {
|
|||||||
assert!(!v.is_ground());
|
assert!(!v.is_ground());
|
||||||
assert!(v.is_variable());
|
assert!(v.is_variable());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_is_null_and_is_constant() {
|
||||||
|
let c = Term::constant("alice");
|
||||||
|
let n = Term::null(1);
|
||||||
|
let v = Term::var("X");
|
||||||
|
|
||||||
|
assert!(c.is_constant());
|
||||||
|
assert!(!c.is_null());
|
||||||
|
|
||||||
|
assert!(n.is_null());
|
||||||
|
assert!(!n.is_constant());
|
||||||
|
|
||||||
|
assert!(!v.is_constant());
|
||||||
|
assert!(!v.is_null());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
323
src/chase/union_find.rs
Normal file
323
src/chase/union_find.rs
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
//! Union-Find data structure for managing null equivalence classes in EGDs.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
use super::atom::Atom;
|
||||||
|
use super::term::Term;
|
||||||
|
|
||||||
|
/// Error that occurs when merging two different constants.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub struct MergeError {
|
||||||
|
pub left: Term,
|
||||||
|
pub right: Term,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for MergeError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"cannot merge different constants: {} and {}",
|
||||||
|
self.left, self.right
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for MergeError {}
|
||||||
|
|
||||||
|
/// Union-Find data structure for managing equivalence classes of terms.
|
||||||
|
///
|
||||||
|
/// Used by EGDs to track which nulls have been merged and what their
|
||||||
|
/// canonical representatives are. Constants are always their own canonical
|
||||||
|
/// form, and merging a null with a constant makes the constant the canonical
|
||||||
|
/// form for that equivalence class.
|
||||||
|
#[derive(Debug, Clone, Default)]
|
||||||
|
pub struct UnionFind {
|
||||||
|
/// Maps null IDs to their parent in the union-find tree.
|
||||||
|
parent: HashMap<usize, usize>,
|
||||||
|
/// Rank for union-by-rank optimization.
|
||||||
|
rank: HashMap<usize, usize>,
|
||||||
|
/// Maps null IDs to their canonical term (constant if merged with one).
|
||||||
|
canonical: HashMap<usize, Term>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UnionFind {
|
||||||
|
/// Create a new empty union-find structure.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find the representative for a null ID, with path compression.
|
||||||
|
pub fn find(&mut self, null_id: usize) -> usize {
|
||||||
|
// Ensure the null exists in our structure and get the parent
|
||||||
|
let parent = *self.parent.entry(null_id).or_insert_with(|| {
|
||||||
|
self.rank.insert(null_id, 0);
|
||||||
|
null_id
|
||||||
|
});
|
||||||
|
|
||||||
|
if parent != null_id {
|
||||||
|
// Path compression: make this node point directly to the root
|
||||||
|
let root = self.find(parent);
|
||||||
|
self.parent.insert(null_id, root);
|
||||||
|
root
|
||||||
|
} else {
|
||||||
|
null_id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the canonical term for a given term.
|
||||||
|
///
|
||||||
|
/// - Constants are always their own canonical form.
|
||||||
|
/// - Variables are always their own canonical form (shouldn't appear in instances).
|
||||||
|
/// - Nulls are mapped to their canonical representative (which may be a constant
|
||||||
|
/// if they were merged with one).
|
||||||
|
pub fn canonical_term(&mut self, term: &Term) -> Term {
|
||||||
|
match term {
|
||||||
|
Term::Constant(_) | Term::Variable(_) => term.clone(),
|
||||||
|
Term::Null(id) => {
|
||||||
|
let root = self.find(*id);
|
||||||
|
// Check if this equivalence class has a canonical constant
|
||||||
|
if let Some(canonical) = self.canonical.get(&root) {
|
||||||
|
canonical.clone()
|
||||||
|
} else {
|
||||||
|
Term::Null(root)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Canonicalize an atom by replacing all terms with their canonical forms.
|
||||||
|
pub fn canonicalize_atom(&mut self, atom: &Atom) -> Atom {
|
||||||
|
Atom::new(
|
||||||
|
atom.predicate.clone(),
|
||||||
|
atom.terms.iter().map(|t| self.canonical_term(t)).collect(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Merge two terms, returning an error if they are different constants.
|
||||||
|
///
|
||||||
|
/// The merge rules are:
|
||||||
|
/// - Null + Null: union the equivalence classes
|
||||||
|
/// - Null + Constant: the constant becomes the canonical form for the null's class
|
||||||
|
/// - Constant + Constant: error if different, no-op if same
|
||||||
|
pub fn merge(&mut self, left: &Term, right: &Term) -> Result<(), MergeError> {
|
||||||
|
match (left, right) {
|
||||||
|
(Term::Constant(c1), Term::Constant(c2)) => {
|
||||||
|
if c1 != c2 {
|
||||||
|
Err(MergeError {
|
||||||
|
left: left.clone(),
|
||||||
|
right: right.clone(),
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Term::Null(id), Term::Constant(_)) => {
|
||||||
|
let root = self.find(*id);
|
||||||
|
// Check if this class already has a canonical constant
|
||||||
|
if let Some(existing) = self.canonical.get(&root) {
|
||||||
|
if existing != right {
|
||||||
|
return Err(MergeError {
|
||||||
|
left: existing.clone(),
|
||||||
|
right: right.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.canonical.insert(root, right.clone());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
(Term::Constant(_), Term::Null(id)) => {
|
||||||
|
let root = self.find(*id);
|
||||||
|
if let Some(existing) = self.canonical.get(&root) {
|
||||||
|
if existing != left {
|
||||||
|
return Err(MergeError {
|
||||||
|
left: left.clone(),
|
||||||
|
right: existing.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.canonical.insert(root, left.clone());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
(Term::Null(id1), Term::Null(id2)) => {
|
||||||
|
let root1 = self.find(*id1);
|
||||||
|
let root2 = self.find(*id2);
|
||||||
|
|
||||||
|
if root1 == root2 {
|
||||||
|
return Ok(()); // Already in the same class
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if both have canonical constants
|
||||||
|
let canon1 = self.canonical.get(&root1).cloned();
|
||||||
|
let canon2 = self.canonical.get(&root2).cloned();
|
||||||
|
|
||||||
|
match (&canon1, &canon2) {
|
||||||
|
(Some(c1), Some(c2)) if c1 != c2 => {
|
||||||
|
return Err(MergeError {
|
||||||
|
left: c1.clone(),
|
||||||
|
right: c2.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Union by rank
|
||||||
|
let rank1 = *self.rank.get(&root1).unwrap_or(&0);
|
||||||
|
let rank2 = *self.rank.get(&root2).unwrap_or(&0);
|
||||||
|
|
||||||
|
let (new_root, child) = if rank1 < rank2 {
|
||||||
|
(root2, root1)
|
||||||
|
} else if rank1 > rank2 {
|
||||||
|
(root1, root2)
|
||||||
|
} else {
|
||||||
|
self.rank.insert(root1, rank1 + 1);
|
||||||
|
(root1, root2)
|
||||||
|
};
|
||||||
|
|
||||||
|
self.parent.insert(child, new_root);
|
||||||
|
|
||||||
|
// Propagate canonical constant to new root
|
||||||
|
let canonical = canon1.or(canon2);
|
||||||
|
if let Some(c) = canonical {
|
||||||
|
self.canonical.insert(new_root, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
// Variables shouldn't be merged (they shouldn't appear in instances)
|
||||||
|
(Term::Variable(_), _) | (_, Term::Variable(_)) => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if two terms are in the same equivalence class.
|
||||||
|
pub fn are_equivalent(&mut self, left: &Term, right: &Term) -> bool {
|
||||||
|
self.canonical_term(left) == self.canonical_term(right)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_merge_nulls() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
// Merge two nulls
|
||||||
|
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||||
|
|
||||||
|
// They should now have the same canonical form
|
||||||
|
let canon0 = uf.canonical_term(&Term::Null(0));
|
||||||
|
let canon1 = uf.canonical_term(&Term::Null(1));
|
||||||
|
assert_eq!(canon0, canon1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_merge_null_with_constant() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
// Merge null with constant
|
||||||
|
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
||||||
|
|
||||||
|
// Null should now have the constant as canonical form
|
||||||
|
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("alice"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_merge_transitive() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
// Merge: null0 = null1, null1 = constant
|
||||||
|
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||||
|
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
|
||||||
|
|
||||||
|
// Both nulls should have the constant as canonical
|
||||||
|
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("bob"));
|
||||||
|
assert_eq!(uf.canonical_term(&Term::Null(1)), Term::constant("bob"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_merge_different_constants_fails() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
// Merging different constants should fail
|
||||||
|
let result = uf.merge(&Term::constant("alice"), &Term::constant("bob"));
|
||||||
|
assert!(result.is_err());
|
||||||
|
|
||||||
|
let err = result.unwrap_err();
|
||||||
|
assert_eq!(err.left, Term::constant("alice"));
|
||||||
|
assert_eq!(err.right, Term::constant("bob"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_merge_same_constant_ok() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
// Merging the same constant should be fine
|
||||||
|
uf.merge(&Term::constant("alice"), &Term::constant("alice"))
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_merge_nulls_with_conflicting_constants() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
// null0 = alice, null1 = bob
|
||||||
|
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
||||||
|
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
|
||||||
|
|
||||||
|
// Merging null0 and null1 should fail (alice != bob)
|
||||||
|
let result = uf.merge(&Term::Null(0), &Term::Null(1));
|
||||||
|
assert!(result.is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_canonicalize_atom() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
||||||
|
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
|
||||||
|
|
||||||
|
let atom = Atom::new("R", vec![Term::Null(0), Term::Null(1), Term::constant("x")]);
|
||||||
|
|
||||||
|
let canonical = uf.canonicalize_atom(&atom);
|
||||||
|
|
||||||
|
assert_eq!(canonical.terms[0], Term::constant("alice"));
|
||||||
|
// Null(1) and Null(2) are equivalent but have no constant, so canonical is one of them
|
||||||
|
assert!(canonical.terms[1].is_null());
|
||||||
|
assert_eq!(canonical.terms[2], Term::constant("x"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_path_compression() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
// Create a chain: 0 -> 1 -> 2 -> 3
|
||||||
|
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||||
|
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
|
||||||
|
uf.merge(&Term::Null(2), &Term::Null(3)).unwrap();
|
||||||
|
|
||||||
|
// Finding from 0 should compress the path
|
||||||
|
let root = uf.find(0);
|
||||||
|
|
||||||
|
// All should point to the same root
|
||||||
|
assert_eq!(uf.find(1), root);
|
||||||
|
assert_eq!(uf.find(2), root);
|
||||||
|
assert_eq!(uf.find(3), root);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_are_equivalent() {
|
||||||
|
let mut uf = UnionFind::new();
|
||||||
|
|
||||||
|
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
||||||
|
|
||||||
|
assert!(uf.are_equivalent(&Term::Null(0), &Term::Null(1)));
|
||||||
|
assert!(!uf.are_equivalent(&Term::Null(0), &Term::Null(2)));
|
||||||
|
assert!(uf.are_equivalent(&Term::constant("a"), &Term::constant("a")));
|
||||||
|
assert!(!uf.are_equivalent(&Term::constant("a"), &Term::constant("b")));
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -86,6 +86,7 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: false,
|
terminated: false,
|
||||||
|
error: None,
|
||||||
},
|
},
|
||||||
provenance,
|
provenance,
|
||||||
};
|
};
|
||||||
@ -105,6 +106,7 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: true,
|
terminated: true,
|
||||||
|
error: None,
|
||||||
},
|
},
|
||||||
provenance,
|
provenance,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -3,6 +3,7 @@ pub mod frontend;
|
|||||||
|
|
||||||
// Re-export main types for convenience
|
// Re-export main types for convenience
|
||||||
pub use chase::{
|
pub use chase::{
|
||||||
Atom, ChaseConfig, ChaseResult, Instance, InstanceError, Rule, Substitution, Term, chase,
|
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Egd, EgdBuilder, Equality, Instance,
|
||||||
chase_with_config,
|
InstanceError, MergeError, Rule, RuleBuilder, Substitution, Term, UnionFind, chase, chase_full,
|
||||||
|
chase_with_config, chase_with_egds, standard_chase,
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user