Compare commits
No commits in common. "ee21bee0391d931d35b44b93a5263967b324d9eb" and "fc66b8e39303d68859b491706296a00e77d39b90" have entirely different histories.
ee21bee039
...
fc66b8e393
@ -1,13 +1,14 @@
|
|||||||
## Chase-rs
|
## Chase-rs
|
||||||
|
|
||||||
An implementation of the chase algorithm in Rust for reasoning engines.
|
An implementation of the chase algorithm in Rust for advanced reasoning engines.
|
||||||
|
|
||||||
### Overview
|
### Overview
|
||||||
|
|
||||||
The chase algorithm is a technique in context of database theory and knowledge representation used for:
|
The chase algorithm is a fundamental technique in context of database theory and knowledge representation used for:
|
||||||
|
|
||||||
- Query answering under tuple-generating dependencies (TGDs)
|
- Query answering under tuple-generating dependencies (TGDs)
|
||||||
- Computing universal models
|
- Computing universal models
|
||||||
|
- Ontology-based data access (OBDA)
|
||||||
- Datalog with existential rules
|
- Datalog with existential rules
|
||||||
|
|
||||||
This implementation provides a restricted-chase style materialization with active-trigger checks.
|
This implementation provides a restricted-chase style materialization with active-trigger checks.
|
||||||
|
|||||||
@ -1,44 +1,12 @@
|
|||||||
//! Core chase algorithm implementation.
|
//! Core chase algorithm implementation.
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::error::Error;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use super::atom::Atom;
|
use super::atom::Atom;
|
||||||
use super::instance::Instance;
|
use super::instance::Instance;
|
||||||
use super::rule::{Egd, Rule};
|
use super::rule::Rule;
|
||||||
use super::substitution::{Substitution, unify_atom};
|
use super::substitution::{Substitution, unify_atom};
|
||||||
use super::term::Term;
|
use super::term::Term;
|
||||||
use super::union_find::{MergeError, UnionFind};
|
|
||||||
|
|
||||||
/// Error that can occur during chase with EGDs.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum ChaseError {
|
|
||||||
/// EGD tried to equate two different constants.
|
|
||||||
EgdConflict(MergeError),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for ChaseError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
ChaseError::EgdConflict(e) => write!(f, "EGD conflict: {}", e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Error for ChaseError {
|
|
||||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
|
||||||
match self {
|
|
||||||
ChaseError::EgdConflict(e) => Some(e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<MergeError> for ChaseError {
|
|
||||||
fn from(e: MergeError) -> Self {
|
|
||||||
ChaseError::EgdConflict(e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Result of running the chase algorithm.
|
/// Result of running the chase algorithm.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -49,20 +17,6 @@ pub struct ChaseResult {
|
|||||||
pub steps: usize,
|
pub steps: usize,
|
||||||
/// Whether the chase terminated (vs hitting a limit).
|
/// Whether the chase terminated (vs hitting a limit).
|
||||||
pub terminated: bool,
|
pub terminated: bool,
|
||||||
/// Error that occurred during chase (e.g., EGD conflict).
|
|
||||||
pub error: Option<ChaseError>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The variant of chase algorithm to use.
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
|
||||||
pub enum ChaseVariant {
|
|
||||||
/// Standard chase: no trigger tracking, terminates only when no new facts.
|
|
||||||
/// May re-apply rules with the same bindings if heads are satisfied.
|
|
||||||
Standard,
|
|
||||||
/// Restricted chase: uses trigger tracking to avoid re-applying the same
|
|
||||||
/// rule with the same frontier variable bindings. This is the default.
|
|
||||||
#[default]
|
|
||||||
Restricted,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Configuration for the chase algorithm.
|
/// Configuration for the chase algorithm.
|
||||||
@ -70,16 +24,11 @@ pub enum ChaseVariant {
|
|||||||
pub struct ChaseConfig {
|
pub struct ChaseConfig {
|
||||||
/// Maximum number of chase steps before giving up.
|
/// Maximum number of chase steps before giving up.
|
||||||
pub max_steps: usize,
|
pub max_steps: usize,
|
||||||
/// The chase variant to use.
|
|
||||||
pub variant: ChaseVariant,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for ChaseConfig {
|
impl Default for ChaseConfig {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
ChaseConfig {
|
ChaseConfig { max_steps: 10_000 }
|
||||||
max_steps: 10_000,
|
|
||||||
variant: ChaseVariant::default(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -130,7 +79,7 @@ impl Trigger {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run the restricted chase algorithm (default).
|
/// Run the standard chase algorithm.
|
||||||
///
|
///
|
||||||
/// The chase repeatedly applies rules to derive new facts until no more
|
/// The chase repeatedly applies rules to derive new facts until no more
|
||||||
/// facts can be derived (fixpoint) or a limit is reached.
|
/// facts can be derived (fixpoint) or a limit is reached.
|
||||||
@ -142,22 +91,6 @@ pub fn chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
|||||||
chase_with_config(instance, rules, ChaseConfig::default())
|
chase_with_config(instance, rules, ChaseConfig::default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run the standard chase algorithm (no trigger tracking).
|
|
||||||
///
|
|
||||||
/// The standard chase applies rules whenever the body matches, without
|
|
||||||
/// tracking which rule applications have been performed. It terminates
|
|
||||||
/// only when no new facts are derived.
|
|
||||||
///
|
|
||||||
/// This is simpler but may be less efficient for rules with existentials,
|
|
||||||
/// as it doesn't prevent re-checking already-satisfied rule applications.
|
|
||||||
pub fn standard_chase(instance: Instance, rules: &[Rule]) -> ChaseResult {
|
|
||||||
let config = ChaseConfig {
|
|
||||||
variant: ChaseVariant::Standard,
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
chase_with_config(instance, rules, config)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run the chase with custom configuration.
|
/// Run the chase with custom configuration.
|
||||||
pub fn chase_with_config(
|
pub fn chase_with_config(
|
||||||
mut instance: Instance,
|
mut instance: Instance,
|
||||||
@ -174,16 +107,10 @@ pub fn chase_with_config(
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: false,
|
terminated: false,
|
||||||
error: None,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let new_facts = match config.variant {
|
let new_facts = chase_step(&instance, rules, &mut null_gen, &mut applied_triggers);
|
||||||
ChaseVariant::Standard => standard_chase_step(&instance, rules, &mut null_gen),
|
|
||||||
ChaseVariant::Restricted => {
|
|
||||||
restricted_chase_step(&instance, rules, &mut null_gen, &mut applied_triggers)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if new_facts.is_empty() {
|
if new_facts.is_empty() {
|
||||||
// Fixpoint reached
|
// Fixpoint reached
|
||||||
@ -191,7 +118,6 @@ pub fn chase_with_config(
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: true,
|
terminated: true,
|
||||||
error: None,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,40 +128,8 @@ pub fn chase_with_config(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Perform a single standard chase step: apply rules without trigger tracking.
|
/// Perform a single chase step: find all applicable rule instances and derive new facts.
|
||||||
fn standard_chase_step(
|
fn chase_step(
|
||||||
instance: &Instance,
|
|
||||||
rules: &[Rule],
|
|
||||||
null_gen: &mut NullGenerator,
|
|
||||||
) -> Vec<Atom> {
|
|
||||||
let mut new_facts = Vec::new();
|
|
||||||
|
|
||||||
for rule in rules {
|
|
||||||
// Find all ways to match the rule body against the instance
|
|
||||||
let matches = find_body_matches(instance, &rule.body);
|
|
||||||
|
|
||||||
for subst in matches {
|
|
||||||
// In standard chase, we only check if head is satisfied
|
|
||||||
if head_is_satisfied(instance, rule, &subst) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate head atoms with this substitution
|
|
||||||
let derived = apply_rule_head(rule, &subst, null_gen);
|
|
||||||
|
|
||||||
for fact in derived {
|
|
||||||
if !instance.contains(&fact) {
|
|
||||||
new_facts.push(fact);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
new_facts
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Perform a single restricted chase step: use trigger tracking to avoid redundant applications.
|
|
||||||
fn restricted_chase_step(
|
|
||||||
instance: &Instance,
|
instance: &Instance,
|
||||||
rules: &[Rule],
|
rules: &[Rule],
|
||||||
null_gen: &mut NullGenerator,
|
null_gen: &mut NullGenerator,
|
||||||
@ -292,8 +186,8 @@ fn find_body_matches(instance: &Instance, body: &[Atom]) -> Vec<Substitution> {
|
|||||||
// Apply current substitution to the body atom
|
// Apply current substitution to the body atom
|
||||||
let pattern = subst.apply_atom(body_atom);
|
let pattern = subst.apply_atom(body_atom);
|
||||||
|
|
||||||
// Find all facts that match this pattern using indexed lookup
|
// Find all facts that match this pattern
|
||||||
for fact in instance.facts_matching_pattern(&pattern) {
|
for fact in instance.facts_for_predicate(&pattern.predicate) {
|
||||||
if let Some(new_subst) = unify_atom(&pattern, fact) {
|
if let Some(new_subst) = unify_atom(&pattern, fact) {
|
||||||
// Combine substitutions
|
// Combine substitutions
|
||||||
let mut combined = subst.clone();
|
let mut combined = subst.clone();
|
||||||
@ -365,145 +259,6 @@ fn term_null_id(term: &Term) -> Option<usize> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A trigger for EGD applications, tracking which EGD was applied with which body bindings.
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
|
||||||
struct EgdTrigger {
|
|
||||||
egd_index: usize,
|
|
||||||
/// All body variable bindings for the EGD.
|
|
||||||
body_bindings: Vec<(String, Term)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl EgdTrigger {
|
|
||||||
fn new(egd_index: usize, egd: &Egd, subst: &Substitution) -> Self {
|
|
||||||
let body_vars = egd.body_variables();
|
|
||||||
let mut bindings: Vec<_> = body_vars
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|v| subst.get(&v).map(|t| (v, t.clone())))
|
|
||||||
.collect();
|
|
||||||
bindings.sort_by(|a, b| a.0.cmp(&b.0));
|
|
||||||
|
|
||||||
EgdTrigger {
|
|
||||||
egd_index,
|
|
||||||
body_bindings: bindings,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Apply EGDs to the instance, returning any merge error that occurs.
|
|
||||||
fn apply_egds(
|
|
||||||
instance: &Instance,
|
|
||||||
egds: &[Egd],
|
|
||||||
uf: &mut UnionFind,
|
|
||||||
applied_egd_triggers: &mut HashSet<EgdTrigger>,
|
|
||||||
) -> Result<bool, MergeError> {
|
|
||||||
let mut made_changes = false;
|
|
||||||
|
|
||||||
for (egd_idx, egd) in egds.iter().enumerate() {
|
|
||||||
let matches = find_body_matches(instance, &egd.body);
|
|
||||||
|
|
||||||
for subst in matches {
|
|
||||||
let trigger = EgdTrigger::new(egd_idx, egd, &subst);
|
|
||||||
|
|
||||||
if applied_egd_triggers.contains(&trigger) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
applied_egd_triggers.insert(trigger);
|
|
||||||
|
|
||||||
// Get the terms that should be equal
|
|
||||||
let left = subst.apply_term(&egd.equality.left);
|
|
||||||
let right = subst.apply_term(&egd.equality.right);
|
|
||||||
|
|
||||||
// Merge them in the union-find
|
|
||||||
uf.merge(&left, &right)?;
|
|
||||||
made_changes = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(made_changes)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run chase with both TGDs and EGDs.
|
|
||||||
///
|
|
||||||
/// The chase alternates between:
|
|
||||||
/// 1. Applying TGDs to derive new facts
|
|
||||||
/// 2. Applying EGDs to enforce equalities
|
|
||||||
/// 3. Canonicalizing the instance to reflect merged nulls
|
|
||||||
///
|
|
||||||
/// Terminates when fixpoint is reached or an error occurs (EGD conflict).
|
|
||||||
pub fn chase_with_egds(instance: Instance, tgds: &[Rule], egds: &[Egd]) -> ChaseResult {
|
|
||||||
chase_full(instance, tgds, egds, ChaseConfig::default())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run the full chase with TGDs, EGDs, and custom configuration.
|
|
||||||
pub fn chase_full(
|
|
||||||
mut instance: Instance,
|
|
||||||
tgds: &[Rule],
|
|
||||||
egds: &[Egd],
|
|
||||||
config: ChaseConfig,
|
|
||||||
) -> ChaseResult {
|
|
||||||
let mut null_gen = NullGenerator::seeded_from(&instance, tgds);
|
|
||||||
let mut applied_triggers: HashSet<Trigger> = HashSet::new();
|
|
||||||
let mut applied_egd_triggers: HashSet<EgdTrigger> = HashSet::new();
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
let mut steps = 0;
|
|
||||||
|
|
||||||
loop {
|
|
||||||
if steps >= config.max_steps {
|
|
||||||
return ChaseResult {
|
|
||||||
instance,
|
|
||||||
steps,
|
|
||||||
terminated: false,
|
|
||||||
error: None,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply TGDs
|
|
||||||
let new_facts = match config.variant {
|
|
||||||
ChaseVariant::Standard => standard_chase_step(&instance, tgds, &mut null_gen),
|
|
||||||
ChaseVariant::Restricted => {
|
|
||||||
restricted_chase_step(&instance, tgds, &mut null_gen, &mut applied_triggers)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let tgd_changes = !new_facts.is_empty();
|
|
||||||
for fact in new_facts {
|
|
||||||
instance.add(fact);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply EGDs
|
|
||||||
let egd_result = apply_egds(&instance, egds, &mut uf, &mut applied_egd_triggers);
|
|
||||||
|
|
||||||
match egd_result {
|
|
||||||
Err(e) => {
|
|
||||||
return ChaseResult {
|
|
||||||
instance,
|
|
||||||
steps,
|
|
||||||
terminated: false,
|
|
||||||
error: Some(ChaseError::EgdConflict(e)),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
Ok(egd_changes) => {
|
|
||||||
// Canonicalize instance if EGDs made changes
|
|
||||||
if egd_changes {
|
|
||||||
instance = instance.canonicalize(&mut uf);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for fixpoint
|
|
||||||
if !tgd_changes && !egd_changes {
|
|
||||||
return ChaseResult {
|
|
||||||
instance,
|
|
||||||
steps,
|
|
||||||
terminated: true,
|
|
||||||
error: None,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
steps += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -644,286 +399,4 @@ mod tests {
|
|||||||
assert!(result.terminated);
|
assert!(result.terminated);
|
||||||
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_standard_chase_basic() {
|
|
||||||
// Test standard chase with datalog rules
|
|
||||||
let instance: Instance = vec![
|
|
||||||
Atom::new(
|
|
||||||
"Parent",
|
|
||||||
vec![Term::constant("alice"), Term::constant("bob")],
|
|
||||||
),
|
|
||||||
Atom::new(
|
|
||||||
"Parent",
|
|
||||||
vec![Term::constant("bob"), Term::constant("carol")],
|
|
||||||
),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let rule1 = RuleBuilder::new()
|
|
||||||
.when("Parent", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.then("Ancestor", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let rule2 = RuleBuilder::new()
|
|
||||||
.when("Ancestor", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.when("Parent", vec![Term::var("Y"), Term::var("Z")])
|
|
||||||
.then("Ancestor", vec![Term::var("X"), Term::var("Z")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let result = standard_chase(instance, &[rule1, rule2]);
|
|
||||||
|
|
||||||
assert!(result.terminated);
|
|
||||||
let ancestors = result.instance.facts_for_predicate("Ancestor");
|
|
||||||
assert_eq!(ancestors.len(), 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_standard_and_restricted_reach_same_fixpoint() {
|
|
||||||
// For datalog rules (no existentials), both should reach the same result
|
|
||||||
let instance: Instance = vec![
|
|
||||||
Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]),
|
|
||||||
Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]),
|
|
||||||
Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let rule1 = RuleBuilder::new()
|
|
||||||
.when("Edge", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.then("Path", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let rule2 = RuleBuilder::new()
|
|
||||||
.when("Path", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.when("Edge", vec![Term::var("Y"), Term::var("Z")])
|
|
||||||
.then("Path", vec![Term::var("X"), Term::var("Z")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let rules = vec![rule1, rule2];
|
|
||||||
|
|
||||||
let standard_result = standard_chase(instance.clone(), &rules);
|
|
||||||
let restricted_result = chase(instance, &rules);
|
|
||||||
|
|
||||||
assert!(standard_result.terminated);
|
|
||||||
assert!(restricted_result.terminated);
|
|
||||||
|
|
||||||
// Both should derive the same paths
|
|
||||||
let standard_paths = standard_result.instance.facts_for_predicate("Path");
|
|
||||||
let restricted_paths = restricted_result.instance.facts_for_predicate("Path");
|
|
||||||
assert_eq!(standard_paths.len(), restricted_paths.len());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_standard_chase_with_existentials() {
|
|
||||||
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let rule = RuleBuilder::new()
|
|
||||||
.when("Person", vec![Term::var("X")])
|
|
||||||
.then("HasSSN", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let result = standard_chase(instance, &[rule]);
|
|
||||||
|
|
||||||
assert!(result.terminated);
|
|
||||||
let has_ssn = result.instance.facts_for_predicate("HasSSN");
|
|
||||||
assert_eq!(has_ssn.len(), 1);
|
|
||||||
assert!(matches!(has_ssn[0].terms[1], Term::Null(_)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_chase_variant_config() {
|
|
||||||
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let rule = RuleBuilder::new()
|
|
||||||
.when("A", vec![Term::var("X")])
|
|
||||||
.then("B", vec![Term::var("X")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
// Test standard variant via config
|
|
||||||
let standard_config = ChaseConfig {
|
|
||||||
variant: ChaseVariant::Standard,
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
let standard_result = chase_with_config(instance.clone(), &[rule.clone()], standard_config);
|
|
||||||
|
|
||||||
// Test restricted variant via config
|
|
||||||
let restricted_config = ChaseConfig {
|
|
||||||
variant: ChaseVariant::Restricted,
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
let restricted_result = chase_with_config(instance, &[rule], restricted_config);
|
|
||||||
|
|
||||||
assert!(standard_result.terminated);
|
|
||||||
assert!(restricted_result.terminated);
|
|
||||||
assert_eq!(
|
|
||||||
standard_result.instance.facts_for_predicate("B").len(),
|
|
||||||
restricted_result.instance.facts_for_predicate("B").len()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// EGD tests
|
|
||||||
use crate::chase::rule::EgdBuilder;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_egd_functional_dependency_merges_nulls() {
|
|
||||||
// Functional dependency: R(X, Y), R(X, Z) -> Y = Z
|
|
||||||
// This should merge the nulls created for the same X value
|
|
||||||
let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// TGD: Person(X) -> R(X, Y)
|
|
||||||
let tgd = RuleBuilder::new()
|
|
||||||
.when("Person", vec![Term::var("X")])
|
|
||||||
.then("R", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
// First run without EGD to create two separate facts with nulls
|
|
||||||
// We simulate this by having two different TGDs create facts
|
|
||||||
let tgd2 = RuleBuilder::new()
|
|
||||||
.when("Person", vec![Term::var("X")])
|
|
||||||
.then("S", vec![Term::var("X"), Term::var("Z")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
// EGD: R(X, Y), S(X, Z) -> Y = Z
|
|
||||||
let egd = EgdBuilder::new()
|
|
||||||
.when("R", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.when("S", vec![Term::var("X"), Term::var("Z")])
|
|
||||||
.then_equal(Term::var("Y"), Term::var("Z"))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let result = chase_with_egds(instance, &[tgd, tgd2], &[egd]);
|
|
||||||
|
|
||||||
assert!(result.terminated);
|
|
||||||
assert!(result.error.is_none());
|
|
||||||
|
|
||||||
// After EGD application, the nulls should be merged
|
|
||||||
let r_facts = result.instance.facts_for_predicate("R");
|
|
||||||
let s_facts = result.instance.facts_for_predicate("S");
|
|
||||||
assert_eq!(r_facts.len(), 1);
|
|
||||||
assert_eq!(s_facts.len(), 1);
|
|
||||||
|
|
||||||
// The second argument of R and S should be the same (merged)
|
|
||||||
assert_eq!(r_facts[0].terms[1], s_facts[0].terms[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_egd_conflict_different_constants() {
|
|
||||||
// EGD that tries to equate different constants should fail
|
|
||||||
let instance: Instance = vec![
|
|
||||||
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
|
|
||||||
Atom::new("R", vec![Term::constant("x"), Term::constant("bob")]),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// EGD: R(X, Y), R(X, Z) -> Y = Z
|
|
||||||
// This should fail because alice != bob
|
|
||||||
let egd = EgdBuilder::new()
|
|
||||||
.when("R", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.when("R", vec![Term::var("X"), Term::var("Z")])
|
|
||||||
.then_equal(Term::var("Y"), Term::var("Z"))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let result = chase_with_egds(instance, &[], &[egd]);
|
|
||||||
|
|
||||||
assert!(!result.terminated);
|
|
||||||
assert!(result.error.is_some());
|
|
||||||
assert!(matches!(result.error, Some(ChaseError::EgdConflict(_))));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_egd_null_merged_with_constant() {
|
|
||||||
// EGD that merges a null with a constant
|
|
||||||
let instance: Instance = vec![
|
|
||||||
Atom::new("R", vec![Term::constant("x"), Term::Null(0)]),
|
|
||||||
Atom::new("R", vec![Term::constant("x"), Term::constant("alice")]),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// EGD: R(X, Y), R(X, Z) -> Y = Z
|
|
||||||
let egd = EgdBuilder::new()
|
|
||||||
.when("R", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.when("R", vec![Term::var("X"), Term::var("Z")])
|
|
||||||
.then_equal(Term::var("Y"), Term::var("Z"))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let result = chase_with_egds(instance, &[], &[egd]);
|
|
||||||
|
|
||||||
assert!(result.terminated);
|
|
||||||
assert!(result.error.is_none());
|
|
||||||
|
|
||||||
// After canonicalization, there should be only one R fact
|
|
||||||
// with "alice" as the second argument
|
|
||||||
let r_facts = result.instance.facts_for_predicate("R");
|
|
||||||
assert_eq!(r_facts.len(), 1);
|
|
||||||
assert_eq!(r_facts[0].terms[1], Term::constant("alice"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_tgd_and_egd_interaction() {
|
|
||||||
// TGD creates facts, EGD enforces constraints
|
|
||||||
let instance: Instance = vec![
|
|
||||||
Atom::new("Person", vec![Term::constant("alice")]),
|
|
||||||
Atom::new("Person", vec![Term::constant("bob")]),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// TGD: Person(X) -> HasManager(X, Y)
|
|
||||||
let tgd = RuleBuilder::new()
|
|
||||||
.when("Person", vec![Term::var("X")])
|
|
||||||
.then("HasManager", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
// EGD: HasManager(X, Y), HasManager(Z, W) -> Y = W
|
|
||||||
// Everyone has the same manager
|
|
||||||
let egd = EgdBuilder::new()
|
|
||||||
.when("HasManager", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.when("HasManager", vec![Term::var("Z"), Term::var("W")])
|
|
||||||
.then_equal(Term::var("Y"), Term::var("W"))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let result = chase_with_egds(instance, &[tgd], &[egd]);
|
|
||||||
|
|
||||||
assert!(result.terminated);
|
|
||||||
assert!(result.error.is_none());
|
|
||||||
|
|
||||||
let manager_facts = result.instance.facts_for_predicate("HasManager");
|
|
||||||
assert_eq!(manager_facts.len(), 2);
|
|
||||||
|
|
||||||
// Both should have the same manager (second argument)
|
|
||||||
let managers: Vec<_> = manager_facts.iter().map(|f| &f.terms[1]).collect();
|
|
||||||
assert_eq!(managers[0], managers[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_chase_full_with_custom_config() {
|
|
||||||
let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let tgd = RuleBuilder::new()
|
|
||||||
.when("A", vec![Term::var("X")])
|
|
||||||
.then("B", vec![Term::var("X"), Term::var("Y")])
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let config = ChaseConfig {
|
|
||||||
max_steps: 100,
|
|
||||||
variant: ChaseVariant::Standard,
|
|
||||||
};
|
|
||||||
|
|
||||||
let result = chase_full(instance, &[tgd], &[], config);
|
|
||||||
|
|
||||||
assert!(result.terminated);
|
|
||||||
assert!(result.error.is_none());
|
|
||||||
assert_eq!(result.instance.facts_for_predicate("B").len(), 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,8 +5,6 @@ use std::error::Error;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use super::atom::Atom;
|
use super::atom::Atom;
|
||||||
use super::term::Term;
|
|
||||||
use super::union_find::UnionFind;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum InstanceError {
|
pub enum InstanceError {
|
||||||
@ -25,30 +23,10 @@ impl fmt::Display for InstanceError {
|
|||||||
|
|
||||||
impl Error for InstanceError {}
|
impl Error for InstanceError {}
|
||||||
|
|
||||||
/// A key for argument-position indexing: (predicate, position, value).
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
|
||||||
struct IndexKey {
|
|
||||||
predicate: String,
|
|
||||||
position: usize,
|
|
||||||
value: Term,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IndexKey {
|
|
||||||
fn new(predicate: &str, position: usize, value: &Term) -> Self {
|
|
||||||
IndexKey {
|
|
||||||
predicate: predicate.to_string(),
|
|
||||||
position,
|
|
||||||
value: value.clone(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A database instance containing ground atoms.
|
/// A database instance containing ground atoms.
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub struct Instance {
|
pub struct Instance {
|
||||||
facts_by_predicate: HashMap<String, HashSet<Atom>>,
|
facts_by_predicate: HashMap<String, HashSet<Atom>>,
|
||||||
/// Secondary index: maps (predicate, position, value) to atoms with that value at that position.
|
|
||||||
argument_index: HashMap<IndexKey, HashSet<Atom>>,
|
|
||||||
len: usize,
|
len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,7 +35,6 @@ impl Instance {
|
|||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Instance {
|
Instance {
|
||||||
facts_by_predicate: HashMap::new(),
|
facts_by_predicate: HashMap::new(),
|
||||||
argument_index: HashMap::new(),
|
|
||||||
len: 0,
|
len: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -72,16 +49,8 @@ impl Instance {
|
|||||||
.facts_by_predicate
|
.facts_by_predicate
|
||||||
.entry(fact.predicate.clone())
|
.entry(fact.predicate.clone())
|
||||||
.or_default();
|
.or_default();
|
||||||
let inserted = bucket.insert(fact.clone());
|
let inserted = bucket.insert(fact);
|
||||||
if inserted {
|
if inserted {
|
||||||
// Populate argument indexes
|
|
||||||
for (position, term) in fact.terms.iter().enumerate() {
|
|
||||||
let key = IndexKey::new(&fact.predicate, position, term);
|
|
||||||
self.argument_index
|
|
||||||
.entry(key)
|
|
||||||
.or_default()
|
|
||||||
.insert(fact.clone());
|
|
||||||
}
|
|
||||||
self.len += 1;
|
self.len += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,9 +59,7 @@ impl Instance {
|
|||||||
|
|
||||||
/// Add a fact to the instance. Returns true if the fact was new.
|
/// Add a fact to the instance. Returns true if the fact was new.
|
||||||
///
|
///
|
||||||
/// # Panics
|
|
||||||
/// Panics if the atom is not ground.
|
/// Panics if the atom is not ground.
|
||||||
#[allow(clippy::expect_used)]
|
|
||||||
pub fn add(&mut self, fact: Atom) -> bool {
|
pub fn add(&mut self, fact: Atom) -> bool {
|
||||||
self.try_add(fact).expect("facts must be ground atoms")
|
self.try_add(fact).expect("facts must be ground atoms")
|
||||||
}
|
}
|
||||||
@ -129,75 +96,6 @@ impl Instance {
|
|||||||
.flat_map(|facts| facts.iter())
|
.flat_map(|facts| facts.iter())
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get facts matching a pattern, using argument indexes for efficient lookup.
|
|
||||||
///
|
|
||||||
/// The pattern may contain variables (which match anything) and ground terms
|
|
||||||
/// (which must match exactly). This method uses a selectivity heuristic to
|
|
||||||
/// choose the most selective index for lookup.
|
|
||||||
pub fn facts_matching_pattern(&self, pattern: &Atom) -> Vec<&Atom> {
|
|
||||||
// Find bound (ground) positions and their selectivity
|
|
||||||
let bound_positions: Vec<(usize, &Term)> = pattern
|
|
||||||
.terms
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.filter(|(_, term)| term.is_ground())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if bound_positions.is_empty() {
|
|
||||||
// No bound arguments, fall back to predicate lookup
|
|
||||||
return self.facts_for_predicate(&pattern.predicate);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use selectivity heuristic: pick the bound position with the smallest bucket
|
|
||||||
let mut best_key: Option<IndexKey> = None;
|
|
||||||
let mut best_size = usize::MAX;
|
|
||||||
|
|
||||||
for (position, term) in &bound_positions {
|
|
||||||
let key = IndexKey::new(&pattern.predicate, *position, term);
|
|
||||||
let size = self.argument_index.get(&key).map(|s| s.len()).unwrap_or(0);
|
|
||||||
|
|
||||||
if size < best_size {
|
|
||||||
best_size = size;
|
|
||||||
best_key = Some(key);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If best bucket is empty, return empty
|
|
||||||
let Some(key) = best_key else {
|
|
||||||
return Vec::new();
|
|
||||||
};
|
|
||||||
|
|
||||||
let Some(candidates) = self.argument_index.get(&key) else {
|
|
||||||
return Vec::new();
|
|
||||||
};
|
|
||||||
|
|
||||||
// Filter candidates by arity and all other bound positions
|
|
||||||
let pattern_arity = pattern.arity();
|
|
||||||
candidates
|
|
||||||
.iter()
|
|
||||||
.filter(|fact| {
|
|
||||||
fact.arity() == pattern_arity
|
|
||||||
&& bound_positions
|
|
||||||
.iter()
|
|
||||||
.all(|(pos, term)| &fact.terms[*pos] == *term)
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a new instance with all atoms canonicalized according to the union-find.
|
|
||||||
///
|
|
||||||
/// This replaces all nulls with their canonical representatives and removes
|
|
||||||
/// duplicate atoms that result from canonicalization.
|
|
||||||
pub fn canonicalize(&self, uf: &mut UnionFind) -> Instance {
|
|
||||||
let mut new_instance = Instance::new();
|
|
||||||
for atom in self.iter() {
|
|
||||||
let canonical = uf.canonicalize_atom(atom);
|
|
||||||
// try_add handles deduplication
|
|
||||||
let _ = new_instance.try_add(canonical);
|
|
||||||
}
|
|
||||||
new_instance
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Instance {
|
impl fmt::Display for Instance {
|
||||||
@ -273,77 +171,4 @@ mod tests {
|
|||||||
assert!(matches!(error, InstanceError::NonGroundFact(_)));
|
assert!(matches!(error, InstanceError::NonGroundFact(_)));
|
||||||
assert!(instance.is_empty());
|
assert!(instance.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_facts_matching_pattern_with_bound_arg() {
|
|
||||||
let instance: Instance = vec![
|
|
||||||
Atom::new(
|
|
||||||
"Parent",
|
|
||||||
vec![Term::constant("alice"), Term::constant("bob")],
|
|
||||||
),
|
|
||||||
Atom::new(
|
|
||||||
"Parent",
|
|
||||||
vec![Term::constant("alice"), Term::constant("carol")],
|
|
||||||
),
|
|
||||||
Atom::new(
|
|
||||||
"Parent",
|
|
||||||
vec![Term::constant("bob"), Term::constant("carol")],
|
|
||||||
),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Pattern with first argument bound to "alice"
|
|
||||||
let pattern = Atom::new("Parent", vec![Term::constant("alice"), Term::var("Y")]);
|
|
||||||
let matches = instance.facts_matching_pattern(&pattern);
|
|
||||||
assert_eq!(matches.len(), 2);
|
|
||||||
|
|
||||||
// Pattern with second argument bound to "carol"
|
|
||||||
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::constant("carol")]);
|
|
||||||
let matches = instance.facts_matching_pattern(&pattern);
|
|
||||||
assert_eq!(matches.len(), 2);
|
|
||||||
|
|
||||||
// Pattern with both arguments bound
|
|
||||||
let pattern = Atom::new(
|
|
||||||
"Parent",
|
|
||||||
vec![Term::constant("alice"), Term::constant("bob")],
|
|
||||||
);
|
|
||||||
let matches = instance.facts_matching_pattern(&pattern);
|
|
||||||
assert_eq!(matches.len(), 1);
|
|
||||||
|
|
||||||
// Pattern with no bound arguments (all variables)
|
|
||||||
let pattern = Atom::new("Parent", vec![Term::var("X"), Term::var("Y")]);
|
|
||||||
let matches = instance.facts_matching_pattern(&pattern);
|
|
||||||
assert_eq!(matches.len(), 3);
|
|
||||||
|
|
||||||
// Pattern with non-matching bound argument
|
|
||||||
let pattern = Atom::new("Parent", vec![Term::constant("dave"), Term::var("Y")]);
|
|
||||||
let matches = instance.facts_matching_pattern(&pattern);
|
|
||||||
assert_eq!(matches.len(), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_facts_matching_pattern_selectivity() {
|
|
||||||
// Create instance with skewed distribution
|
|
||||||
let mut instance = Instance::new();
|
|
||||||
|
|
||||||
// Many facts with first arg "common"
|
|
||||||
for i in 0..100 {
|
|
||||||
instance.add(Atom::new(
|
|
||||||
"R",
|
|
||||||
vec![Term::constant("common"), Term::constant(format!("v{}", i))],
|
|
||||||
));
|
|
||||||
}
|
|
||||||
// One fact with first arg "rare"
|
|
||||||
instance.add(Atom::new(
|
|
||||||
"R",
|
|
||||||
vec![Term::constant("rare"), Term::constant("x")],
|
|
||||||
));
|
|
||||||
|
|
||||||
// Pattern matching on "rare" should use more selective index
|
|
||||||
let pattern = Atom::new("R", vec![Term::constant("rare"), Term::var("Y")]);
|
|
||||||
let matches = instance.facts_matching_pattern(&pattern);
|
|
||||||
assert_eq!(matches.len(), 1);
|
|
||||||
assert_eq!(matches[0].terms[0], Term::constant("rare"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,17 +5,12 @@ pub mod instance;
|
|||||||
pub mod rule;
|
pub mod rule;
|
||||||
pub mod substitution;
|
pub mod substitution;
|
||||||
pub mod term;
|
pub mod term;
|
||||||
pub mod union_find;
|
|
||||||
|
|
||||||
mod engine;
|
mod engine;
|
||||||
|
|
||||||
pub use atom::Atom;
|
pub use atom::Atom;
|
||||||
pub use engine::{
|
pub use engine::{ChaseConfig, ChaseResult, chase, chase_with_config};
|
||||||
ChaseConfig, ChaseError, ChaseResult, ChaseVariant, chase, chase_full, chase_with_config,
|
|
||||||
chase_with_egds, standard_chase,
|
|
||||||
};
|
|
||||||
pub use instance::{Instance, InstanceError};
|
pub use instance::{Instance, InstanceError};
|
||||||
pub use rule::{Egd, EgdBuilder, Equality, Rule, RuleBuilder};
|
pub use rule::Rule;
|
||||||
pub use substitution::Substitution;
|
pub use substitution::Substitution;
|
||||||
pub use term::Term;
|
pub use term::Term;
|
||||||
pub use union_find::{MergeError, UnionFind};
|
|
||||||
|
|||||||
@ -121,125 +121,6 @@ impl Default for RuleBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An equality constraint between two terms.
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub struct Equality {
|
|
||||||
pub left: Term,
|
|
||||||
pub right: Term,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Equality {
|
|
||||||
/// Create a new equality constraint.
|
|
||||||
pub fn new(left: Term, right: Term) -> Self {
|
|
||||||
Equality { left, right }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get all variables in this equality.
|
|
||||||
pub fn variables(&self) -> Vec<&String> {
|
|
||||||
let mut vars = Vec::new();
|
|
||||||
if let Term::Variable(v) = &self.left {
|
|
||||||
vars.push(v);
|
|
||||||
}
|
|
||||||
if let Term::Variable(v) = &self.right {
|
|
||||||
vars.push(v);
|
|
||||||
}
|
|
||||||
vars
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for Equality {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
write!(f, "{} = {}", self.left, self.right)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An equality-generating dependency (EGD).
|
|
||||||
///
|
|
||||||
/// EGDs have the form: body -> X = Y
|
|
||||||
/// When the body matches, the equality X = Y is enforced.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Egd {
|
|
||||||
/// The body atoms (conjunction).
|
|
||||||
pub body: Vec<Atom>,
|
|
||||||
/// The equality to enforce.
|
|
||||||
pub equality: Equality,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Egd {
|
|
||||||
/// Create a new EGD.
|
|
||||||
pub fn new(body: Vec<Atom>, equality: Equality) -> Self {
|
|
||||||
Egd { body, equality }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get all variables appearing in the body.
|
|
||||||
pub fn body_variables(&self) -> HashSet<String> {
|
|
||||||
self.body
|
|
||||||
.iter()
|
|
||||||
.flat_map(|a| a.variables())
|
|
||||||
.cloned()
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get all variables appearing in the equality.
|
|
||||||
pub fn equality_variables(&self) -> HashSet<String> {
|
|
||||||
self.equality.variables().into_iter().cloned().collect()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for Egd {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
for (i, atom) in self.body.iter().enumerate() {
|
|
||||||
if i > 0 {
|
|
||||||
write!(f, ", ")?;
|
|
||||||
}
|
|
||||||
write!(f, "{}", atom)?;
|
|
||||||
}
|
|
||||||
write!(f, " → {}", self.equality)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Builder for creating EGDs with a fluent API.
|
|
||||||
pub struct EgdBuilder {
|
|
||||||
body: Vec<Atom>,
|
|
||||||
equality: Option<Equality>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl EgdBuilder {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
EgdBuilder {
|
|
||||||
body: Vec::new(),
|
|
||||||
equality: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add an atom to the body.
|
|
||||||
pub fn when(mut self, predicate: &str, terms: Vec<Term>) -> Self {
|
|
||||||
self.body.push(Atom::new(predicate, terms));
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set the equality to enforce.
|
|
||||||
pub fn then_equal(mut self, left: Term, right: Term) -> Self {
|
|
||||||
self.equality = Some(Equality::new(left, right));
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build the EGD.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
/// Panics if `then_equal()` was not called.
|
|
||||||
#[allow(clippy::expect_used)]
|
|
||||||
pub fn build(self) -> Egd {
|
|
||||||
Egd::new(self.body, self.equality.expect("EGD must have an equality"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for EgdBuilder {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@ -39,16 +39,6 @@ impl Term {
|
|||||||
pub fn is_ground(&self) -> bool {
|
pub fn is_ground(&self) -> bool {
|
||||||
!self.is_variable()
|
!self.is_variable()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if this term is a null.
|
|
||||||
pub fn is_null(&self) -> bool {
|
|
||||||
matches!(self, Term::Null(_))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if this term is a constant.
|
|
||||||
pub fn is_constant(&self) -> bool {
|
|
||||||
matches!(self, Term::Constant(_))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Term {
|
impl fmt::Display for Term {
|
||||||
@ -86,20 +76,4 @@ mod tests {
|
|||||||
assert!(!v.is_ground());
|
assert!(!v.is_ground());
|
||||||
assert!(v.is_variable());
|
assert!(v.is_variable());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_is_null_and_is_constant() {
|
|
||||||
let c = Term::constant("alice");
|
|
||||||
let n = Term::null(1);
|
|
||||||
let v = Term::var("X");
|
|
||||||
|
|
||||||
assert!(c.is_constant());
|
|
||||||
assert!(!c.is_null());
|
|
||||||
|
|
||||||
assert!(n.is_null());
|
|
||||||
assert!(!n.is_constant());
|
|
||||||
|
|
||||||
assert!(!v.is_constant());
|
|
||||||
assert!(!v.is_null());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,323 +0,0 @@
|
|||||||
//! Union-Find data structure for managing null equivalence classes in EGDs.
|
|
||||||
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::error::Error;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use super::atom::Atom;
|
|
||||||
use super::term::Term;
|
|
||||||
|
|
||||||
/// Error that occurs when merging two different constants.
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub struct MergeError {
|
|
||||||
pub left: Term,
|
|
||||||
pub right: Term,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for MergeError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"cannot merge different constants: {} and {}",
|
|
||||||
self.left, self.right
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Error for MergeError {}
|
|
||||||
|
|
||||||
/// Union-Find data structure for managing equivalence classes of terms.
|
|
||||||
///
|
|
||||||
/// Used by EGDs to track which nulls have been merged and what their
|
|
||||||
/// canonical representatives are. Constants are always their own canonical
|
|
||||||
/// form, and merging a null with a constant makes the constant the canonical
|
|
||||||
/// form for that equivalence class.
|
|
||||||
#[derive(Debug, Clone, Default)]
|
|
||||||
pub struct UnionFind {
|
|
||||||
/// Maps null IDs to their parent in the union-find tree.
|
|
||||||
parent: HashMap<usize, usize>,
|
|
||||||
/// Rank for union-by-rank optimization.
|
|
||||||
rank: HashMap<usize, usize>,
|
|
||||||
/// Maps null IDs to their canonical term (constant if merged with one).
|
|
||||||
canonical: HashMap<usize, Term>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl UnionFind {
|
|
||||||
/// Create a new empty union-find structure.
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self::default()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Find the representative for a null ID, with path compression.
|
|
||||||
pub fn find(&mut self, null_id: usize) -> usize {
|
|
||||||
// Ensure the null exists in our structure and get the parent
|
|
||||||
let parent = *self.parent.entry(null_id).or_insert_with(|| {
|
|
||||||
self.rank.insert(null_id, 0);
|
|
||||||
null_id
|
|
||||||
});
|
|
||||||
|
|
||||||
if parent != null_id {
|
|
||||||
// Path compression: make this node point directly to the root
|
|
||||||
let root = self.find(parent);
|
|
||||||
self.parent.insert(null_id, root);
|
|
||||||
root
|
|
||||||
} else {
|
|
||||||
null_id
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the canonical term for a given term.
|
|
||||||
///
|
|
||||||
/// - Constants are always their own canonical form.
|
|
||||||
/// - Variables are always their own canonical form (shouldn't appear in instances).
|
|
||||||
/// - Nulls are mapped to their canonical representative (which may be a constant
|
|
||||||
/// if they were merged with one).
|
|
||||||
pub fn canonical_term(&mut self, term: &Term) -> Term {
|
|
||||||
match term {
|
|
||||||
Term::Constant(_) | Term::Variable(_) => term.clone(),
|
|
||||||
Term::Null(id) => {
|
|
||||||
let root = self.find(*id);
|
|
||||||
// Check if this equivalence class has a canonical constant
|
|
||||||
if let Some(canonical) = self.canonical.get(&root) {
|
|
||||||
canonical.clone()
|
|
||||||
} else {
|
|
||||||
Term::Null(root)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Canonicalize an atom by replacing all terms with their canonical forms.
|
|
||||||
pub fn canonicalize_atom(&mut self, atom: &Atom) -> Atom {
|
|
||||||
Atom::new(
|
|
||||||
atom.predicate.clone(),
|
|
||||||
atom.terms.iter().map(|t| self.canonical_term(t)).collect(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Merge two terms, returning an error if they are different constants.
|
|
||||||
///
|
|
||||||
/// The merge rules are:
|
|
||||||
/// - Null + Null: union the equivalence classes
|
|
||||||
/// - Null + Constant: the constant becomes the canonical form for the null's class
|
|
||||||
/// - Constant + Constant: error if different, no-op if same
|
|
||||||
pub fn merge(&mut self, left: &Term, right: &Term) -> Result<(), MergeError> {
|
|
||||||
match (left, right) {
|
|
||||||
(Term::Constant(c1), Term::Constant(c2)) => {
|
|
||||||
if c1 != c2 {
|
|
||||||
Err(MergeError {
|
|
||||||
left: left.clone(),
|
|
||||||
right: right.clone(),
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(Term::Null(id), Term::Constant(_)) => {
|
|
||||||
let root = self.find(*id);
|
|
||||||
// Check if this class already has a canonical constant
|
|
||||||
if let Some(existing) = self.canonical.get(&root) {
|
|
||||||
if existing != right {
|
|
||||||
return Err(MergeError {
|
|
||||||
left: existing.clone(),
|
|
||||||
right: right.clone(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
self.canonical.insert(root, right.clone());
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
(Term::Constant(_), Term::Null(id)) => {
|
|
||||||
let root = self.find(*id);
|
|
||||||
if let Some(existing) = self.canonical.get(&root) {
|
|
||||||
if existing != left {
|
|
||||||
return Err(MergeError {
|
|
||||||
left: left.clone(),
|
|
||||||
right: existing.clone(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
self.canonical.insert(root, left.clone());
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
(Term::Null(id1), Term::Null(id2)) => {
|
|
||||||
let root1 = self.find(*id1);
|
|
||||||
let root2 = self.find(*id2);
|
|
||||||
|
|
||||||
if root1 == root2 {
|
|
||||||
return Ok(()); // Already in the same class
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if both have canonical constants
|
|
||||||
let canon1 = self.canonical.get(&root1).cloned();
|
|
||||||
let canon2 = self.canonical.get(&root2).cloned();
|
|
||||||
|
|
||||||
match (&canon1, &canon2) {
|
|
||||||
(Some(c1), Some(c2)) if c1 != c2 => {
|
|
||||||
return Err(MergeError {
|
|
||||||
left: c1.clone(),
|
|
||||||
right: c2.clone(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Union by rank
|
|
||||||
let rank1 = *self.rank.get(&root1).unwrap_or(&0);
|
|
||||||
let rank2 = *self.rank.get(&root2).unwrap_or(&0);
|
|
||||||
|
|
||||||
let (new_root, child) = if rank1 < rank2 {
|
|
||||||
(root2, root1)
|
|
||||||
} else if rank1 > rank2 {
|
|
||||||
(root1, root2)
|
|
||||||
} else {
|
|
||||||
self.rank.insert(root1, rank1 + 1);
|
|
||||||
(root1, root2)
|
|
||||||
};
|
|
||||||
|
|
||||||
self.parent.insert(child, new_root);
|
|
||||||
|
|
||||||
// Propagate canonical constant to new root
|
|
||||||
let canonical = canon1.or(canon2);
|
|
||||||
if let Some(c) = canonical {
|
|
||||||
self.canonical.insert(new_root, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
// Variables shouldn't be merged (they shouldn't appear in instances)
|
|
||||||
(Term::Variable(_), _) | (_, Term::Variable(_)) => Ok(()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check if two terms are in the same equivalence class.
|
|
||||||
pub fn are_equivalent(&mut self, left: &Term, right: &Term) -> bool {
|
|
||||||
self.canonical_term(left) == self.canonical_term(right)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_merge_nulls() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
// Merge two nulls
|
|
||||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
|
||||||
|
|
||||||
// They should now have the same canonical form
|
|
||||||
let canon0 = uf.canonical_term(&Term::Null(0));
|
|
||||||
let canon1 = uf.canonical_term(&Term::Null(1));
|
|
||||||
assert_eq!(canon0, canon1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_merge_null_with_constant() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
// Merge null with constant
|
|
||||||
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
|
||||||
|
|
||||||
// Null should now have the constant as canonical form
|
|
||||||
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("alice"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_merge_transitive() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
// Merge: null0 = null1, null1 = constant
|
|
||||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
|
||||||
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
|
|
||||||
|
|
||||||
// Both nulls should have the constant as canonical
|
|
||||||
assert_eq!(uf.canonical_term(&Term::Null(0)), Term::constant("bob"));
|
|
||||||
assert_eq!(uf.canonical_term(&Term::Null(1)), Term::constant("bob"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_merge_different_constants_fails() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
// Merging different constants should fail
|
|
||||||
let result = uf.merge(&Term::constant("alice"), &Term::constant("bob"));
|
|
||||||
assert!(result.is_err());
|
|
||||||
|
|
||||||
let err = result.unwrap_err();
|
|
||||||
assert_eq!(err.left, Term::constant("alice"));
|
|
||||||
assert_eq!(err.right, Term::constant("bob"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_merge_same_constant_ok() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
// Merging the same constant should be fine
|
|
||||||
uf.merge(&Term::constant("alice"), &Term::constant("alice"))
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_merge_nulls_with_conflicting_constants() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
// null0 = alice, null1 = bob
|
|
||||||
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
|
||||||
uf.merge(&Term::Null(1), &Term::constant("bob")).unwrap();
|
|
||||||
|
|
||||||
// Merging null0 and null1 should fail (alice != bob)
|
|
||||||
let result = uf.merge(&Term::Null(0), &Term::Null(1));
|
|
||||||
assert!(result.is_err());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_canonicalize_atom() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
uf.merge(&Term::Null(0), &Term::constant("alice")).unwrap();
|
|
||||||
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
|
|
||||||
|
|
||||||
let atom = Atom::new("R", vec![Term::Null(0), Term::Null(1), Term::constant("x")]);
|
|
||||||
|
|
||||||
let canonical = uf.canonicalize_atom(&atom);
|
|
||||||
|
|
||||||
assert_eq!(canonical.terms[0], Term::constant("alice"));
|
|
||||||
// Null(1) and Null(2) are equivalent but have no constant, so canonical is one of them
|
|
||||||
assert!(canonical.terms[1].is_null());
|
|
||||||
assert_eq!(canonical.terms[2], Term::constant("x"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_path_compression() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
// Create a chain: 0 -> 1 -> 2 -> 3
|
|
||||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
|
||||||
uf.merge(&Term::Null(1), &Term::Null(2)).unwrap();
|
|
||||||
uf.merge(&Term::Null(2), &Term::Null(3)).unwrap();
|
|
||||||
|
|
||||||
// Finding from 0 should compress the path
|
|
||||||
let root = uf.find(0);
|
|
||||||
|
|
||||||
// All should point to the same root
|
|
||||||
assert_eq!(uf.find(1), root);
|
|
||||||
assert_eq!(uf.find(2), root);
|
|
||||||
assert_eq!(uf.find(3), root);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_are_equivalent() {
|
|
||||||
let mut uf = UnionFind::new();
|
|
||||||
|
|
||||||
uf.merge(&Term::Null(0), &Term::Null(1)).unwrap();
|
|
||||||
|
|
||||||
assert!(uf.are_equivalent(&Term::Null(0), &Term::Null(1)));
|
|
||||||
assert!(!uf.are_equivalent(&Term::Null(0), &Term::Null(2)));
|
|
||||||
assert!(uf.are_equivalent(&Term::constant("a"), &Term::constant("a")));
|
|
||||||
assert!(!uf.are_equivalent(&Term::constant("a"), &Term::constant("b")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -86,7 +86,6 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: false,
|
terminated: false,
|
||||||
error: None,
|
|
||||||
},
|
},
|
||||||
provenance,
|
provenance,
|
||||||
};
|
};
|
||||||
@ -106,7 +105,6 @@ pub fn materialize(base_instance: Instance, rules: &[Rule]) -> MaterializedState
|
|||||||
instance,
|
instance,
|
||||||
steps,
|
steps,
|
||||||
terminated: true,
|
terminated: true,
|
||||||
error: None,
|
|
||||||
},
|
},
|
||||||
provenance,
|
provenance,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -3,7 +3,6 @@ pub mod frontend;
|
|||||||
|
|
||||||
// Re-export main types for convenience
|
// Re-export main types for convenience
|
||||||
pub use chase::{
|
pub use chase::{
|
||||||
Atom, ChaseConfig, ChaseError, ChaseResult, ChaseVariant, Egd, EgdBuilder, Equality, Instance,
|
Atom, ChaseConfig, ChaseResult, Instance, InstanceError, Rule, Substitution, Term, chase,
|
||||||
InstanceError, MergeError, Rule, RuleBuilder, Substitution, Term, UnionFind, chase, chase_full,
|
chase_with_config,
|
||||||
chase_with_config, chase_with_egds, standard_chase,
|
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user