From c53cc11e5f2195cdc74659f3893f8e06ae8b9f15 Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Mon, 9 Mar 2026 09:59:10 +0100 Subject: [PATCH] WIP --- .editorconfig | 2 +- .gitignore | 1 + .pre-commit-config.yaml | 2 +- AGENTS.md | 180 ++++++++++ Cargo.toml | 36 +- Makefile | 16 +- README.md | 132 +++++--- assets/logos/README.md | 3 - assets/logos/corro.svg | 113 ------- assets/logos/cuddlyferris.svg | 52 --- assets/logos/rustacean-flat-gesture.svg | 58 ---- assets/logos/rustacean-flat-happy.svg | 53 --- assets/logos/rustacean-flat-noshadow.svg | 68 ---- assets/logos/rustacean-orig-noshadow.svg | 84 ----- assets/make_figures.sh | 12 - benches/project_benchmarks.rs | 8 - docs/README.md | 5 - examples/basic_usage.rs | 5 - src/chase/atom.rs | 91 +++++ src/chase/engine.rs | 376 +++++++++++++++++++++ src/chase/instance.rs | 122 +++++++ src/chase/mod.rs | 16 + src/chase/rule.rs | 170 ++++++++++ src/chase/substitution.rs | 147 ++++++++ src/chase/term.rs | 79 +++++ src/cli.rs | 34 -- src/lib.rs | 6 +- src/logging.rs | 17 - src/main.rs | 7 +- tests/integration_tests.rs | 183 ++++++++++ python/.gitkeep => tests/property_tests.rs | 0 tests/regression_tests.rs | 0 tests/testdata/README.md | 22 -- tests/testdata/check_datasets.sql | 11 - tests/testdata/download_datasets.sh | 20 -- 35 files changed, 1475 insertions(+), 656 deletions(-) create mode 100644 AGENTS.md delete mode 100644 assets/logos/README.md delete mode 100644 assets/logos/corro.svg delete mode 100644 assets/logos/cuddlyferris.svg delete mode 100644 assets/logos/rustacean-flat-gesture.svg delete mode 100644 assets/logos/rustacean-flat-happy.svg delete mode 100644 assets/logos/rustacean-flat-noshadow.svg delete mode 100644 assets/logos/rustacean-orig-noshadow.svg delete mode 100644 assets/make_figures.sh delete mode 100644 benches/project_benchmarks.rs delete mode 100644 docs/README.md delete mode 100644 examples/basic_usage.rs create mode 100644 src/chase/atom.rs create mode 100644 src/chase/engine.rs create mode 
100644 src/chase/instance.rs create mode 100644 src/chase/mod.rs create mode 100644 src/chase/rule.rs create mode 100644 src/chase/substitution.rs create mode 100644 src/chase/term.rs delete mode 100644 src/cli.rs delete mode 100644 src/logging.rs rename python/.gitkeep => tests/property_tests.rs (100%) create mode 100644 tests/regression_tests.rs delete mode 100644 tests/testdata/README.md delete mode 100644 tests/testdata/check_datasets.sql delete mode 100644 tests/testdata/download_datasets.sh diff --git a/.editorconfig b/.editorconfig index f9a21c6..9ed9473 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ indent_size = 4 insert_final_newline = true trim_trailing_whitespace = true -[*.rs] +[*.{rs,py}] max_line_length = 100 [*.md] diff --git a/.gitignore b/.gitignore index 6095f34..f96ff5a 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,4 @@ Cargo.lock .DS_Store .benchmarks .env +.claude/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fc020bc..c8f7442 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ default_stages: [ pre-push ] fail_fast: false -exclude: '^(benches/|tests/)' +exclude: '^(benches/|tests/|examples/|docs/)' repos: - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..5730c68 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,180 @@ +# AGENTS.md + +This file provides guidance to coding agents collaborating on this repository. + +## Mission + +chase-rs is an efficient implementation of the chase algorithm in Rust for advanced reasoning engines. +Priorities, in order: + +1. Correctness of reasoning (sound and complete chase). +2. Termination guarantees (restricted chase for existential rules). +3. Performance and scalability. +4. Clear, maintainable, idiomatic Rust code. + +## Core Rules + +- Use English for code, comments, docs, and tests. 
+- Keep all chase state inside well-defined structs; avoid global mutable state. +- Prefer small, focused changes over large refactoring. +- Add comments only when they clarify non-obvious behavior. +- Follow Rust idioms: use `Result` for errors, iterators over manual loops, etc. + +Quick examples: + +- Good: add a new chase variant by implementing a trait or strategy pattern. +- Bad: add global configuration that affects all chase instances. + +## Repository Layout + +- `src/`: core implementation. +- `src/chase/`: chase algorithm modules. + - `term.rs`: terms (constants, nulls, variables). + - `atom.rs`: atoms (predicate applied to terms). + - `instance.rs`: database instance (set of facts). + - `rule.rs`: TGDs (tuple-generating dependencies). + - `substitution.rs`: variable bindings and unification. + - `engine.rs`: core chase algorithm. +- `tests/`: integration, regression, and property-based tests. + +## Architecture Constraints + +- `Instance` holds the database state (set of ground atoms). +- `Rule` represents tuple-generating dependencies (TGDs). +- The chase engine is stateless; state is passed explicitly. +- New chase variants should be composable with existing infrastructure. +- Existential variables generate labeled nulls (`Term::Null`). + +## Rust Conventions + +- Target stable Rust (edition 2024, rust-version 1.92). +- Use `#[derive(...)]` for common traits where appropriate. +- Prefer `&str` over `String` in function parameters when ownership isn't needed. +- Use `impl Trait` for return types when the concrete type is an implementation detail. +- Run `cargo clippy` and address warnings before committing. + +## Required Validation + +Run these checks for any non-trivial change: + +1. `cargo test` (all unit and integration tests) +2. `cargo clippy` (lint checks) +3. `cargo fmt --check` (formatting) + +For performance-sensitive changes: + +1. Add benchmarks if they don't exist +2. 
Compare before/after performance + +## First Contribution Flow + +Use this sequence for your first change: + +1. Read `src/chase/mod.rs` and the relevant module files. +2. Implement the smallest possible code change. +3. Add or update tests that fail before and pass after. +4. Run `cargo test`. +5. Run `cargo clippy` and fix any warnings. +6. Update docs if public API behavior changed. + +Example scopes that are good first tasks: + +- Add tests for an edge case in unification. +- Implement a new utility method on `Instance` or `Atom`. +- Add support for equality-generating dependencies (EGDs). +- Improve error handling with proper `Result` types. + +## Testing Expectations + +- No chase logic change is complete without tests. +- Unit tests go in `#[cfg(test)] mod tests` within each module. +- Integration tests go in `tests/integration_tests.rs`. +- Regression tests for bug fixes go in `tests/regression_tests.rs`. +- Property-based tests go in `tests/property_tests.rs`. +- Do not merge code that breaks existing tests. + +Minimal unit-test checklist: + +1. Create an `Instance` with relevant facts. +2. Define rules using `RuleBuilder`. +3. Run `chase(instance, &rules)`. +4. Assert on `result.terminated`, `result.instance`, and derived facts. + +Example test skeleton: + +```rust +#[test] +fn test_example() { + let instance: Instance = vec![ + Atom::new("Pred", vec![Term::constant("a")]), + ].into_iter().collect(); + + let rule = RuleBuilder::new() + .when("Pred", vec![Term::var("X")]) + .then("Derived", vec![Term::var("X")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + assert_eq!(result.instance.facts_for_predicate("Derived").len(), 1); +} +``` + +## Change Design Checklist + +Before coding: + +1. Confirm whether the change affects chase semantics or termination. +2. Identify affected tests. +3. Consider impact on API stability. + +Before submitting: + +1. Verify `cargo test` passes. +2. Verify `cargo clippy` has no warnings. 
+3. Ensure tests were added/updated where relevant. + +## Review Guidelines (P0/P1 Focus) + +Review output should be concise and only include critical issues. + +- `P0`: must-fix defects (incorrect reasoning, non-termination, soundness bugs). +- `P1`: high-priority defects (likely functional bug, performance regression, API breakage). + +Do not include: + +- style-only nitpicks, +- praise/summary of what is already good, +- exhaustive restatement of the patch. + +Use this review format: + +1. `Severity` (`P0`/`P1`) +2. `File:line` +3. `Issue` +4. `Why it matters` +5. `Minimal fix direction` + +## Practical Notes for Agents + +- Prefer targeted edits over broad mechanical rewrites. +- If you detect contradictory repository conventions, follow existing code and update docs accordingly. +- When uncertain about correctness, add/extend tests first, then optimize. +- The chase algorithm has well-defined semantics; consult database theory literature if needed. + +## Commit and PR Hygiene + +- Keep commits scoped to one logical change. +- PR descriptions should include: + 1. behavioral change summary, + 2. tests added/updated, + 3. performance impact (if applicable), + 4. API changes (if any). 
+ +Suggested PR checklist: + +- [ ] Tests added/updated for behavior changes +- [ ] `cargo test` passes +- [ ] `cargo clippy` has no warnings +- [ ] `cargo fmt` applied diff --git a/Cargo.toml b/Cargo.toml index e9f5af3..efb5bed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,35 +2,28 @@ name = "chase-rs" version = "0.1.0-alpha.1" description = "Implementation of chase algorithm in Rust" -repository = "https://github.com/habedi/template-rust-project" -license = "MIT OR Apache-2.0" +repository = "https://code.obsidian.systems/habedi-work/chase-rs" +license = "BSD-3-Clause" readme = "README.md" -keywords = ["project-template", "rust", "library", "application"] -authors = ["Hassan Abedi "] -homepage = "https://github.com/habedi/template-rust-project" -documentation = "https://docs.rs/template-rust-project" -categories = ["development-tools"] -edition = "2021" -rust-version = "1.83" +edition = "2024" +rust-version = "1.92" +publish = false resolver = "2" include = [ - "assets/**/*", - "docs/**/*", "src/**/*", "Cargo.toml", "README.md", - "LICENSE-MIT", - "LICENSE-APACHE" + "LICENSE", ] [lib] -name = "template_rust_project" +name = "chase_rs" path = "src/lib.rs" [[bin]] -name = "template-rust-project" +name = "chase-rs" path = "src/main.rs" [features] @@ -38,16 +31,6 @@ default = [] # No features enabled by default binaries = [] [dependencies] -ctor = "0.6.0" -tracing = "0.1.41" -tracing-subscriber = "0.3.19" - -[dev-dependencies] -criterion = { version = "0.7.0", features = ["html_reports"] } - -[[bench]] -name = "project_benchmarks" -harness = false [profile.release] strip = "debuginfo" @@ -55,9 +38,6 @@ panic = "unwind" codegen-units = 1 lto = true -[profile.bench] -debug = true - [profile.test] debug = true diff --git a/Makefile b/Makefile index a0ec859..2a85d23 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,5 @@ -# Load environment variables from .env file -ifneq (,$(wildcard ./.env)) - include .env - export $(shell sed 's/=.*//' .env) -else - $(warning .env file not 
found. Environment variables not loaded.) -endif - # Variables -PROJ_REPO = github.com/habedi/template-rust-project -BINARY_NAME := $(or $(PROJ_BINARY), $(notdir $(PROJ_REPO))) +BINARY_NAME := "chase-cli" BINARY := target/release/$(BINARY_NAME) PATH := /snap/bin:$(PATH) DEBUG_PROJ := 0 @@ -103,11 +94,6 @@ docs: format ## Generate the documentation @echo "Generating documentation..." @cargo doc --no-deps --document-private-items -.PHONE: figs -figs: ## Generate the figures in the assets directory - @echo "Generating figures..." - @$(SHELL) $(ASSET_DIR)/make_figures.sh $(ASSET_DIR) - .PHONY: fix-lint fix-lint: ## Fix the linter warnings @echo "Fixing linter warnings..." diff --git a/README.md b/README.md index 7605789..e5cb609 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,104 @@ -## A Template for Rust Projects +# chase-rs -
- - template-rust-project logo - -
-
+An efficient implementation of the chase algorithm in Rust for advanced reasoning engines. -[![Tests](https://img.shields.io/github/actions/workflow/status/habedi/template-rust-project/tests.yml?label=tests&style=flat&labelColor=282c34&color=4caf50&logo=github)](https://github.com/habedi/template-rust-project/actions/workflows/tests.yml) -[![Lints](https://img.shields.io/github/actions/workflow/status/habedi/template-rust-project/lints.yml?label=lints&style=flat&labelColor=282c34&color=4caf50&logo=github)](https://github.com/habedi/template-rust-project/actions/workflows/lints.yml) -[![Linux Build](https://img.shields.io/github/actions/workflow/status/habedi/template-rust-project/build_linux.yml?label=linux%20build&style=flat&labelColor=282c34&color=4caf50&logo=linux)](https://github.com/habedi/template-rust-project/actions/workflows/build_linux.yml) -[![Windows Build](https://img.shields.io/github/actions/workflow/status/habedi/template-rust-project/build_windows.yml?label=windows%20build&style=flat&labelColor=282c34&color=4caf50&logo=github)](https://github.com/habedi/template-rust-project/actions/workflows/build_windows.yml) -[![MacOS Build](https://img.shields.io/github/actions/workflow/status/habedi/template-rust-project/build_macos.yml?label=macos%20build&style=flat&labelColor=282c34&color=4caf50&logo=apple)](https://github.com/habedi/template-rust-project/actions/workflows/build_macos.yml) -
-[![Code Coverage](https://img.shields.io/codecov/c/github/habedi/template-rust-project?style=flat&labelColor=282c34&color=ffca28&logo=codecov)](https://codecov.io/gh/habedi/template-rust-project) -[![Code Quality](https://img.shields.io/codefactor/grade/github/habedi/template-rust-project?style=flat&labelColor=282c34&color=4caf50&logo=codefactor)](https://www.codefactor.io/repository/github/habedi/template-rust-project) -[![Crates.io](https://img.shields.io/crates/v/template-rust-project.svg?style=flat&labelColor=282c34&color=f46623&logo=rust)](https://crates.io/crates/template-rust-project) -[![Downloads](https://img.shields.io/crates/d/template-rust-project?style=flat&labelColor=282c34&color=4caf50&logo=rust)](https://crates.io/crates/template-rust-project) -[![Docs.rs](https://img.shields.io/badge/docs.rs-template--rust--project-66c2a5?style=flat&labelColor=282c34&logo=docs.rs)](https://docs.rs/template-rust-project) -
-[![Release](https://img.shields.io/github/release/habedi/template-rust-project.svg?style=flat&labelColor=282c34&color=f46623&logo=github)](https://github.com/habedi/template-rust-project/releases/latest) -[![Total Downloads](https://img.shields.io/github/downloads/habedi/template-rust-project/total.svg?style=flat&labelColor=282c34&color=8caf50&logo=github)](https://github.com/habedi/template-rust-project/releases) -[![Docs](https://img.shields.io/badge/docs-latest-007ec6?style=flat&labelColor=282c34&logo=readthedocs)](docs) -[![License](https://img.shields.io/badge/license-MIT%2FApache--2.0-007ec6?style=flat&labelColor=282c34&logo=open-source-initiative)](https://github.com/habedi/template-rust-project) -[![Status: Stable](https://img.shields.io/badge/status-stable-green.svg?style=flat&labelColor=282c34)](https://github.com/habedi/template-rust-project) +## Overview ---- +The chase algorithm is a fundamental technique in database theory and knowledge representation used for: -This is a template repository with a minimalistic structure to make it easier to start a new Rust project. -I share it here in case it might be useful to others. +- Query answering under tuple-generating dependencies (TGDs) +- Computing universal models +- Ontology-based data access (OBDA) +- Datalog with existential rules -### Features +This implementation provides a **restricted chase** that tracks applied triggers so that the same rule application never fires twice; runs that do not reach a fixpoint are stopped by a configurable step limit (`max_steps` in `ChaseConfig`) and reported as not terminated. -- Minimalistic project structure -- Pre-configured GitHub Actions for running tests and making releases for different platforms -- Makefile for managing common tasks such as formatting, testing, linting, and building -- Example configuration files for common tools like `rustfmt`, `clippy`, and `editorconfig` -- GitHub badges for tests, builds, code quality and coverage, documentation, etc. 
+## Features -### Contributing +- **Core Data Types**: Terms, Atoms, Rules, Instances +- **Existential Quantification**: Automatic generation of labeled nulls +- **Restricted Chase**: Termination guarantees via trigger tracking +- **Fluent API**: `RuleBuilder` for readable rule construction +- **Zero Dependencies**: Pure Rust with no external runtime dependencies -See [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to make a contribution. +## Quick Start -### License +```rust +use chase_rs::{chase, Atom, Instance, Term}; +use chase_rs::chase::rule::RuleBuilder; -This project is licensed under either of these: +// Create initial facts +let instance: Instance = vec![ + Atom::new("Parent", vec![Term::constant("alice"), Term::constant("bob")]), + Atom::new("Parent", vec![Term::constant("bob"), Term::constant("carol")]), +].into_iter().collect(); -* MIT License ([LICENSE-MIT](LICENSE-MIT) or https://opensource.org/licenses/MIT) -* Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or https://www.apache.org/licenses/LICENSE-2.0) +// Define rules +// Parent(X, Y) -> Ancestor(X, Y) +let rule1 = RuleBuilder::new() + .when("Parent", vec![Term::var("X"), Term::var("Y")]) + .then("Ancestor", vec![Term::var("X"), Term::var("Y")]) + .build(); + +// Ancestor(X, Y), Parent(Y, Z) -> Ancestor(X, Z) +let rule2 = RuleBuilder::new() + .when("Ancestor", vec![Term::var("X"), Term::var("Y")]) + .when("Parent", vec![Term::var("Y"), Term::var("Z")]) + .then("Ancestor", vec![Term::var("X"), Term::var("Z")]) + .build(); + +// Run the chase +let result = chase(instance, &[rule1, rule2]); + +assert!(result.terminated); +println!("Derived {} facts", result.instance.len()); +``` + +## Existential Rules + +Rules with head-only variables (existential quantification) automatically generate fresh labeled nulls: + +```rust +// Every person has an SSN: Person(X) -> HasSSN(X, Y) +let rule = RuleBuilder::new() + .when("Person", vec![Term::var("X")]) + .then("HasSSN", vec![Term::var("X"), 
Term::var("Y")]) // Y is existential + .build(); +``` + +## Project Structure + +``` +src/ +├── lib.rs # Library root and re-exports +├── main.rs # Binary entry point +└── chase/ + ├── mod.rs # Module exports + ├── term.rs # Terms: Constants, Nulls, Variables + ├── atom.rs # Atoms: Predicate(term1, term2, ...) + ├── instance.rs # Database instance (set of facts) + ├── rule.rs # TGDs with RuleBuilder + ├── substitution.rs # Variable bindings and unification + └── engine.rs # Core chase algorithm +tests/ +├── integration_tests.rs +├── regression_tests.rs +└── property_tests.rs +``` + +## Building and Testing + +```bash +# Run all tests +cargo test + +# Run with optimizations +cargo build --release + +# Check for lint warnings +cargo clippy +``` + +## License + +This project is licensed under [BSD-3](LICENSE). diff --git a/assets/logos/README.md b/assets/logos/README.md deleted file mode 100644 index a22f2d9..0000000 --- a/assets/logos/README.md +++ /dev/null @@ -1,3 +0,0 @@ -## Sources - -- [rustacean.net](https://rustacean.net/) diff --git a/assets/logos/corro.svg b/assets/logos/corro.svg deleted file mode 100644 index c3dc6fa..0000000 --- a/assets/logos/corro.svg +++ /dev/null @@ -1,113 +0,0 @@ - -image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/logos/cuddlyferris.svg b/assets/logos/cuddlyferris.svg deleted file mode 100644 index a82119e..0000000 --- a/assets/logos/cuddlyferris.svg +++ /dev/null @@ -1,52 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/logos/rustacean-flat-gesture.svg b/assets/logos/rustacean-flat-gesture.svg deleted file mode 100644 index aa5089b..0000000 --- a/assets/logos/rustacean-flat-gesture.svg +++ /dev/null @@ -1,58 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/logos/rustacean-flat-happy.svg b/assets/logos/rustacean-flat-happy.svg deleted file mode 100644 index 05b07fb..0000000 --- a/assets/logos/rustacean-flat-happy.svg 
+++ /dev/null @@ -1,53 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/logos/rustacean-flat-noshadow.svg b/assets/logos/rustacean-flat-noshadow.svg deleted file mode 100644 index debd916..0000000 --- a/assets/logos/rustacean-flat-noshadow.svg +++ /dev/null @@ -1,68 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/logos/rustacean-orig-noshadow.svg b/assets/logos/rustacean-orig-noshadow.svg deleted file mode 100644 index 733a5ba..0000000 --- a/assets/logos/rustacean-orig-noshadow.svg +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/make_figures.sh b/assets/make_figures.sh deleted file mode 100644 index b6f8541..0000000 --- a/assets/make_figures.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# You need to have Graphviz installed to run this script -# On Debian-based OSes, you can install it using: sudo apt-get install graphviz - -# Directory containing .dot files (with default value) -ASSET_DIR=${1:-"."} - -# Make figures from .dot files -for f in ${ASSET_DIR}/*.dot; do - dot -Tsvg "$f" -o "${f%.dot}.svg" -done diff --git a/benches/project_benchmarks.rs b/benches/project_benchmarks.rs deleted file mode 100644 index fa87d01..0000000 --- a/benches/project_benchmarks.rs +++ /dev/null @@ -1,8 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; - -fn benchmark_fun(_c: &mut Criterion) { - // Your benchmarking code here -} - -criterion_group!(benches, benchmark_fun,); -criterion_main!(benches); diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 1eb76f9..0000000 --- a/docs/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Project Documentation - -Put your project documentation here. 
- - diff --git a/examples/basic_usage.rs b/examples/basic_usage.rs deleted file mode 100644 index 7c55598..0000000 --- a/examples/basic_usage.rs +++ /dev/null @@ -1,5 +0,0 @@ -fn main() { - println!("This is a basic usage example"); -} - -// Run this example with `cargo run --example basic_usage` diff --git a/src/chase/atom.rs b/src/chase/atom.rs new file mode 100644 index 0000000..427d973 --- /dev/null +++ b/src/chase/atom.rs @@ -0,0 +1,91 @@ +//! Atoms represent predicates applied to terms. + +use std::fmt; + +use super::term::Term; + +/// An atom is a predicate symbol applied to a tuple of terms. +/// Example: Parent(alice, bob) or Ancestor(?X, ?Y) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Atom { + /// The predicate name. + pub predicate: String, + /// The arguments to the predicate. + pub terms: Vec, +} + +impl Atom { + /// Create a new atom with the given predicate and terms. + pub fn new(predicate: impl Into, terms: Vec) -> Self { + Atom { + predicate: predicate.into(), + terms, + } + } + + /// Returns the arity (number of arguments) of this atom. + pub fn arity(&self) -> usize { + self.terms.len() + } + + /// Returns true if this atom is ground (contains no variables). + pub fn is_ground(&self) -> bool { + self.terms.iter().all(|t| t.is_ground()) + } + + /// Get all variables in this atom. 
+ pub fn variables(&self) -> Vec<&String> { + self.terms + .iter() + .filter_map(|t| match t { + Term::Variable(v) => Some(v), + _ => None, + }) + .collect() + } +} + +impl fmt::Display for Atom { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}(", self.predicate)?; + for (i, term) in self.terms.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", term)?; + } + write!(f, ")") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_atom_creation() { + let atom = Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ); + assert_eq!(atom.predicate, "Parent"); + assert_eq!(atom.arity(), 2); + assert!(atom.is_ground()); + } + + #[test] + fn test_atom_with_variables() { + let atom = Atom::new("Ancestor", vec![Term::var("X"), Term::var("Y")]); + assert!(!atom.is_ground()); + assert_eq!(atom.variables().len(), 2); + } + + #[test] + fn test_atom_display() { + let atom = Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ); + assert_eq!(format!("{}", atom), "Parent(alice, bob)"); + } +} diff --git a/src/chase/engine.rs b/src/chase/engine.rs new file mode 100644 index 0000000..c84c573 --- /dev/null +++ b/src/chase/engine.rs @@ -0,0 +1,376 @@ +//! Core chase algorithm implementation. + +use std::collections::{HashMap, HashSet}; + +use super::atom::Atom; +use super::instance::Instance; +use super::rule::Rule; +use super::substitution::{unify_atom, Substitution}; +use super::term::Term; + +/// Result of running the chase algorithm. +#[derive(Debug)] +pub struct ChaseResult { + /// The final instance after the chase terminates. + pub instance: Instance, + /// Number of chase steps performed. + pub steps: usize, + /// Whether the chase terminated (vs hitting a limit). + pub terminated: bool, +} + +/// Configuration for the chase algorithm. +#[derive(Debug, Clone)] +pub struct ChaseConfig { + /// Maximum number of chase steps before giving up. 
+ pub max_steps: usize, +} + +impl Default for ChaseConfig { + fn default() -> Self { + ChaseConfig { max_steps: 10_000 } + } +} + +/// Counter for generating fresh null values. +#[derive(Debug, Default)] +struct NullGenerator { + counter: usize, +} + +impl NullGenerator { + fn fresh(&mut self) -> Term { + let id = self.counter; + self.counter += 1; + Term::Null(id) + } +} + +/// A trigger represents a rule application: (rule_index, frontier_variable_bindings). +/// We use this to track which rule applications have already been performed, +/// preventing infinite loops with existential variables. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct Trigger { + rule_index: usize, + /// Bindings for frontier variables (those appearing in both body and head). + /// Sorted by variable name for consistent hashing. + frontier_bindings: Vec<(String, Term)>, +} + +impl Trigger { + fn new(rule_index: usize, rule: &Rule, subst: &Substitution) -> Self { + let frontier = rule.frontier_variables(); + let mut bindings: Vec<_> = frontier + .into_iter() + .filter_map(|v| subst.get(&v).map(|t| (v, t.clone()))) + .collect(); + bindings.sort_by(|a, b| a.0.cmp(&b.0)); + + Trigger { + rule_index, + frontier_bindings: bindings, + } + } +} + +/// Run the standard chase algorithm. +/// +/// The chase repeatedly applies rules to derive new facts until no more +/// facts can be derived (fixpoint) or a limit is reached. +/// +/// This implementation uses the "restricted chase" approach which tracks +/// applied triggers to ensure termination with existential rules. +pub fn chase(instance: Instance, rules: &[Rule]) -> ChaseResult { + chase_with_config(instance, rules, ChaseConfig::default()) +} + +/// Run the chase with custom configuration. 
+///
+/// Repeats `chase_step` until a step derives no new facts (`terminated: true`) or
+/// `config.max_steps` steps have been performed (`terminated: false`). `steps` counts
+/// only the steps that added facts. The step limit is checked before each step, so a
+/// run that stops at the limit is reported as not terminated without testing whether
+/// a fixpoint had in fact been reached.
+pub fn chase_with_config(
+    mut instance: Instance,
+    rules: &[Rule],
+    config: ChaseConfig,
+) -> ChaseResult {
+    // Start numbering fresh nulls after the largest null id already present in the
+    // input, so a generated null can never coincide with a null supplied by the caller.
+    // With no nulls in the input this is 0, the previous starting point.
+    let first_fresh_null = instance
+        .iter()
+        .flat_map(|fact| fact.terms.iter())
+        .filter_map(|term| match term {
+            Term::Null(id) => Some(id.saturating_add(1)),
+            _ => None,
+        })
+        .max()
+        .unwrap_or(0);
+    let mut null_gen = NullGenerator {
+        counter: first_fresh_null,
+    };
+    let mut applied_triggers: HashSet<Trigger> = HashSet::new();
+    let mut steps = 0;
+
+    loop {
+        if steps >= config.max_steps {
+            return ChaseResult {
+                instance,
+                steps,
+                terminated: false,
+            };
+        }
+
+        let new_facts = chase_step(&instance, rules, &mut null_gen, &mut applied_triggers);
+
+        if new_facts.is_empty() {
+            // Fixpoint reached
+            return ChaseResult {
+                instance,
+                steps,
+                terminated: true,
+            };
+        }
+
+        for fact in new_facts {
+            instance.add(fact);
+        }
+        steps += 1;
+    }
+}
+
+/// Perform a single chase step: find all applicable rule instances and derive new facts.
+///
+/// Every body match of every rule is turned into a `Trigger`. A trigger already present
+/// in `applied_triggers` is skipped; otherwise it is recorded and the rule head is
+/// instantiated for it. For rules without existential variables, a match whose head
+/// facts are all in `instance` already is skipped without being recorded.
+///
+/// Returns the derived facts that are not yet in `instance`; the instance itself is not
+/// modified here. The returned vector may contain the same fact more than once.
+fn chase_step(
+    instance: &Instance,
+    rules: &[Rule],
+    null_gen: &mut NullGenerator,
+    applied_triggers: &mut HashSet<Trigger>,
+) -> Vec<Atom> {
+    let mut new_facts = Vec::new();
+
+    for (rule_idx, rule) in rules.iter().enumerate() {
+        // Depends only on the rule, so compute it once instead of once per match.
+        let has_existentials = !rule.existential_variables().is_empty();
+
+        // Find all ways to match the rule body against the instance
+        for subst in find_body_matches(instance, &rule.body) {
+            let trigger = Trigger::new(rule_idx, rule, &subst);
+
+            // Never fire the same trigger twice; otherwise an existential rule would
+            // mint a new null for the same frontier binding on every step.
+            if applied_triggers.contains(&trigger) {
+                continue;
+            }
+
+            // For rules without existentials, skip if all head facts already exist.
+            if !has_existentials
+                && rule
+                    .head
+                    .iter()
+                    .all(|atom| instance.contains(&subst.apply_atom(atom)))
+            {
+                continue;
+            }
+
+            // Mark this trigger as applied
+            applied_triggers.insert(trigger);
+
+            // Generate head atoms with this substitution and keep the ones that are new
+            let derived = apply_rule_head(rule, &subst, null_gen);
+            new_facts.extend(derived.into_iter().filter(|fact| !instance.contains(fact)));
+        }
+    }
+
+    new_facts
+}
+
+/// 
Find all substitutions that satisfy the rule body against the instance. +fn find_body_matches(instance: &Instance, body: &[Atom]) -> Vec { + if body.is_empty() { + return vec![Substitution::new()]; + } + + let mut results = vec![Substitution::new()]; + + for body_atom in body { + let mut new_results = Vec::new(); + + for subst in &results { + // Apply current substitution to the body atom + let pattern = subst.apply_atom(body_atom); + + // Find all facts that match this pattern + for fact in instance.facts_for_predicate(&pattern.predicate) { + if let Some(new_subst) = unify_atom(&pattern, fact) { + // Combine substitutions + let mut combined = subst.clone(); + for (var, term) in new_subst.iter() { + combined.bind(var.clone(), term.clone()); + } + new_results.push(combined); + } + } + } + + results = new_results; + } + + results +} + +/// Apply a rule head with the given substitution, generating fresh nulls for existentials. +fn apply_rule_head(rule: &Rule, subst: &Substitution, null_gen: &mut NullGenerator) -> Vec { + let existentials = rule.existential_variables(); + + // Create a mapping for existential variables to fresh nulls + let mut extended_subst = subst.clone(); + let mut null_map: HashMap = HashMap::new(); + + for var in &existentials { + let null = null_gen.fresh(); + null_map.insert(var.clone(), null); + } + + // Add null mappings to substitution + for (var, null) in null_map { + extended_subst.bind(var, null); + } + + // Generate head atoms + rule.head + .iter() + .map(|atom| extended_subst.apply_atom(atom)) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chase::rule::RuleBuilder; + + #[test] + fn test_simple_chase() { + // Initial facts + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + ] + .into_iter() + .collect(); + + // Rule: Parent(X, Y) -> Ancestor(X, Y) + let rule1 
= RuleBuilder::new() + .when("Parent", vec![Term::var("X"), Term::var("Y")]) + .then("Ancestor", vec![Term::var("X"), Term::var("Y")]) + .build(); + + // Rule: Ancestor(X, Y), Parent(Y, Z) -> Ancestor(X, Z) + let rule2 = RuleBuilder::new() + .when("Ancestor", vec![Term::var("X"), Term::var("Y")]) + .when("Parent", vec![Term::var("Y"), Term::var("Z")]) + .then("Ancestor", vec![Term::var("X"), Term::var("Z")]) + .build(); + + let result = chase(instance, &[rule1, rule2]); + + assert!(result.terminated); + + // Check derived facts + let ancestors = result.instance.facts_for_predicate("Ancestor"); + assert_eq!(ancestors.len(), 3); // alice->bob, bob->carol, alice->carol + } + + #[test] + fn test_chase_with_existentials() { + // Initial facts + let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])] + .into_iter() + .collect(); + + // Rule: Person(X) -> HasSSN(X, Y) where Y is existential + let rule = RuleBuilder::new() + .when("Person", vec![Term::var("X")]) + .then("HasSSN", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + + let has_ssn = result.instance.facts_for_predicate("HasSSN"); + assert_eq!(has_ssn.len(), 1); + + // Check that a null was generated + let fact = has_ssn[0]; + assert!(matches!(fact.terms[1], Term::Null(_))); + } + + #[test] + fn test_chase_multiple_existentials() { + // Test that each person gets their own SSN + let instance: Instance = vec![ + Atom::new("Person", vec![Term::constant("alice")]), + Atom::new("Person", vec![Term::constant("bob")]), + ] + .into_iter() + .collect(); + + let rule = RuleBuilder::new() + .when("Person", vec![Term::var("X")]) + .then("HasSSN", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + + let has_ssn = result.instance.facts_for_predicate("HasSSN"); + assert_eq!(has_ssn.len(), 2); + + // Verify different nulls were generated + let nulls: 
Vec<_> = has_ssn.iter().map(|f| &f.terms[1]).collect(); + assert_ne!(nulls[0], nulls[1]); + } + + #[test] + fn test_empty_chase() { + let instance = Instance::new(); + let result = chase(instance, &[]); + + assert!(result.terminated); + assert_eq!(result.steps, 0); + } + + #[test] + fn test_chase_fixpoint() { + // With no applicable rules, chase should terminate immediately + let instance: Instance = vec![Atom::new("Fact", vec![Term::constant("a")])] + .into_iter() + .collect(); + + let rule = RuleBuilder::new() + .when("Other", vec![Term::var("X")]) + .then("Derived", vec![Term::var("X")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + assert_eq!(result.instance.len(), 1); + } + + #[test] + fn test_chase_no_duplicate_applications() { + // Ensure the same rule isn't applied twice for the same body match + let instance: Instance = vec![Atom::new("A", vec![Term::constant("x")])] + .into_iter() + .collect(); + + // A(X) -> B(X, Y) - should only fire once per X value + let rule = RuleBuilder::new() + .when("A", vec![Term::var("X")]) + .then("B", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + assert_eq!(result.instance.facts_for_predicate("B").len(), 1); + } +} diff --git a/src/chase/instance.rs b/src/chase/instance.rs new file mode 100644 index 0000000..d7e3c1e --- /dev/null +++ b/src/chase/instance.rs @@ -0,0 +1,122 @@ +//! A database instance is a set of ground atoms (facts). + +use std::collections::HashSet; +use std::fmt; + +use super::atom::Atom; + +/// A database instance containing ground atoms. +#[derive(Debug, Clone, Default)] +pub struct Instance { + facts: HashSet, +} + +impl Instance { + /// Create an empty instance. + pub fn new() -> Self { + Instance { + facts: HashSet::new(), + } + } + + /// Add a fact to the instance. Returns true if the fact was new. 
+ pub fn add(&mut self, fact: Atom) -> bool { + debug_assert!(fact.is_ground(), "Facts must be ground atoms"); + self.facts.insert(fact) + } + + /// Check if the instance contains a fact. + pub fn contains(&self, fact: &Atom) -> bool { + self.facts.contains(fact) + } + + /// Get the number of facts. + pub fn len(&self) -> usize { + self.facts.len() + } + + /// Check if the instance is empty. + pub fn is_empty(&self) -> bool { + self.facts.is_empty() + } + + /// Iterate over all facts. + pub fn iter(&self) -> impl Iterator { + self.facts.iter() + } + + /// Get all facts with a given predicate. + pub fn facts_for_predicate(&self, predicate: &str) -> Vec<&Atom> { + self.facts + .iter() + .filter(|a| a.predicate == predicate) + .collect() + } +} + +impl fmt::Display for Instance { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Instance {{")?; + for fact in &self.facts { + writeln!(f, " {}", fact)?; + } + write!(f, "}}") + } +} + +impl FromIterator for Instance { + fn from_iter>(iter: T) -> Self { + let mut instance = Instance::new(); + for atom in iter { + instance.add(atom); + } + instance + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chase::term::Term; + + #[test] + fn test_instance_operations() { + let mut instance = Instance::new(); + + let fact1 = Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ); + let fact2 = Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ); + + assert!(instance.add(fact1.clone())); + assert!(instance.add(fact2.clone())); + assert!(!instance.add(fact1.clone())); // Duplicate + + assert_eq!(instance.len(), 2); + assert!(instance.contains(&fact1)); + } + + #[test] + fn test_facts_for_predicate() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Person", + vec![Term::constant("alice")], + ), + ] + .into_iter() + .collect(); + + 
assert_eq!(instance.facts_for_predicate("Parent").len(), 1); + assert_eq!(instance.facts_for_predicate("Person").len(), 1); + assert_eq!(instance.facts_for_predicate("Other").len(), 0); + } +} diff --git a/src/chase/mod.rs b/src/chase/mod.rs new file mode 100644 index 0000000..9cb5dc1 --- /dev/null +++ b/src/chase/mod.rs @@ -0,0 +1,16 @@ +//! Chase algorithm implementation for reasoning with tuple-generating dependencies (TGDs). + +pub mod atom; +pub mod instance; +pub mod rule; +pub mod substitution; +pub mod term; + +mod engine; + +pub use atom::Atom; +pub use engine::{chase, ChaseResult}; +pub use instance::Instance; +pub use rule::Rule; +pub use substitution::Substitution; +pub use term::Term; diff --git a/src/chase/rule.rs b/src/chase/rule.rs new file mode 100644 index 0000000..e63f6e9 --- /dev/null +++ b/src/chase/rule.rs @@ -0,0 +1,170 @@ +//! Rules (tuple-generating dependencies / TGDs) for the chase. + +use std::collections::HashSet; +use std::fmt; + +use super::atom::Atom; +use super::term::Term; + +/// A rule (TGD) of the form: body -> head +/// +/// The body is a conjunction of atoms, and the head is a conjunction of atoms. +/// Variables in the head that don't appear in the body are "existential" - +/// they will be replaced with fresh nulls during the chase. +#[derive(Debug, Clone)] +pub struct Rule { + /// The body atoms (conjunction). + pub body: Vec, + /// The head atoms (conjunction). + pub head: Vec, +} + +impl Rule { + /// Create a new rule. + pub fn new(body: Vec, head: Vec) -> Self { + Rule { body, head } + } + + /// Get all variables appearing in the body. + pub fn body_variables(&self) -> HashSet { + self.body + .iter() + .flat_map(|a| a.variables()) + .cloned() + .collect() + } + + /// Get all variables appearing in the head. + pub fn head_variables(&self) -> HashSet { + self.head + .iter() + .flat_map(|a| a.variables()) + .cloned() + .collect() + } + + /// Get existentially quantified variables (in head but not in body). 
+ pub fn existential_variables(&self) -> HashSet { + let body_vars = self.body_variables(); + self.head_variables() + .into_iter() + .filter(|v| !body_vars.contains(v)) + .collect() + } + + /// Get frontier variables (in both body and head). + pub fn frontier_variables(&self) -> HashSet { + let body_vars = self.body_variables(); + self.head_variables() + .into_iter() + .filter(|v| body_vars.contains(v)) + .collect() + } +} + +impl fmt::Display for Rule { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Body + for (i, atom) in self.body.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", atom)?; + } + write!(f, " → ")?; + // Head + for (i, atom) in self.head.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", atom)?; + } + Ok(()) + } +} + +/// Builder for creating rules with a fluent API. +pub struct RuleBuilder { + body: Vec, + head: Vec, +} + +impl RuleBuilder { + pub fn new() -> Self { + RuleBuilder { + body: Vec::new(), + head: Vec::new(), + } + } + + /// Add an atom to the body. + pub fn when(mut self, predicate: &str, terms: Vec) -> Self { + self.body.push(Atom::new(predicate, terms)); + self + } + + /// Add an atom to the head. + pub fn then(mut self, predicate: &str, terms: Vec) -> Self { + self.head.push(Atom::new(predicate, terms)); + self + } + + /// Build the rule. 
+ pub fn build(self) -> Rule { + Rule::new(self.body, self.head) + } +} + +impl Default for RuleBuilder { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rule_variables() { + // Parent(X, Y), Parent(Y, Z) -> Grandparent(X, Z) + let rule = RuleBuilder::new() + .when("Parent", vec![Term::var("X"), Term::var("Y")]) + .when("Parent", vec![Term::var("Y"), Term::var("Z")]) + .then("Grandparent", vec![Term::var("X"), Term::var("Z")]) + .build(); + + let body_vars = rule.body_variables(); + assert!(body_vars.contains("X")); + assert!(body_vars.contains("Y")); + assert!(body_vars.contains("Z")); + + assert!(rule.existential_variables().is_empty()); + } + + #[test] + fn test_existential_variables() { + // Person(X) -> HasId(X, Y) where Y is existential + let rule = RuleBuilder::new() + .when("Person", vec![Term::var("X")]) + .then("HasId", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let existential = rule.existential_variables(); + assert!(existential.contains("Y")); + assert!(!existential.contains("X")); + } + + #[test] + fn test_rule_display() { + let rule = RuleBuilder::new() + .when("Parent", vec![Term::var("X"), Term::var("Y")]) + .then("Ancestor", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let display = format!("{}", rule); + assert!(display.contains("Parent")); + assert!(display.contains("Ancestor")); + assert!(display.contains("→")); + } +} diff --git a/src/chase/substitution.rs b/src/chase/substitution.rs new file mode 100644 index 0000000..2c4a85b --- /dev/null +++ b/src/chase/substitution.rs @@ -0,0 +1,147 @@ +//! Substitutions map variables to terms. + +use std::collections::HashMap; + +use super::atom::Atom; +use super::term::Term; + +/// A substitution maps variable names to terms. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Substitution { + mapping: HashMap, +} + +impl Substitution { + /// Create an empty substitution. 
+ pub fn new() -> Self { + Substitution { + mapping: HashMap::new(), + } + } + + /// Bind a variable to a term. + pub fn bind(&mut self, var: String, term: Term) { + self.mapping.insert(var, term); + } + + /// Get the term bound to a variable, if any. + pub fn get(&self, var: &str) -> Option<&Term> { + self.mapping.get(var) + } + + /// Apply this substitution to a term. + pub fn apply_term(&self, term: &Term) -> Term { + match term { + Term::Variable(v) => self.mapping.get(v).cloned().unwrap_or_else(|| term.clone()), + _ => term.clone(), + } + } + + /// Apply this substitution to an atom. + pub fn apply_atom(&self, atom: &Atom) -> Atom { + Atom::new( + atom.predicate.clone(), + atom.terms.iter().map(|t| self.apply_term(t)).collect(), + ) + } + + /// Check if this substitution is empty. + pub fn is_empty(&self) -> bool { + self.mapping.is_empty() + } + + /// Get the number of bindings. + pub fn len(&self) -> usize { + self.mapping.len() + } + + /// Iterate over all bindings. + pub fn iter(&self) -> impl Iterator { + self.mapping.iter() + } +} + +/// Try to unify an atom pattern with a ground atom (fact). +/// Returns Some(substitution) if they unify, None otherwise. 
+pub fn unify_atom(pattern: &Atom, fact: &Atom) -> Option { + if pattern.predicate != fact.predicate || pattern.arity() != fact.arity() { + return None; + } + + let mut subst = Substitution::new(); + + for (pattern_term, fact_term) in pattern.terms.iter().zip(fact.terms.iter()) { + match pattern_term { + Term::Variable(v) => { + if let Some(existing) = subst.get(v) { + if existing != fact_term { + return None; // Conflict + } + } else { + subst.bind(v.clone(), fact_term.clone()); + } + } + Term::Constant(c1) => { + if let Term::Constant(c2) = fact_term { + if c1 != c2 { + return None; + } + } else { + return None; + } + } + Term::Null(n1) => { + if let Term::Null(n2) = fact_term { + if n1 != n2 { + return None; + } + } else { + return None; + } + } + } + } + + Some(subst) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_substitution_apply() { + let mut subst = Substitution::new(); + subst.bind("X".to_string(), Term::constant("alice")); + subst.bind("Y".to_string(), Term::constant("bob")); + + let atom = Atom::new("Parent", vec![Term::var("X"), Term::var("Y")]); + let result = subst.apply_atom(&atom); + + assert!(result.is_ground()); + assert_eq!(format!("{}", result), "Parent(alice, bob)"); + } + + #[test] + fn test_unify_success() { + let pattern = Atom::new("Parent", vec![Term::var("X"), Term::constant("bob")]); + let fact = Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ); + + let subst = unify_atom(&pattern, &fact).unwrap(); + assert_eq!(subst.get("X"), Some(&Term::constant("alice"))); + } + + #[test] + fn test_unify_failure() { + let pattern = Atom::new("Parent", vec![Term::var("X"), Term::constant("carol")]); + let fact = Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ); + + assert!(unify_atom(&pattern, &fact).is_none()); + } +} diff --git a/src/chase/term.rs b/src/chase/term.rs new file mode 100644 index 0000000..0e26c75 --- /dev/null +++ b/src/chase/term.rs @@ -0,0 
+1,79 @@ +//! Terms represent values in the chase: constants, labeled nulls, or variables. + +use std::fmt; + +/// A term is a constant (from the input), a null (invented during chase), or a variable (used in rules). +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Term { + /// A constant value from the domain. + Constant(String), + /// A labeled null (invented value) created during the chase. + /// The usize is a unique identifier for this null. + Null(usize), + /// A variable (used in rule bodies/heads, not in instances). + Variable(String), +} + +impl Term { + /// Create a new constant term. + pub fn constant(value: impl Into) -> Self { + Term::Constant(value.into()) + } + + /// Create a new null term with the given id. + pub fn null(id: usize) -> Self { + Term::Null(id) + } + + /// Create a new variable term. + pub fn var(name: impl Into) -> Self { + Term::Variable(name.into()) + } + + /// Returns true if this term is a variable. + pub fn is_variable(&self) -> bool { + matches!(self, Term::Variable(_)) + } + + /// Returns true if this term is ground (not a variable). 
+ pub fn is_ground(&self) -> bool { + !self.is_variable() + } +} + +impl fmt::Display for Term { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Term::Constant(c) => write!(f, "{}", c), + Term::Null(id) => write!(f, "⊥{}", id), + Term::Variable(v) => write!(f, "?{}", v), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_term_creation() { + let c = Term::constant("alice"); + let n = Term::null(1); + let v = Term::var("X"); + + assert!(matches!(c, Term::Constant(_))); + assert!(matches!(n, Term::Null(1))); + assert!(matches!(v, Term::Variable(_))); + } + + #[test] + fn test_term_properties() { + let c = Term::constant("alice"); + let v = Term::var("X"); + + assert!(c.is_ground()); + assert!(!c.is_variable()); + assert!(!v.is_ground()); + assert!(v.is_variable()); + } +} diff --git a/src/cli.rs b/src/cli.rs deleted file mode 100644 index 8677f5f..0000000 --- a/src/cli.rs +++ /dev/null @@ -1,34 +0,0 @@ -use std::ffi::OsString; -use tracing::error; - -pub fn run(args: impl IntoIterator) -> Result<(), i32> { - let _args: Vec = args.into_iter().collect(); - // Your implementation here - // Expecting at least 2 arguments - if _args.len() < 2 { - error!("Expecting at least 2 arguments"); - return Err(1); - } - Ok(()) -} - -// Unit tests -#[cfg(test)] -mod tests { - use super::*; - use std::ffi::OsString; - - #[test] - fn test_run_with_valid_args() { - let args = vec![OsString::from("arg1"), OsString::from("arg2")]; - let result = run(args); - assert!(result.is_ok()); - } - - #[test] - fn test_run_with_invalid_args() { - let args = vec![OsString::from("invalid_arg")]; - let result = run(args); - assert!(result.is_err()); - } -} diff --git a/src/lib.rs b/src/lib.rs index 2b8e049..186b1a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,4 @@ -pub mod cli; -pub mod logging; +pub mod chase; + +// Re-export main types for convenience +pub use chase::{chase, Atom, ChaseResult, Instance, Rule, Substitution, Term}; diff 
--git a/src/logging.rs b/src/logging.rs deleted file mode 100644 index d1cfa71..0000000 --- a/src/logging.rs +++ /dev/null @@ -1,17 +0,0 @@ -use ctor::ctor; -use tracing::Level; -use tracing_subscriber; - -#[ctor] -fn set_debug_level() { - // If DEBUG_PROJ is not set or set to false, disable logging. Otherwise, enable logging - if std::env::var("DEBUG_PROJ").map_or(true, |v| v == "0" || v == "false" || v.is_empty()) { - // Disable logging - } else { - tracing_subscriber::fmt() - .with_max_level(Level::DEBUG) - .init(); - } - - //println!("DEBUG_PROJ: {:?}", std::env::var("DEBUG_PROJ")); -} diff --git a/src/main.rs b/src/main.rs index 50093e7..cef01df 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,4 @@ -use template_rust_project::cli::run; - fn main() { - if let Err(code) = run(std::env::args_os()) { - std::process::exit(code); - } + // TODO: Implement CLI for chase-rs + println!("chase-rs: An implementation of the chase algorithm"); } diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 8b13789..409c8c7 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1 +1,184 @@ +//! Integration tests for the chase algorithm. 
+use chase_rs::chase::rule::RuleBuilder; +use chase_rs::{chase, Atom, Instance, Term}; + +#[test] +fn test_transitive_closure() { + // Build a chain: a -> b -> c -> d + let instance: Instance = vec![ + Atom::new("Edge", vec![Term::constant("a"), Term::constant("b")]), + Atom::new("Edge", vec![Term::constant("b"), Term::constant("c")]), + Atom::new("Edge", vec![Term::constant("c"), Term::constant("d")]), + ] + .into_iter() + .collect(); + + // Edge(X, Y) -> Path(X, Y) + let rule1 = RuleBuilder::new() + .when("Edge", vec![Term::var("X"), Term::var("Y")]) + .then("Path", vec![Term::var("X"), Term::var("Y")]) + .build(); + + // Path(X, Y), Edge(Y, Z) -> Path(X, Z) + let rule2 = RuleBuilder::new() + .when("Path", vec![Term::var("X"), Term::var("Y")]) + .when("Edge", vec![Term::var("Y"), Term::var("Z")]) + .then("Path", vec![Term::var("X"), Term::var("Z")]) + .build(); + + let result = chase(instance, &[rule1, rule2]); + + assert!(result.terminated); + + // Should have 6 paths: a->b, b->c, c->d, a->c, b->d, a->d + let paths = result.instance.facts_for_predicate("Path"); + assert_eq!(paths.len(), 6); +} + +#[test] +fn test_existential_rule_generates_nulls() { + // Every employee must have a department + let instance: Instance = vec![ + Atom::new("Employee", vec![Term::constant("alice")]), + Atom::new("Employee", vec![Term::constant("bob")]), + Atom::new("Employee", vec![Term::constant("carol")]), + ] + .into_iter() + .collect(); + + // Employee(X) -> WorksIn(X, Dept) where Dept is existential + let rule = RuleBuilder::new() + .when("Employee", vec![Term::var("X")]) + .then("WorksIn", vec![Term::var("X"), Term::var("Dept")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + + let works_in = result.instance.facts_for_predicate("WorksIn"); + assert_eq!(works_in.len(), 3); + + // Each should have a unique null + let nulls: Vec<_> = works_in + .iter() + .filter_map(|f| match &f.terms[1] { + Term::Null(id) => Some(*id), + _ => None, + }) + 
.collect(); + assert_eq!(nulls.len(), 3); + + // All nulls should be unique + let mut unique_nulls = nulls.clone(); + unique_nulls.sort(); + unique_nulls.dedup(); + assert_eq!(unique_nulls.len(), 3); +} + +#[test] +fn test_multiple_head_atoms() { + let instance: Instance = vec![Atom::new("Person", vec![Term::constant("alice")])] + .into_iter() + .collect(); + + // Person(X) -> HasName(X, N), HasAge(X, A) + let rule = RuleBuilder::new() + .when("Person", vec![Term::var("X")]) + .then("HasName", vec![Term::var("X"), Term::var("N")]) + .then("HasAge", vec![Term::var("X"), Term::var("A")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + assert_eq!(result.instance.facts_for_predicate("HasName").len(), 1); + assert_eq!(result.instance.facts_for_predicate("HasAge").len(), 1); +} + +#[test] +fn test_chase_with_constants_in_rules() { + let instance: Instance = vec![ + Atom::new("Status", vec![Term::constant("alice"), Term::constant("active")]), + Atom::new("Status", vec![Term::constant("bob"), Term::constant("inactive")]), + ] + .into_iter() + .collect(); + + // Only active users get access: Status(X, "active") -> HasAccess(X) + let rule = RuleBuilder::new() + .when( + "Status", + vec![Term::var("X"), Term::constant("active")], + ) + .then("HasAccess", vec![Term::var("X")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + + let access = result.instance.facts_for_predicate("HasAccess"); + assert_eq!(access.len(), 1); + + // Only alice should have access + let fact = access[0]; + assert_eq!(fact.terms[0], Term::constant("alice")); +} + +#[test] +fn test_chase_reaches_fixpoint() { + // Test that applying the same rule multiple times doesn't create duplicates + let instance: Instance = vec![Atom::new("Fact", vec![Term::constant("x")])] + .into_iter() + .collect(); + + // Fact(X) -> Derived(X) + let rule = RuleBuilder::new() + .when("Fact", vec![Term::var("X")]) + .then("Derived", 
vec![Term::var("X")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + assert_eq!(result.instance.facts_for_predicate("Derived").len(), 1); + assert_eq!(result.steps, 1); // Should complete in one step +} + +#[test] +fn test_self_join_rule() { + // Find pairs of people with the same manager + let instance: Instance = vec![ + Atom::new( + "ManagedBy", + vec![Term::constant("alice"), Term::constant("eve")], + ), + Atom::new( + "ManagedBy", + vec![Term::constant("bob"), Term::constant("eve")], + ), + Atom::new( + "ManagedBy", + vec![Term::constant("carol"), Term::constant("frank")], + ), + ] + .into_iter() + .collect(); + + // ManagedBy(X, M), ManagedBy(Y, M) -> SameTeam(X, Y) + let rule = RuleBuilder::new() + .when("ManagedBy", vec![Term::var("X"), Term::var("M")]) + .when("ManagedBy", vec![Term::var("Y"), Term::var("M")]) + .then("SameTeam", vec![Term::var("X"), Term::var("Y")]) + .build(); + + let result = chase(instance, &[rule]); + + assert!(result.terminated); + + // Should have: (alice, alice), (alice, bob), (bob, alice), (bob, bob), (carol, carol) + let same_team = result.instance.facts_for_predicate("SameTeam"); + assert_eq!(same_team.len(), 5); +} diff --git a/python/.gitkeep b/tests/property_tests.rs similarity index 100% rename from python/.gitkeep rename to tests/property_tests.rs diff --git a/tests/regression_tests.rs b/tests/regression_tests.rs new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/README.md b/tests/testdata/README.md deleted file mode 100644 index 8edc998..0000000 --- a/tests/testdata/README.md +++ /dev/null @@ -1,22 +0,0 @@ -## Datasets for Testing - -This directory contains the datasets used for the tests in the [`tests`](../) directory. 
- -### Downloading the Datasets - -Run the following command to download the datasets used for testing: - -```shell -bash download_datasets.sh -``` - -### Checking the Datasets - -To check the datasets after downloading, run the following command: - -```shell -duckdb -init check_datasets.sql -no-stdin -``` - -You need to have the `duckdb` binary installed on your system to run the above command. -Check the [DuckDB installation guide](https://duckdb.org/docs/installation) for more information. diff --git a/tests/testdata/check_datasets.sql b/tests/testdata/check_datasets.sql deleted file mode 100644 index faa332f..0000000 --- a/tests/testdata/check_datasets.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Description: This script is used to check the datasets that are available in the testdata directory. --- Run using DuckDB CLI (in current directory): duckdb -init check_datasets.sql -no-stdin - --- Query the Wine Quality CSV file using csv_scan: -SELECT * FROM read_csv('winequality-red.csv') LIMIT 5; - --- Query the NYC Yellow Taxi Parquet file using parquet_scan: -SELECT * FROM read_parquet('yellow_tripdata_2019-01.parquet') LIMIT 5; - --- Query the NYC Green Taxi Parquet file using parquet_scan: -SELECT * FROM read_parquet('green_tripdata_2019-01.parquet') LIMIT 5; diff --git a/tests/testdata/download_datasets.sh b/tests/testdata/download_datasets.sh deleted file mode 100644 index 83403a5..0000000 --- a/tests/testdata/download_datasets.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Directory for test data (relative to this script) -TESTDATA_DIR="$(dirname "$0")" -echo "Using test data directory: $TESTDATA_DIR" - -# Create the directory if it doesn't exist -mkdir -p "$TESTDATA_DIR" - -echo "Downloading Wine Quality Dataset (red wine)..." -wget -c -O "$TESTDATA_DIR/winequality-red.csv" "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv" - -echo "Downloading NYC Yellow Taxi Trip Data (January 2019, Parquet)..." 
-wget -c -O "$TESTDATA_DIR/yellow_tripdata_2019-01.parquet" "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2019-01.parquet" - -echo "Downloading NYC Green Taxi Trip Data (January 2019, Parquet)..." -wget -c -O "$TESTDATA_DIR/green_tripdata_2019-01.parquet" "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2019-01.parquet" - -echo "Download complete. Test data saved to $TESTDATA_DIR"