From dff8adebfa9874bbb8aa613245a57a2adc83cd35 Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Fri, 10 Apr 2026 16:06:57 +0200 Subject: [PATCH] Decouple executor from Instance via DataSource trait --- AGENTS.md | 3 +- README.md | 12 +++- ROADMAP.md | 4 +- src/execution/mod.rs | 107 ++++++++++++++++++++++++++++-------- tests/sql_pipeline_tests.rs | 31 +++++++++++ 5 files changed, 131 insertions(+), 26 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 968c8e4..1a9918f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -59,7 +59,7 @@ Quick examples: - `src/catalog/`: predicate-to-table schema inference and catalog access. - `src/sql/`: narrow SQL AST and parser support. - `src/planner/`: logical plan structures and SQL-to-plan translation. -- `src/execution/`: execution of the current logical plan subset. +- `src/execution/`: execution of the current logical plan subset, including the `DataSource` trait and the `TableStore` in-memory source. - `examples/scripts/`: runnable script examples for supported workflows. - `tests/`: integration, regression, and property-based tests. @@ -75,6 +75,7 @@ Quick examples: - Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`. - Single-table SQL queries may use the table name as a qualifier when no alias is present. - Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented. +- The executor operates on the `DataSource` trait, not on `Instance` directly. `Instance` and `TableStore` are the two built-in implementations. - Relational and SQL modules should build on explicit schemas and logical plans, not call frontend helpers directly. - If you add parser, planner, or executor layers, keep their responsibilities separate. - Public docs and interfaces should reflect the implemented state of the repository accurately. 
diff --git a/README.md b/README.md index ddd2b2d..68ad8a5 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,22 @@ The repository is currently organized around a few clear subsystems: - `src/catalog/`: predicate-backed table metadata - `src/sql/`: minimal SQL AST and parser - `src/planner/`: logical plan structures and SQL-to-plan translation -- `src/execution/`: execution for the current logical-plan subset +- `src/execution/`: execution for the current logical-plan subset, `DataSource` trait, and `TableStore` Today, the chase subsystem is still the most mature part of the codebase. The relational and SQL modules are present to create clean extension points for a broader query-engine architecture. +The executor operates on the `DataSource` trait rather than on the chase +`Instance` directly. This allows non-chase data sources to plug into the SQL +pipeline. The crate ships two implementations: `Instance` (chase-backed) and +`TableStore` (in-memory rows). Implementing `DataSource` for a new backend +requires a single method: + +```rust +fn scan(&self, table: &str, schema: &Schema) -> Result<ResultSet, ExecutionError>; +``` + ### Intended Direction The medium-term direction is to evolve this project into a more general diff --git a/ROADMAP.md b/ROADMAP.md index f8761b6..14b665b 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -48,9 +48,9 @@ This document tracks the current state and next steps for the repository. 
- [x] Introduce a dedicated logical representation module - [x] Define clear front-end, planning, and execution boundaries -- [ ] Add engine-level abstractions that are not chase-specific +- [x] Add engine-level abstractions that are not chase-specific - [x] Establish common schema and typed-value representations -- [ ] Design a source boundary for future scans and pushdown +- [x] Design a source boundary for future scans and pushdown ### Front End and Planning diff --git a/src/execution/mod.rs b/src/execution/mod.rs index df3a55d..6b83f74 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -1,4 +1,10 @@ -//! Minimal execution support for the first SQL slice. +//! Execution support for the current SQL slice. +//! +//! The executor evaluates a [`LogicalPlan`] against a [`DataSource`] that +//! provides table scans. The built-in [`Instance`](crate::chase::Instance) +//! adapter and the [`TableStore`] are the two provided implementations. + +pub mod table_store; use std::cmp::Ordering; use std::error::Error; @@ -6,7 +12,9 @@ use std::fmt; use crate::chase::{Instance, Term}; use crate::planner::logical::{LogicalExpr, LogicalPlan, SortDirection, SortKey}; -use crate::relational::{ResultSet, Row, Value}; +use crate::relational::{ResultSet, Row, Schema, Value}; + +pub use table_store::TableStore; /// Errors returned by the current logical-plan executor. #[derive(Debug)] @@ -30,28 +38,43 @@ impl fmt::Display for ExecutionError { impl Error for ExecutionError {} -/// Execute the current logical-plan subset against an instance-backed source. -pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, ExecutionError> { - match plan { - LogicalPlan::Scan { table, schema } => { - let mut rows = Vec::new(); - for fact in instance.facts_for_predicate(table) { - let values = fact - .terms - .iter() - .map(value_from_term) - .collect::<Result<Vec<_>, _>>()?; - rows.push(Row::new(values)); - } - Ok(ResultSet::new(schema.clone(), rows)) +/// A source of relational data for the executor. 
+/// +/// Implementations provide table scans that return rows conforming to a given +/// schema. The executor calls [`scan`](DataSource::scan) for each +/// [`LogicalPlan::Scan`] node; all other operators work on the resulting +/// [`ResultSet`] values. +pub trait DataSource { + /// Scan all rows for the named table, conforming to the provided schema. + fn scan(&self, table: &str, schema: &Schema) -> Result<ResultSet, ExecutionError>; +} + +impl DataSource for Instance { + fn scan(&self, table: &str, schema: &Schema) -> Result<ResultSet, ExecutionError> { + let mut rows = Vec::new(); + for fact in self.facts_for_predicate(table) { + let values = fact + .terms + .iter() + .map(value_from_term) + .collect::<Result<Vec<_>, _>>()?; + rows.push(Row::new(values)); } + Ok(ResultSet::new(schema.clone(), rows)) + } +} + +/// Execute a logical plan against the provided data source. +pub fn execute(plan: &LogicalPlan, source: &dyn DataSource) -> Result<ResultSet, ExecutionError> { + match plan { + LogicalPlan::Scan { table, schema } => source.scan(table, schema), LogicalPlan::CrossJoin { left, right, schema, } => { - let left_result = execute(left, instance)?; - let right_result = execute(right, instance)?; + let left_result = execute(left, source)?; + let right_result = execute(right, source)?; let mut rows = Vec::new(); for left_row in left_result.rows() { @@ -65,7 +88,7 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result { - let result = execute(input, instance)?; + let result = execute(input, source)?; let filtered_rows = result .rows() .iter() @@ -79,7 +102,7 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result { - let result = execute(input, instance)?; + let result = execute(input, source)?; let mut rows = Vec::new(); for row in result.rows() { let values = expressions @@ -95,14 +118,14 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result { - let result = execute(input, instance)?; + let result = execute(input, source)?; let mut rows = result.rows().to_vec(); let resolved_keys = resolve_sort_keys(keys, 
result.schema())?; rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys)); Ok(ResultSet::new(schema.clone(), rows)) } LogicalPlan::Limit { input, count } => { - let result = execute(input, instance)?; + let result = execute(input, source)?; let rows = result.rows().iter().take(*count).cloned().collect(); Ok(ResultSet::new(result.schema().clone(), rows)) } @@ -219,3 +242,43 @@ fn compare_values(left: &Value, right: &Value) -> Ordering { (Value::Boolean(_), Value::Text(_)) => Ordering::Greater, } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::chase::{Atom, Term}; + use crate::relational::{DataType, Field}; + + #[test] + fn instance_datasource_scans_predicate_as_table() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + ] + .into_iter() + .collect(); + + let schema = Schema::new(vec![ + Field::new("c0", DataType::Text, false), + Field::new("c1", DataType::Text, false), + ]); + + let result = instance.scan("Parent", &schema).unwrap(); + assert_eq!(result.rows().len(), 2); + assert_eq!(result.schema().len(), 2); + } + + #[test] + fn instance_datasource_returns_empty_for_unknown_predicate() { + let instance = Instance::new(); + let schema = Schema::new(vec![]); + let result = instance.scan("Missing", &schema).unwrap(); + assert_eq!(result.rows().len(), 0); + } +} diff --git a/tests/sql_pipeline_tests.rs b/tests/sql_pipeline_tests.rs index 9134585..2bad1c2 100644 --- a/tests/sql_pipeline_tests.rs +++ b/tests/sql_pipeline_tests.rs @@ -329,3 +329,34 @@ fn select_where_not_equal_excludes_matching_rows() { assert_eq!(result.rows().len(), 1); assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob"); } + +#[test] +fn execute_with_table_store_scans_in_memory_rows() { + use query_engine::execution::TableStore; + use query_engine::relational::{DataType, Field, Row, Schema, Value}; + + let 
schema = Schema::new(vec![ + Field::new("name", DataType::Text, false), + Field::new("age", DataType::Integer, false), + ]); + + let mut store = TableStore::new(); + store.insert( + "people", + schema.clone(), + vec![ + Row::new(vec![Value::text("alice"), Value::Integer(30)]), + Row::new(vec![Value::text("bob"), Value::Integer(25)]), + ], + ); + + let mut catalog = PredicateCatalog::new(); + catalog.register_table("people", schema); + + let select = parse_select("SELECT name FROM people WHERE age != 30").unwrap(); + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &store).unwrap(); + + assert_eq!(result.rows().len(), 1); + assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob"); +}