From dff8adebfa9874bbb8aa613245a57a2adc83cd35 Mon Sep 17 00:00:00 2001
From: Hassan Abedi <cogitator.tech@gmail.com>
Date: Fri, 10 Apr 2026 16:06:57 +0200
Subject: [PATCH] Decouple executor from Instance via DataSource trait

---
 AGENTS.md                   |   3 +-
 README.md                   |  12 +++-
 ROADMAP.md                  |   4 +-
 src/execution/mod.rs        | 107 ++++++++++++++++++++++++++++--------
 tests/sql_pipeline_tests.rs |  31 +++++++++++
 5 files changed, 131 insertions(+), 26 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 968c8e4..1a9918f 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -59,7 +59,7 @@ Quick examples:
 - `src/catalog/`: predicate-to-table schema inference and catalog access.
 - `src/sql/`: narrow SQL AST and parser support.
 - `src/planner/`: logical plan structures and SQL-to-plan translation.
-- `src/execution/`: execution of the current logical plan subset.
+- `src/execution/`: execution of the current logical plan subset, including the `DataSource` trait and the `TableStore` in-memory source.
 - `examples/scripts/`: runnable script examples for supported workflows.
 - `tests/`: integration, regression, and property-based tests.
 
@@ -75,6 +75,7 @@ Quick examples:
 - Stable SQL column names come from explicit catalog registration or the frontend `schema ...` command, including for empty tables; otherwise the default names are positional such as `c0` and `c1`.
 - Single-table SQL queries may use the table name as a qualifier when no alias is present.
 - Do not describe unsupported SQL features such as aggregates, grouping, or arbitrary expressions as implemented.
+- The executor operates on the `DataSource` trait, not on `Instance` directly. `Instance` and `TableStore` are the two built-in implementations.
 - Relational and SQL modules should build on explicit schemas and logical plans, not call frontend helpers directly.
 - If you add parser, planner, or executor layers, keep their responsibilities separate.
 - Public docs and interfaces should reflect the implemented state of the repository accurately.
diff --git a/README.md b/README.md
index ddd2b2d..68ad8a5 100644
--- a/README.md
+++ b/README.md
@@ -26,12 +26,22 @@ The repository is currently organized around a few clear subsystems:
 - `src/catalog/`: predicate-backed table metadata
 - `src/sql/`: minimal SQL AST and parser
 - `src/planner/`: logical plan structures and SQL-to-plan translation
-- `src/execution/`: execution for the current logical-plan subset
+- `src/execution/`: execution for the current logical-plan subset, plus the `DataSource` trait and `TableStore`
 
 Today, the chase subsystem is still the most mature part of the codebase. The
 relational and SQL modules are present to create clean extension points for a
 broader query-engine architecture.
 
+The executor operates on the `DataSource` trait rather than on the chase
+`Instance` directly. This allows non-chase data sources to plug into the SQL
+pipeline. The crate ships two implementations: `Instance` (chase-backed) and
+`TableStore` (in-memory rows). Implementing `DataSource` for a new backend
+requires a single method:
+
+```rust
+fn scan(&self, table: &str, schema: &Schema) -> Result<ResultSet, ExecutionError>;
+```
+
 ### Intended Direction
 
 The medium-term direction is to evolve this project into a more general
diff --git a/ROADMAP.md b/ROADMAP.md
index f8761b6..14b665b 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -48,9 +48,9 @@ This document tracks the current state and next steps for the repository.
 
 - [x] Introduce a dedicated logical representation module
 - [x] Define clear front-end, planning, and execution boundaries
-- [ ] Add engine-level abstractions that are not chase-specific
+- [x] Add engine-level abstractions that are not chase-specific
 - [x] Establish common schema and typed-value representations
-- [ ] Design a source boundary for future scans and pushdown
+- [x] Design a source boundary for future scans and pushdown
 
 ### Front End and Planning
 
diff --git a/src/execution/mod.rs b/src/execution/mod.rs
index df3a55d..6b83f74 100644
--- a/src/execution/mod.rs
+++ b/src/execution/mod.rs
@@ -1,4 +1,10 @@
-//! Minimal execution support for the first SQL slice.
+//! Execution support for the current SQL slice.
+//!
+//! The executor evaluates a [`LogicalPlan`] against a [`DataSource`] that
+//! provides table scans. The built-in [`Instance`](crate::chase::Instance)
+//! adapter and the [`TableStore`] are the two provided implementations.
+
+pub mod table_store;
 
 use std::cmp::Ordering;
 use std::error::Error;
@@ -6,7 +12,9 @@ use std::fmt;
 
 use crate::chase::{Instance, Term};
 use crate::planner::logical::{LogicalExpr, LogicalPlan, SortDirection, SortKey};
-use crate::relational::{ResultSet, Row, Value};
+use crate::relational::{ResultSet, Row, Schema, Value};
+
+pub use table_store::TableStore;
 
 /// Errors returned by the current logical-plan executor.
 #[derive(Debug)]
@@ -30,28 +38,43 @@ impl fmt::Display for ExecutionError {
 
 impl Error for ExecutionError {}
 
-/// Execute the current logical-plan subset against an instance-backed source.
-pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, ExecutionError> {
-    match plan {
-        LogicalPlan::Scan { table, schema } => {
-            let mut rows = Vec::new();
-            for fact in instance.facts_for_predicate(table) {
-                let values = fact
-                    .terms
-                    .iter()
-                    .map(value_from_term)
-                    .collect::<Result<Vec<_>, _>>()?;
-                rows.push(Row::new(values));
-            }
-            Ok(ResultSet::new(schema.clone(), rows))
+/// A source of relational data for the executor.
+///
+/// Implementations provide table scans that return rows conforming to a given
+/// schema. The executor calls [`scan`](DataSource::scan) for each
+/// [`LogicalPlan::Scan`] node; all other operators work on the resulting
+/// [`ResultSet`] values.
+pub trait DataSource {
+    /// Scan the named table and return all of its rows as a result set conforming to `schema`.
+    fn scan(&self, table: &str, schema: &Schema) -> Result<ResultSet, ExecutionError>;
+}
+
+impl DataSource for Instance {
+    fn scan(&self, table: &str, schema: &Schema) -> Result<ResultSet, ExecutionError> {
+        let mut rows = Vec::new();
+        for fact in self.facts_for_predicate(table) {
+            let values = fact
+                .terms
+                .iter()
+                .map(value_from_term)
+                .collect::<Result<Vec<_>, _>>()?;
+            rows.push(Row::new(values));
         }
+        Ok(ResultSet::new(schema.clone(), rows))
+    }
+}
+
+/// Execute a logical plan against the provided data source.
+pub fn execute(plan: &LogicalPlan, source: &dyn DataSource) -> Result<ResultSet, ExecutionError> {
+    match plan {
+        LogicalPlan::Scan { table, schema } => source.scan(table, schema),
         LogicalPlan::CrossJoin {
             left,
             right,
             schema,
         } => {
-            let left_result = execute(left, instance)?;
-            let right_result = execute(right, instance)?;
+            let left_result = execute(left, source)?;
+            let right_result = execute(right, source)?;
             let mut rows = Vec::new();
 
             for left_row in left_result.rows() {
@@ -65,7 +88,7 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
             Ok(ResultSet::new(schema.clone(), rows))
         }
         LogicalPlan::Filter { input, predicate } => {
-            let result = execute(input, instance)?;
+            let result = execute(input, source)?;
             let filtered_rows = result
                 .rows()
                 .iter()
@@ -79,7 +102,7 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
             expressions,
             schema,
         } => {
-            let result = execute(input, instance)?;
+            let result = execute(input, source)?;
             let mut rows = Vec::new();
             for row in result.rows() {
                 let values = expressions
@@ -95,14 +118,14 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
             keys,
             schema,
         } => {
-            let result = execute(input, instance)?;
+            let result = execute(input, source)?;
             let mut rows = result.rows().to_vec();
             let resolved_keys = resolve_sort_keys(keys, result.schema())?;
             rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys));
             Ok(ResultSet::new(schema.clone(), rows))
         }
         LogicalPlan::Limit { input, count } => {
-            let result = execute(input, instance)?;
+            let result = execute(input, source)?;
             let rows = result.rows().iter().take(*count).cloned().collect();
             Ok(ResultSet::new(result.schema().clone(), rows))
         }
@@ -219,3 +242,43 @@ fn compare_values(left: &Value, right: &Value) -> Ordering {
         (Value::Boolean(_), Value::Text(_)) => Ordering::Greater,
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::chase::{Atom, Term};
+    use crate::relational::{DataType, Field};
+
+    #[test]
+    fn instance_datasource_scans_predicate_as_table() {
+        let instance: Instance = vec![
+            Atom::new(
+                "Parent",
+                vec![Term::constant("alice"), Term::constant("bob")],
+            ),
+            Atom::new(
+                "Parent",
+                vec![Term::constant("bob"), Term::constant("carol")],
+            ),
+        ]
+        .into_iter()
+        .collect();
+
+        let schema = Schema::new(vec![
+            Field::new("c0", DataType::Text, false),
+            Field::new("c1", DataType::Text, false),
+        ]);
+
+        let result = instance.scan("Parent", &schema).unwrap();
+        assert_eq!(result.rows().len(), 2);
+        assert_eq!(result.schema().len(), 2);
+    }
+
+    #[test]
+    fn instance_datasource_returns_empty_for_unknown_predicate() {
+        let instance = Instance::new();
+        let schema = Schema::new(vec![]);
+        let result = instance.scan("Missing", &schema).unwrap();
+        assert_eq!(result.rows().len(), 0);
+    }
+}
diff --git a/tests/sql_pipeline_tests.rs b/tests/sql_pipeline_tests.rs
index 9134585..2bad1c2 100644
--- a/tests/sql_pipeline_tests.rs
+++ b/tests/sql_pipeline_tests.rs
@@ -329,3 +329,34 @@ fn select_where_not_equal_excludes_matching_rows() {
     assert_eq!(result.rows().len(), 1);
     assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob");
 }
+
+#[test]
+fn execute_with_table_store_scans_in_memory_rows() {
+    use query_engine::execution::TableStore;
+    use query_engine::relational::{DataType, Field, Row, Schema, Value};
+
+    let schema = Schema::new(vec![
+        Field::new("name", DataType::Text, false),
+        Field::new("age", DataType::Integer, false),
+    ]);
+
+    let mut store = TableStore::new();
+    store.insert(
+        "people",
+        schema.clone(),
+        vec![
+            Row::new(vec![Value::text("alice"), Value::Integer(30)]),
+            Row::new(vec![Value::text("bob"), Value::Integer(25)]),
+        ],
+    );
+
+    let mut catalog = PredicateCatalog::new();
+    catalog.register_table("people", schema);
+
+    let select = parse_select("SELECT name FROM people WHERE age != 30").unwrap();
+    let plan = plan_select(&select, &catalog).unwrap();
+    let result = execute(&plan, &store).unwrap();
+
+    assert_eq!(result.rows().len(), 1);
+    assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob");
+}