Add SQL projection aliases and literal expressions support

This commit is contained in:
Hassan Abedi 2026-04-09 12:50:06 +02:00
parent dd7b16ce93
commit 23cbc6a4bf
20 changed files with 374 additions and 55 deletions

View File

@ -77,7 +77,7 @@ install-deps: ## Install development dependencies
.PHONY: lint .PHONY: lint
lint: format ## Run the linters lint: format ## Run the linters
@echo "Linting Rust files..." @echo "Linting Rust files..."
@DEBUG_PROJ=$(DEBUG_PROJ) cargo clippy -- -D warnings -D clippy::unwrap_used -D clippy::expect_used @DEBUG_PROJ=$(DEBUG_PROJ) cargo clippy --all-targets --all-features -- -D warnings
.PHONY: audit .PHONY: audit
audit: ## Run security audit on Rust dependencies audit: ## Run security audit on Rust dependencies
@ -92,7 +92,7 @@ docs: format ## Generate the documentation
.PHONY: fix-lint .PHONY: fix-lint
fix-lint: ## Fix the linter warnings fix-lint: ## Fix the linter warnings
@echo "Fixing linter warnings..." @echo "Fixing linter warnings..."
@cargo clippy --fix --allow-dirty --allow-staged --all-targets --workspace @cargo clippy --fix --allow-dirty --allow-staged --all-targets --all-features -- -D warnings
.PHONY: nextest .PHONY: nextest
nextest: ## Run tests using nextest nextest: ## Run tests using nextest
@ -116,5 +116,9 @@ test-hooks: ## Test Git hooks on all files
@pre-commit run --all-files --show-diff-on-failure @pre-commit run --all-files --show-diff-on-failure
.PHONY: check .PHONY: check
check: format lint test ## Run format, lint, and test check: format ## Run format, lint, and test
@echo "Checking formatting..."
@cargo fmt --check
@$(MAKE) lint
@$(MAKE) test
@echo "All checks passed." @echo "All checks passed."

View File

@ -2,10 +2,10 @@
An experimental Rust project for building query-engine components. An experimental Rust project for building query-engine components.
Right now the repository is centered on a chase-based reasoning core plus a Right now the repository is centered on a chase-based reasoning core, a small
small interactive frontend, plus an early relational/SQL scaffold. The broader interactive frontend, and an early relational/SQL scaffold. The broader target
target shape is a query engine with clearer front-end, planning, optimization, shape is a query engine with clearer front-end, planning, optimization, and
and execution boundaries. execution boundaries.
### Current scope ### Current scope
@ -16,6 +16,22 @@ and execution boundaries.
- Relational schema, catalog, logical-plan, and execution scaffolding - Relational schema, catalog, logical-plan, and execution scaffolding
- A minimal SQL slice for single-table `SELECT-FROM-WHERE` queries - A minimal SQL slice for single-table `SELECT-FROM-WHERE` queries
### Architecture
The repository is currently organized around a few clear subsystems:
- `src/chase/`: rule-engine data structures and chase execution
- `src/frontend/`: REPL, script, GUI, and explanation rendering
- `src/relational/`: schemas, values, rows, and result sets
- `src/catalog/`: predicate-backed table metadata
- `src/sql/`: minimal SQL AST and parser
- `src/planner/`: logical plan structures and SQL-to-plan translation
- `src/execution/`: execution for the current logical-plan subset
Today, the chase subsystem is still the most mature part of the codebase. The
relational and SQL modules are present to create clean extension points for a
broader query-engine architecture.
### Intended Direction ### Intended Direction
The medium-term direction is to evolve this project into a more general The medium-term direction is to evolve this project into a more general
@ -75,6 +91,7 @@ cargo run -- script examples/scripts/ancestor.chase
```text ```text
fact Parent(alice, bob). fact Parent(alice, bob).
rule Parent(?X, ?Y) -> Ancestor(?X, ?Y). rule Parent(?X, ?Y) -> Ancestor(?X, ?Y).
sql SELECT * FROM Parent;
run. run.
query Ancestor(?X, ?Y)? query Ancestor(?X, ?Y)?
explain Ancestor(alice, carol)? explain Ancestor(alice, carol)?
@ -100,6 +117,14 @@ Currently supported examples:
SELECT * FROM Parent SELECT * FROM Parent
SELECT c0 FROM Parent SELECT c0 FROM Parent
SELECT c0 FROM Parent WHERE c1 = 'bob' SELECT c0 FROM Parent WHERE c1 = 'bob'
SELECT c0 AS parent_name, 'seed' AS label FROM Parent
```
In the REPL or script runner, use the `sql` command and end the statement with
`;`:
```text
sql SELECT c0 FROM Parent WHERE c1 = 'bob';
``` ```
Current limits: Current limits:
@ -108,7 +133,7 @@ Current limits:
- positional column names such as `c0`, `c1` - positional column names such as `c0`, `c1`
- no joins - no joins
- no aggregates - no aggregates
- no aliases - projection aliases only via `AS`
### Development ### Development

View File

@ -3,6 +3,9 @@
This directory contains example `.geolog` files that use a richer DSL than the This directory contains example `.geolog` files that use a richer DSL than the
minimal `.chase` script language in `examples/scripts/`. minimal `.chase` script language in `examples/scripts/`.
These files are reference material and experiments. They are not currently
wired into the `query-engine` binary, REPL, SQL parser, or planner pipeline.
This README summarizes the Geolog DSL structure as it appears in the examples in This README summarizes the Geolog DSL structure as it appears in the examples in
this directory. It should be read as a practical, example-driven reference, not this directory. It should be read as a practical, example-driven reference, not
as a formal or complete language specification. as a formal or complete language specification.

View File

@ -4,6 +4,7 @@ These scripts can be executed with:
```bash ```bash
make script SCRIPT=examples/scripts/ancestor.chase make script SCRIPT=examples/scripts/ancestor.chase
cargo run -- script examples/scripts/ancestor.chase
``` ```
Available examples: Available examples:

View File

@ -1,5 +1,5 @@
{ {
description = "A playground for experimenting with query engine stuff"; description = "A playground for experimenting with query engine components in Rust";
inputs = { inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";

View File

@ -1,4 +1,7 @@
//! Minimal catalog support for mapping predicates to relational schemas. //! Minimal catalog support for mapping predicates to relational schemas.
//!
//! The current catalog is intentionally lightweight. It infers table schemas
//! from predicate arities and null occurrences in an [`Instance`](crate::chase::Instance).
use std::collections::HashMap; use std::collections::HashMap;
use std::error::Error; use std::error::Error;
@ -7,9 +10,12 @@ use std::fmt;
use crate::chase::{Instance, Term}; use crate::chase::{Instance, Term};
use crate::relational::{DataType, Field, Schema}; use crate::relational::{DataType, Field, Schema};
/// Errors returned by the predicate-backed catalog.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum CatalogError { pub enum CatalogError {
/// The requested predicate-backed table does not exist.
UnknownTable(String), UnknownTable(String),
/// Facts for one predicate do not all have the same arity.
InconsistentArity { InconsistentArity {
table: String, table: String,
expected: usize, expected: usize,
@ -36,26 +42,34 @@ impl fmt::Display for CatalogError {
impl Error for CatalogError {} impl Error for CatalogError {}
/// A minimal catalog that maps predicate names to inferred relational schemas.
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct PredicateCatalog { pub struct PredicateCatalog {
schemas: HashMap<String, Schema>, schemas: HashMap<String, Schema>,
} }
impl PredicateCatalog { impl PredicateCatalog {
/// Create an empty catalog.
pub fn new() -> Self { pub fn new() -> Self {
Self::default() Self::default()
} }
/// Register a table schema explicitly.
pub fn register_table(&mut self, table: impl Into<String>, schema: Schema) { pub fn register_table(&mut self, table: impl Into<String>, schema: Schema) {
self.schemas.insert(table.into(), schema); self.schemas.insert(table.into(), schema);
} }
/// Fetch a schema for a table name.
pub fn schema_for(&self, table: &str) -> Result<&Schema, CatalogError> { pub fn schema_for(&self, table: &str) -> Result<&Schema, CatalogError> {
self.schemas self.schemas
.get(table) .get(table)
.ok_or_else(|| CatalogError::UnknownTable(table.to_string())) .ok_or_else(|| CatalogError::UnknownTable(table.to_string()))
} }
/// Infer table schemas from the predicates present in an instance.
///
/// Each predicate becomes one table, with positional column names `c0`,
/// `c1`, and so on.
pub fn from_instance(instance: &Instance) -> Result<Self, CatalogError> { pub fn from_instance(instance: &Instance) -> Result<Self, CatalogError> {
let mut arities = HashMap::new(); let mut arities = HashMap::new();
let mut nullable_positions: HashMap<String, Vec<bool>> = HashMap::new(); let mut nullable_positions: HashMap<String, Vec<bool>> = HashMap::new();

View File

@ -7,9 +7,12 @@ use crate::chase::{Instance, Term};
use crate::planner::logical::{LogicalExpr, LogicalPlan}; use crate::planner::logical::{LogicalExpr, LogicalPlan};
use crate::relational::{ResultSet, Row, Value}; use crate::relational::{ResultSet, Row, Value};
/// Errors returned by the current logical-plan executor.
#[derive(Debug)] #[derive(Debug)]
pub enum ExecutionError { pub enum ExecutionError {
/// A column reference could not be resolved.
UnknownColumn(String), UnknownColumn(String),
/// The scan layer encountered a variable term where a ground value was expected.
NonGroundScanTerm, NonGroundScanTerm,
} }
@ -26,6 +29,7 @@ impl fmt::Display for ExecutionError {
impl Error for ExecutionError {} impl Error for ExecutionError {}
/// Execute the current logical-plan subset against an instance-backed source.
pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, ExecutionError> { pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, ExecutionError> {
match plan { match plan {
LogicalPlan::Scan { table, schema } => { LogicalPlan::Scan { table, schema } => {

View File

@ -2,11 +2,14 @@
use crate::chase::rule::RuleBuilder; use crate::chase::rule::RuleBuilder;
use crate::chase::{Atom, Rule, Term}; use crate::chase::{Atom, Rule, Term};
use crate::sql::ast::Select;
use crate::sql::parser::parse_select;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Command { pub enum Command {
Fact(Atom), Fact(Atom),
Rule(Rule), Rule(Rule),
Sql(Select),
Run, Run,
Query(Vec<Atom>), Query(Vec<Atom>),
Explain(Vec<Atom>), Explain(Vec<Atom>),
@ -51,6 +54,11 @@ pub fn parse_command(input: &str) -> Result<Command, String> {
return Ok(Command::Help); return Ok(Command::Help);
} }
if let Some(rest) = strip_keyword(trimmed, "sql") {
let select = parse_select(trim_suffix(rest, ';')?).map_err(|err| err.to_string())?;
return Ok(Command::Sql(select));
}
if let Some(rest) = strip_keyword(trimmed, "fact") { if let Some(rest) = strip_keyword(trimmed, "fact") {
let atom = parse_atom(trim_suffix(rest, '.')?)?; let atom = parse_atom(trim_suffix(rest, '.')?)?;
if !atom.is_ground() { if !atom.is_ground() {
@ -367,6 +375,18 @@ mod tests {
} }
} }
#[test]
fn parse_sql_command() {
    // The `sql` keyword must yield a `Command::Sql` wrapping the parsed SELECT.
    let parsed = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap();
    let select = match parsed {
        Command::Sql(select) => select,
        other => panic!("unexpected command: {:?}", other),
    };

    assert_eq!(select.from, "Parent");
    assert!(select.selection.is_some());
}
#[test] #[test]
fn parse_query_command() { fn parse_query_command() {
let command = parse_command("query Ancestor(?X, ?Y), Parent(?Y, ?Z)?").unwrap(); let command = parse_command("query Ancestor(?X, ?Y), Parent(?Y, ?Z)?").unwrap();

View File

@ -2,9 +2,13 @@
use std::fmt; use std::fmt;
use crate::catalog::PredicateCatalog;
use crate::chase::{ use crate::chase::{
Atom, Instance, MaterializedState, Rule, Substitution, find_matches, materialize, Atom, Instance, MaterializedState, Rule, Substitution, find_matches, materialize,
}; };
use crate::execution::execute;
use crate::planner::sql::plan_select;
use crate::relational::ResultSet;
use super::language::{Command, parse_script}; use super::language::{Command, parse_script};
use super::provenance::explain_atom; use super::provenance::explain_atom;
@ -59,6 +63,7 @@ impl Session {
self.rules.push(rule.clone()); self.rules.push(rule.clone());
Ok(format!("Added rule #{}: {}", self.rules.len(), rule)) Ok(format!("Added rule #{}: {}", self.rules.len(), rule))
} }
Command::Sql(select) => self.run_sql(&select),
Command::Run => Ok(self.run_chase()), Command::Run => Ok(self.run_chase()),
Command::Query(query) => Ok(self.run_query(&query)), Command::Query(query) => Ok(self.run_query(&query)),
Command::Explain(query) => Ok(self.explain_query(&query)), Command::Explain(query) => Ok(self.explain_query(&query)),
@ -123,6 +128,14 @@ impl Session {
rendered.join("\n") rendered.join("\n")
} }
fn run_sql(&self, select: &crate::sql::ast::Select) -> Result<String, String> {
let instance = self.active_instance();
let catalog = PredicateCatalog::from_instance(instance).map_err(|err| err.to_string())?;
let plan = plan_select(select, &catalog).map_err(|err| err.to_string())?;
let result = execute(&plan, instance).map_err(|err| err.to_string())?;
Ok(render_result_set(&result))
}
fn explain_query(&self, query: &[Atom]) -> String { fn explain_query(&self, query: &[Atom]) -> String {
let instance = self.active_instance(); let instance = self.active_instance();
let matches = find_matches(instance, query); let matches = find_matches(instance, query);
@ -198,6 +211,7 @@ fn help_text() -> &'static str {
"Commands: "Commands:
fact Parent(alice, bob). fact Parent(alice, bob).
rule Parent(?X, ?Y) -> Ancestor(?X, ?Y). rule Parent(?X, ?Y) -> Ancestor(?X, ?Y).
sql SELECT * FROM Parent;
run. run.
query Ancestor(?X, ?Y)? query Ancestor(?X, ?Y)?
explain Ancestor(alice, bob)? explain Ancestor(alice, bob)?
@ -216,6 +230,39 @@ where
rendered rendered
} }
/// Render a result set as plain text.
///
/// The first line is always `N row(s)`. When the schema has no fields, that
/// line is the entire output. Otherwise it is followed by a header of field
/// names and one line per row, with cells joined by `" | "`. Row lines are
/// sorted by their rendered text.
fn render_result_set(result: &ResultSet) -> String {
    let count_line = format!("{} row(s)", result.rows().len());

    let schema = result.schema();
    if schema.is_empty() {
        return count_line;
    }

    let column_names: Vec<&str> = schema.fields().iter().map(|field| field.name()).collect();

    let mut row_lines: Vec<String> = result
        .rows()
        .iter()
        .map(|row| {
            let cells: Vec<String> = row.values().iter().map(|value| value.to_string()).collect();
            cells.join(" | ")
        })
        .collect();
    // Equal strings are indistinguishable, so an unstable sort gives the same order.
    row_lines.sort_unstable();

    let mut output = Vec::with_capacity(row_lines.len() + 2);
    output.push(count_line);
    output.push(column_names.join(" | "));
    output.append(&mut row_lines);
    output.join("\n")
}
fn query_variables(query: &[Atom]) -> Vec<String> { fn query_variables(query: &[Atom]) -> Vec<String> {
let mut variables = query let mut variables = query
.iter() .iter()
@ -286,4 +333,37 @@ mod tests {
assert!(output.contains("premise: Ancestor(alice, bob)")); assert!(output.contains("premise: Ancestor(alice, bob)"));
assert!(output.contains("input fact")); assert!(output.contains("input fact"));
} }
#[test]
fn session_runs_sql_query() {
    // Two facts, then a filtered single-column SELECT over them.
    let script = "fact Parent(alice, bob).\n\
                  fact Parent(bob, carol).\n\
                  sql SELECT c0 FROM Parent WHERE c1 = 'bob';";

    let mut session = Session::new();
    let rendered = session.execute_script(script).unwrap();

    for expected in &["1 row(s)", "c0", "alice"] {
        assert!(
            rendered.contains(*expected),
            "expected `{}` in output:\n{}",
            expected,
            rendered
        );
    }
}
#[test]
fn session_runs_sql_query_with_alias_and_literal_projection() {
    // Projects one aliased column and one aliased string literal for every fact.
    let script = "fact Parent(alice, bob).\n\
                  fact Parent(bob, carol).\n\
                  sql SELECT c0 AS parent_name, 'seed' AS label FROM Parent;";

    let mut session = Session::new();
    let rendered = session.execute_script(script).unwrap();

    let expected_fragments = [
        "2 row(s)",
        "parent_name | label",
        "alice | seed",
        "bob | seed",
    ];
    for expected in &expected_fragments {
        assert!(
            rendered.contains(*expected),
            "expected `{}` in output:\n{}",
            expected,
            rendered
        );
    }
}
} }

View File

@ -1,28 +1,36 @@
use crate::relational::{ResultSet, Schema, Value}; use crate::relational::{Schema, Value};
/// A logical expression over relational data.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalExpr { pub enum LogicalExpr {
/// A column reference.
Column(String), Column(String),
/// A literal value.
Literal(Value), Literal(Value),
/// Equality.
Eq(Box<LogicalExpr>, Box<LogicalExpr>), Eq(Box<LogicalExpr>, Box<LogicalExpr>),
} }
/// A named output expression in a projection.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct NamedExpr { pub struct NamedExpr {
/// Output column name.
pub name: String, pub name: String,
/// Expression to evaluate.
pub expr: LogicalExpr, pub expr: LogicalExpr,
} }
/// A logical plan in the current execution subset.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalPlan { pub enum LogicalPlan {
Scan { /// Read all facts for one predicate-backed table.
table: String, Scan { table: String, schema: Schema },
schema: Schema, /// Filter rows by a predicate.
},
Filter { Filter {
input: Box<LogicalPlan>, input: Box<LogicalPlan>,
predicate: LogicalExpr, predicate: LogicalExpr,
}, },
/// Project a new output schema.
Project { Project {
input: Box<LogicalPlan>, input: Box<LogicalPlan>,
expressions: Vec<NamedExpr>, expressions: Vec<NamedExpr>,
@ -31,6 +39,7 @@ pub enum LogicalPlan {
} }
impl LogicalPlan { impl LogicalPlan {
/// Return the schema produced by this logical plan.
pub fn output_schema(&self) -> &Schema { pub fn output_schema(&self) -> &Schema {
match self { match self {
Self::Scan { schema, .. } => schema, Self::Scan { schema, .. } => schema,
@ -39,9 +48,3 @@ impl LogicalPlan {
} }
} }
} }
impl From<ResultSet> for LogicalPlan {
fn from(_: ResultSet) -> Self {
unreachable!("result sets are execution output, not logical plans")
}
}

View File

@ -1,4 +1,12 @@
//! Logical planning scaffolding. //! Logical planning scaffolding.
//!
//! The planner is split into:
//!
//! - [`logical`]: plan and expression data structures
//! - [`sql`]: translation from SQL AST into the current logical-plan subset
//!
//! At the moment this is intentionally small and only covers the first
//! single-table SQL slice.
pub mod logical; pub mod logical;
pub mod sql; pub mod sql;

View File

@ -3,14 +3,16 @@ use std::fmt;
use crate::catalog::{CatalogError, PredicateCatalog}; use crate::catalog::{CatalogError, PredicateCatalog};
use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr}; use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr};
use crate::relational::{Field, Schema, Value}; use crate::relational::{DataType, Field, Schema, Value};
use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem};
/// Errors returned when translating SQL AST into a logical plan.
#[derive(Debug)] #[derive(Debug)]
pub enum PlannerError { pub enum PlannerError {
/// Catalog lookup failed.
Catalog(CatalogError), Catalog(CatalogError),
/// A referenced column does not exist in the input schema.
UnknownColumn(String), UnknownColumn(String),
UnsupportedProjection,
} }
impl fmt::Display for PlannerError { impl fmt::Display for PlannerError {
@ -18,9 +20,6 @@ impl fmt::Display for PlannerError {
match self { match self {
Self::Catalog(err) => write!(f, "catalog error: {}", err), Self::Catalog(err) => write!(f, "catalog error: {}", err),
Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column), Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column),
Self::UnsupportedProjection => {
write!(f, "only wildcard and column projections are supported")
}
} }
} }
} }
@ -29,7 +28,7 @@ impl Error for PlannerError {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match self { match self {
Self::Catalog(err) => Some(err), Self::Catalog(err) => Some(err),
Self::UnknownColumn(_) | Self::UnsupportedProjection => None, Self::UnknownColumn(_) => None,
} }
} }
} }
@ -40,6 +39,7 @@ impl From<CatalogError> for PlannerError {
} }
} }
/// Plan a parsed `SELECT` statement into the current logical plan subset.
pub fn plan_select( pub fn plan_select(
select: &Select, select: &Select,
catalog: &PredicateCatalog, catalog: &PredicateCatalog,
@ -64,28 +64,21 @@ pub fn plan_select(
let mut expressions = Vec::new(); let mut expressions = Vec::new();
let mut fields = Vec::new(); let mut fields = Vec::new();
for item in &select.projection { for (index, item) in select.projection.iter().enumerate() {
match item { match item {
SelectItem::Expr { expr, alias } => match expr { SelectItem::Expr { expr, alias } => {
Expr::Identifier(name) => { let planned_expr = plan_expr(expr, &scan_schema)?;
let index = scan_schema let output_name = alias
.index_of(name) .clone()
.ok_or_else(|| PlannerError::UnknownColumn(name.clone()))?; .unwrap_or_else(|| default_projection_name(expr, index + 1));
let input_field = &scan_schema.fields()[index]; let (data_type, nullable) = projection_metadata(expr, &scan_schema)?;
let output_name = alias.clone().unwrap_or_else(|| name.clone()); expressions.push(NamedExpr {
expressions.push(NamedExpr { name: output_name.clone(),
name: output_name.clone(), expr: planned_expr,
expr: LogicalExpr::Column(name.clone()), });
}); fields.push(Field::new(output_name, data_type, nullable));
fields.push(Field::new( }
output_name, SelectItem::Wildcard => unreachable!("wildcard projections are handled earlier"),
input_field.data_type().clone(),
input_field.nullable(),
));
}
_ => return Err(PlannerError::UnsupportedProjection),
},
SelectItem::Wildcard => return Err(PlannerError::UnsupportedProjection),
} }
} }
@ -125,6 +118,28 @@ fn plan_literal(literal: &Literal) -> Value {
} }
} }
/// Derive the output data type and nullability for one projected expression.
///
/// - A column reference copies the type and nullability of the matching field
///   in `schema`, or fails with [`PlannerError::UnknownColumn`] when no field
///   has that name.
/// - A string literal is non-nullable `Text`.
/// - A `NULL` literal is nullable `Text`.
/// - A binary expression is nullable `Boolean`.
fn projection_metadata(expr: &Expr, schema: &Schema) -> Result<(DataType, bool), PlannerError> {
    let metadata = match expr {
        Expr::Identifier(column) => {
            let source_field = schema
                .index_of(column)
                .map(|position| &schema.fields()[position])
                .ok_or_else(|| PlannerError::UnknownColumn(column.clone()))?;
            (source_field.data_type().clone(), source_field.nullable())
        }
        Expr::Literal(Literal::String(_)) => (DataType::Text, false),
        Expr::Literal(Literal::Null) => (DataType::Text, true),
        Expr::Binary { .. } => (DataType::Boolean, true),
    };

    Ok(metadata)
}
/// Pick the output column name for a projection item that has no alias.
///
/// A column reference keeps its own name. Literals and binary expressions
/// are named `expr<ordinal>`, using the `ordinal` supplied by the caller.
fn default_projection_name(expr: &Expr, ordinal: usize) -> String {
    match expr {
        Expr::Identifier(column) => column.to_owned(),
        // Non-column expressions have no natural name, so synthesize one.
        Expr::Literal(_) | Expr::Binary { .. } => {
            let synthesized = format!("expr{}", ordinal);
            synthesized
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -146,4 +161,25 @@ mod tests {
let plan = plan_select(&select, &catalog).unwrap(); let plan = plan_select(&select, &catalog).unwrap();
assert_eq!(plan.output_schema().len(), 1); assert_eq!(plan.output_schema().len(), 1);
} }
#[test]
fn plans_aliases_and_literal_projection() {
    // A single binary fact is enough to infer the two-column `Parent` table.
    let parent_fact = Atom::new(
        "Parent",
        vec![Term::constant("alice"), Term::constant("bob")],
    );
    let instance: Instance = std::iter::once(parent_fact).collect();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    let select =
        parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();

    let fields = plan.output_schema().fields();
    assert_eq!(plan.output_schema().len(), 3);

    // Aliases win; the unaliased literal falls back to its 1-based ordinal name.
    let names: Vec<&str> = fields.iter().map(|field| field.name()).collect();
    assert_eq!(names, ["parent_name", "label", "expr3"]);
    assert_eq!(fields[1].data_type(), &DataType::Text);
}
} }

View File

@ -1,4 +1,10 @@
//! Relational data model scaffolding for future SQL and planner work. //! Relational data model scaffolding for SQL and planner work.
//!
//! This module provides the current relational execution vocabulary:
//!
//! - [`Schema`] and [`Field`] for column metadata
//! - [`Value`] for scalar values
//! - [`Row`] and [`ResultSet`] for execution output
mod row; mod row;
mod schema; mod schema;

View File

@ -1,24 +1,29 @@
use super::{Schema, Value}; use super::{Schema, Value};
/// One row of relational output values.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Row { pub struct Row {
values: Vec<Value>, values: Vec<Value>,
} }
impl Row { impl Row {
/// Create a row from ordered values.
pub fn new(values: Vec<Value>) -> Self { pub fn new(values: Vec<Value>) -> Self {
Self { values } Self { values }
} }
/// Return all values in order.
pub fn values(&self) -> &[Value] { pub fn values(&self) -> &[Value] {
&self.values &self.values
} }
/// Return the value at a column index.
pub fn get(&self, index: usize) -> Option<&Value> { pub fn get(&self, index: usize) -> Option<&Value> {
self.values.get(index) self.values.get(index)
} }
} }
/// A relational query result with a schema and ordered rows.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResultSet { pub struct ResultSet {
schema: Schema, schema: Schema,
@ -26,14 +31,17 @@ pub struct ResultSet {
} }
impl ResultSet { impl ResultSet {
/// Create a result set from a schema and rows.
pub fn new(schema: Schema, rows: Vec<Row>) -> Self { pub fn new(schema: Schema, rows: Vec<Row>) -> Self {
Self { schema, rows } Self { schema, rows }
} }
/// Return the output schema.
pub fn schema(&self) -> &Schema { pub fn schema(&self) -> &Schema {
&self.schema &self.schema
} }
/// Return all output rows.
pub fn rows(&self) -> &[Row] { pub fn rows(&self) -> &[Row] {
&self.rows &self.rows
} }

View File

@ -1,11 +1,15 @@
use std::fmt; use std::fmt;
/// A scalar type supported by the current relational layer.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataType { pub enum DataType {
/// UTF-8 text values.
Text, Text,
/// Boolean values.
Boolean, Boolean,
} }
/// Metadata for one output column in a schema.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Field { pub struct Field {
name: String, name: String,
@ -14,6 +18,7 @@ pub struct Field {
} }
impl Field { impl Field {
/// Create a field definition.
pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self { pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
Self { Self {
name: name.into(), name: name.into(),
@ -22,41 +27,50 @@ impl Field {
} }
} }
/// Return the field name.
pub fn name(&self) -> &str { pub fn name(&self) -> &str {
&self.name &self.name
} }
/// Return the field data type.
pub fn data_type(&self) -> &DataType { pub fn data_type(&self) -> &DataType {
&self.data_type &self.data_type
} }
/// Return whether the field may contain null values.
pub fn nullable(&self) -> bool { pub fn nullable(&self) -> bool {
self.nullable self.nullable
} }
} }
/// An ordered set of named output fields.
#[derive(Debug, Clone, PartialEq, Eq, Default)] #[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Schema { pub struct Schema {
fields: Vec<Field>, fields: Vec<Field>,
} }
impl Schema { impl Schema {
/// Create a schema from a list of fields.
pub fn new(fields: Vec<Field>) -> Self { pub fn new(fields: Vec<Field>) -> Self {
Self { fields } Self { fields }
} }
/// Return all fields in order.
pub fn fields(&self) -> &[Field] { pub fn fields(&self) -> &[Field] {
&self.fields &self.fields
} }
/// Return the number of fields.
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.fields.len() self.fields.len()
} }
/// Return whether the schema has no fields.
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.fields.is_empty() self.fields.is_empty()
} }
/// Look up a field index by column name.
pub fn index_of(&self, name: &str) -> Option<usize> { pub fn index_of(&self, name: &str) -> Option<usize> {
self.fields.iter().position(|field| field.name() == name) self.fields.iter().position(|field| field.name() == name)
} }

View File

@ -1,21 +1,30 @@
use std::fmt; use std::fmt;
/// A scalar value in the current relational execution layer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Value { pub enum Value {
/// Textual data.
Text(String), Text(String),
/// Boolean data.
Boolean(bool), Boolean(bool),
/// SQL-style null.
Null, Null,
} }
impl Value { impl Value {
/// Create a text value.
pub fn text(value: impl Into<String>) -> Self { pub fn text(value: impl Into<String>) -> Self {
Self::Text(value.into()) Self::Text(value.into())
} }
/// Return whether this value is null.
pub fn is_null(&self) -> bool { pub fn is_null(&self) -> bool {
matches!(self, Self::Null) matches!(self, Self::Null)
} }
/// Evaluate SQL equality semantics for two scalar values.
///
/// Returns `None` when either side is `NULL`.
pub fn sql_eq(&self, other: &Self) -> Option<bool> { pub fn sql_eq(&self, other: &Self) -> Option<bool> {
match (self, other) { match (self, other) {
(Self::Null, _) | (_, Self::Null) => None, (Self::Null, _) | (_, Self::Null) => None,

View File

@ -1,20 +1,31 @@
/// A parsed `SELECT-FROM-WHERE` statement in the current SQL subset.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Select { pub struct Select {
/// Output expressions requested by the query.
pub projection: Vec<SelectItem>, pub projection: Vec<SelectItem>,
/// Source table name.
pub from: String, pub from: String,
/// Optional filter predicate.
pub selection: Option<Expr>, pub selection: Option<Expr>,
} }
/// One item in a `SELECT` projection list.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum SelectItem { pub enum SelectItem {
/// `*`
Wildcard, Wildcard,
/// A projected expression, optionally renamed with `AS`.
Expr { expr: Expr, alias: Option<String> }, Expr { expr: Expr, alias: Option<String> },
} }
/// A SQL expression in the current subset.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr { pub enum Expr {
/// A column reference.
Identifier(String), Identifier(String),
/// A literal value.
Literal(Literal), Literal(Literal),
/// A binary expression.
Binary { Binary {
left: Box<Expr>, left: Box<Expr>,
op: BinaryOp, op: BinaryOp,
@ -22,13 +33,18 @@ pub enum Expr {
}, },
} }
/// A SQL literal in the current subset.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Literal { pub enum Literal {
/// A string literal.
String(String), String(String),
/// The `NULL` literal.
Null, Null,
} }
/// A binary operator in the current subset.
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp { pub enum BinaryOp {
/// Equality.
Eq, Eq,
} }

View File

@ -1,4 +1,12 @@
//! Minimal SQL front-end scaffolding. //! Minimal SQL front-end scaffolding.
//!
//! The current SQL layer supports a narrow `SELECT-FROM-WHERE` subset over one
//! predicate-backed table. It provides:
//!
//! - a small AST in [`ast`]
//! - a parser in [`parser`]
//!
//! The resulting AST is translated into logical plans by `crate::planner::sql`.
pub mod ast; pub mod ast;
pub mod parser; pub mod parser;

View File

@ -3,6 +3,7 @@ use std::fmt;
use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem};
/// Errors returned by the minimal SQL parser.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError { pub enum ParseError {
UnexpectedEnd, UnexpectedEnd,
@ -31,6 +32,7 @@ enum Token {
Select, Select,
From, From,
Where, Where,
As,
Null, Null,
Identifier(String), Identifier(String),
String(String), String(String),
@ -39,6 +41,7 @@ enum Token {
Eq, Eq,
} }
/// Parse a `SELECT-FROM-WHERE` query in the current SQL subset.
pub fn parse_select(input: &str) -> Result<Select, ParseError> { pub fn parse_select(input: &str) -> Result<Select, ParseError> {
let tokens = tokenize(input)?; let tokens = tokenize(input)?;
let mut parser = Parser::new(tokens); let mut parser = Parser::new(tokens);
@ -82,13 +85,21 @@ impl Parser {
let mut items = Vec::new(); let mut items = Vec::new();
loop { loop {
let item = match self.next().ok_or(ParseError::UnexpectedEnd)? { let item = match self.peek().ok_or(ParseError::UnexpectedEnd)? {
Token::Star => SelectItem::Wildcard, Token::Star => {
Token::Identifier(name) => SelectItem::Expr { self.index += 1;
expr: Expr::Identifier(name), SelectItem::Wildcard
alias: None, }
}, _ => {
other => return Err(ParseError::UnexpectedToken(render_token(&other))), let expr = self.parse_operand()?;
let alias = if self.peek() == Some(&Token::As) {
self.index += 1;
Some(self.expect_identifier()?)
} else {
None
};
SelectItem::Expr { expr, alias }
}
}; };
items.push(item); items.push(item);
@ -185,6 +196,7 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
"SELECT" => Token::Select, "SELECT" => Token::Select,
"FROM" => Token::From, "FROM" => Token::From,
"WHERE" => Token::Where, "WHERE" => Token::Where,
"AS" => Token::As,
"NULL" => Token::Null, "NULL" => Token::Null,
_ => Token::Identifier(ident), _ => Token::Identifier(ident),
}; };
@ -251,6 +263,7 @@ fn render_token(token: &Token) -> String {
Token::Select => "SELECT".to_string(), Token::Select => "SELECT".to_string(),
Token::From => "FROM".to_string(), Token::From => "FROM".to_string(),
Token::Where => "WHERE".to_string(), Token::Where => "WHERE".to_string(),
Token::As => "AS".to_string(),
Token::Null => "NULL".to_string(), Token::Null => "NULL".to_string(),
Token::Identifier(name) => name.clone(), Token::Identifier(name) => name.clone(),
Token::String(value) => format!("'{}'", value), Token::String(value) => format!("'{}'", value),
@ -272,4 +285,33 @@ mod tests {
assert_eq!(select.projection.len(), 1); assert_eq!(select.projection.len(), 1);
assert!(select.selection.is_some()); assert!(select.selection.is_some());
} }
#[test]
fn parses_projection_aliases_and_literals() {
    let select =
        parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap();

    // One aliased column, one aliased string literal, one bare NULL literal.
    let expected = vec![
        SelectItem::Expr {
            expr: Expr::Identifier(String::from("c0")),
            alias: Some(String::from("parent_name")),
        },
        SelectItem::Expr {
            expr: Expr::Literal(Literal::String(String::from("seed"))),
            alias: Some(String::from("label")),
        },
        SelectItem::Expr {
            expr: Expr::Literal(Literal::Null),
            alias: None,
        },
    ];

    assert_eq!(select.projection.len(), 3);
    assert_eq!(select.projection, expected);
}
} }

View File

@ -65,3 +65,21 @@ fn select_where_filters_rows() {
assert_eq!(result.rows().len(), 1); assert_eq!(result.rows().len(), 1);
assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice"); assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice");
} }
#[test]
fn select_alias_and_literal_projection_shape_output() {
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    // Parse, plan, and execute the full pipeline for an aliased projection.
    let select =
        parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    let fields = result.schema().fields();
    assert_eq!(fields[0].name(), "parent_name");
    assert_eq!(fields[1].name(), "label");
    assert_eq!(fields[2].name(), "expr3");

    assert_eq!(result.rows().len(), 2);

    // The literal columns carry the same value in every row, so row 0 suffices.
    let first_row = result.rows()[0].values();
    assert_eq!(first_row[1].to_string(), "seed");
    assert_eq!(first_row[2].to_string(), "NULL");
}