From 23cbc6a4bf0e58544b2aa6d5d7469aec94750cf2 Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Thu, 9 Apr 2026 12:50:06 +0200 Subject: [PATCH] Add SQL projection aliases and literal expressions support --- Makefile | 10 +++-- README.md | 35 ++++++++++++--- examples/geolog/README.md | 3 ++ examples/scripts/README.md | 1 + flake.nix | 2 +- src/catalog/mod.rs | 14 ++++++ src/execution/mod.rs | 4 ++ src/frontend/language.rs | 20 +++++++++ src/frontend/session.rs | 80 +++++++++++++++++++++++++++++++++ src/planner/logical.rs | 25 ++++++----- src/planner/mod.rs | 8 ++++ src/planner/sql.rs | 90 ++++++++++++++++++++++++++----------- src/relational/mod.rs | 8 +++- src/relational/row.rs | 8 ++++ src/relational/schema.rs | 14 ++++++ src/relational/value.rs | 9 ++++ src/sql/ast.rs | 16 +++++++ src/sql/mod.rs | 8 ++++ src/sql/parser.rs | 56 ++++++++++++++++++++--- tests/sql_pipeline_tests.rs | 18 ++++++++ 20 files changed, 374 insertions(+), 55 deletions(-) diff --git a/Makefile b/Makefile index cd67631..c7954a6 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ install-deps: ## Install development dependencies .PHONY: lint lint: format ## Run the linters @echo "Linting Rust files..." - @DEBUG_PROJ=$(DEBUG_PROJ) cargo clippy -- -D warnings -D clippy::unwrap_used -D clippy::expect_used + @DEBUG_PROJ=$(DEBUG_PROJ) cargo clippy --all-targets --all-features -- -D warnings .PHONY: audit audit: ## Run security audit on Rust dependencies @@ -92,7 +92,7 @@ docs: format ## Generate the documentation .PHONY: fix-lint fix-lint: ## Fix the linter warnings @echo "Fixing linter warnings..." - @cargo clippy --fix --allow-dirty --allow-staged --all-targets --workspace + @cargo clippy --fix --allow-dirty --allow-staged --all-targets --all-features -- -D warnings .PHONY: nextest nextest: ## Run tests using nextest @@ -116,5 +116,9 @@ test-hooks: ## Test Git hooks on all files @pre-commit run --all-files --show-diff-on-failure .PHONY: check -check: format lint test ## Run format, lint, and test +check: format ## Run format, lint, and test + @echo "Checking formatting..." + @cargo fmt --check + @$(MAKE) lint + @$(MAKE) test @echo "All checks passed." diff --git a/README.md b/README.md index 39278f4..c910330 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,10 @@ An experimental Rust project for building query-engine components. -Right now the repository is centered on a chase-based reasoning core plus a -small interactive frontend, plus an early relational/SQL scaffold. The broader -target shape is a query engine with clearer front-end, planning, optimization, -and execution boundaries. +Right now the repository is centered on a chase-based reasoning core, a small +interactive frontend, and an early relational/SQL scaffold. The broader target +shape is a query engine with clearer front-end, planning, optimization, and +execution boundaries. ### Current scope @@ -16,6 +16,22 @@ and execution boundaries. - Relational schema, catalog, logical-plan, and execution scaffolding - A minimal SQL slice for single-table `SELECT-FROM-WHERE` queries +### Architecture + +The repository is currently organized around a few clear subsystems: + +- `src/chase/`: rule-engine data structures and chase execution +- `src/frontend/`: REPL, script, GUI, and explanation rendering +- `src/relational/`: schemas, values, rows, and result sets +- `src/catalog/`: predicate-backed table metadata +- `src/sql/`: minimal SQL AST and parser +- `src/planner/`: logical plan structures and SQL-to-plan translation +- `src/execution/`: execution for the current logical-plan subset + +Today, the chase subsystem is still the most mature part of the codebase. The +relational and SQL modules are present to create clean extension points for a +broader query-engine architecture. + ### Intended Direction The medium-term direction is to evolve this project into a more general @@ -75,6 +91,7 @@ cargo run -- script examples/scripts/ancestor.chase ```text fact Parent(alice, bob). rule Parent(?X, ?Y) -> Ancestor(?X, ?Y). +sql SELECT * FROM Parent; run. query Ancestor(?X, ?Y)? explain Ancestor(alice, carol)? @@ -100,6 +117,14 @@ Currently supported examples: SELECT * FROM Parent SELECT c0 FROM Parent SELECT c0 FROM Parent WHERE c1 = 'bob' +SELECT c0 AS parent_name, 'seed' AS label FROM Parent +``` + +In the REPL or script runner, use the `sql` command and end the statement with +`;`: + +```text +sql SELECT c0 FROM Parent WHERE c1 = 'bob'; ``` Current limits: @@ -108,7 +133,7 @@ Current limits: - positional column names such as `c0`, `c1` - no joins - no aggregates -- no aliases +- projection aliases only via `AS` ### Development diff --git a/examples/geolog/README.md b/examples/geolog/README.md index 0f1d39d..36b7efc 100644 --- a/examples/geolog/README.md +++ b/examples/geolog/README.md @@ -3,6 +3,9 @@ This directory contains example `.geolog` files that use a richer DSL than the minimal `.chase` script language in `examples/scripts/`. +These files are reference material and experiments. They are not currently +wired into the `query-engine` binary, REPL, SQL parser, or planner pipeline. + This README summarizes the Geolog DSL structure as it appears in the examples in this directory. It should be read as a practical, example-driven reference, not as a formal or complete language specification. diff --git a/examples/scripts/README.md b/examples/scripts/README.md index 580863e..783fcf4 100644 --- a/examples/scripts/README.md +++ b/examples/scripts/README.md @@ -4,6 +4,7 @@ These scripts can be executed with: ```bash make script SCRIPT=examples/scripts/ancestor.chase +cargo run -- script examples/scripts/ancestor.chase ``` Available examples: diff --git a/flake.nix b/flake.nix index 2061e28..e743f03 100644 --- a/flake.nix +++ b/flake.nix @@ -1,5 +1,5 @@ { - description = "A playground for experimenting with query engine stuff"; + description = "A playground for experimenting with query engine components in Rust"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs index d4fa332..1de6769 100644 --- a/src/catalog/mod.rs +++ b/src/catalog/mod.rs @@ -1,4 +1,7 @@ //! Minimal catalog support for mapping predicates to relational schemas. +//! +//! The current catalog is intentionally lightweight. It infers table schemas +//! from predicate arities and null occurrences in an [`Instance`](crate::chase::Instance). use std::collections::HashMap; use std::error::Error; @@ -7,9 +10,12 @@ use std::fmt; use crate::chase::{Instance, Term}; use crate::relational::{DataType, Field, Schema}; +/// Errors returned by the predicate-backed catalog. #[derive(Debug, Clone, PartialEq, Eq)] pub enum CatalogError { + /// The requested predicate-backed table does not exist. UnknownTable(String), + /// Facts for one predicate do not all have the same arity. InconsistentArity { table: String, expected: usize, @@ -36,26 +42,34 @@ impl fmt::Display for CatalogError { impl Error for CatalogError {} +/// A minimal catalog that maps predicate names to inferred relational schemas. #[derive(Debug, Clone, Default)] pub struct PredicateCatalog { schemas: HashMap, } impl PredicateCatalog { + /// Create an empty catalog. pub fn new() -> Self { Self::default() } + /// Register a table schema explicitly. pub fn register_table(&mut self, table: impl Into, schema: Schema) { self.schemas.insert(table.into(), schema); } + /// Fetch a schema for a table name. pub fn schema_for(&self, table: &str) -> Result<&Schema, CatalogError> { self.schemas .get(table) .ok_or_else(|| CatalogError::UnknownTable(table.to_string())) } + /// Infer table schemas from the predicates present in an instance. + /// + /// Each predicate becomes one table, with positional column names `c0`, + /// `c1`, and so on. pub fn from_instance(instance: &Instance) -> Result { let mut arities = HashMap::new(); let mut nullable_positions: HashMap> = HashMap::new(); diff --git a/src/execution/mod.rs b/src/execution/mod.rs index ec52930..95d8743 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -7,9 +7,12 @@ use crate::chase::{Instance, Term}; use crate::planner::logical::{LogicalExpr, LogicalPlan}; use crate::relational::{ResultSet, Row, Value}; +/// Errors returned by the current logical-plan executor. #[derive(Debug)] pub enum ExecutionError { + /// A column reference could not be resolved. UnknownColumn(String), + /// The scan layer encountered a variable term where a ground value was expected. NonGroundScanTerm, } @@ -26,6 +29,7 @@ impl fmt::Display for ExecutionError { impl Error for ExecutionError {} +/// Execute the current logical-plan subset against an instance-backed source. pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result { match plan { LogicalPlan::Scan { table, schema } => { diff --git a/src/frontend/language.rs b/src/frontend/language.rs index 11bce08..d031e3a 100644 --- a/src/frontend/language.rs +++ b/src/frontend/language.rs @@ -2,11 +2,14 @@ use crate::chase::rule::RuleBuilder; use crate::chase::{Atom, Rule, Term}; +use crate::sql::ast::Select; +use crate::sql::parser::parse_select; #[derive(Debug, Clone)] pub enum Command { Fact(Atom), Rule(Rule), + Sql(Select), Run, Query(Vec), Explain(Vec), @@ -51,6 +54,11 @@ pub fn parse_command(input: &str) -> Result { return Ok(Command::Help); } + if let Some(rest) = strip_keyword(trimmed, "sql") { + let select = parse_select(trim_suffix(rest, ';')?).map_err(|err| err.to_string())?; + return Ok(Command::Sql(select)); + } + if let Some(rest) = strip_keyword(trimmed, "fact") { let atom = parse_atom(trim_suffix(rest, '.')?)?; if !atom.is_ground() { @@ -367,6 +375,18 @@ mod tests { } } + #[test] + fn parse_sql_command() { + let command = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap(); + match command { + Command::Sql(select) => { + assert_eq!(select.from, "Parent"); + assert!(select.selection.is_some()); + } + other => panic!("unexpected command: {:?}", other), + } + } + #[test] fn parse_query_command() { let command = parse_command("query Ancestor(?X, ?Y), Parent(?Y, ?Z)?").unwrap(); diff --git a/src/frontend/session.rs b/src/frontend/session.rs index 7dde270..f109f2b 100644 --- a/src/frontend/session.rs +++ b/src/frontend/session.rs @@ -2,9 +2,13 @@ use std::fmt; +use crate::catalog::PredicateCatalog; use crate::chase::{ Atom, Instance, MaterializedState, Rule, Substitution, find_matches, materialize, }; +use crate::execution::execute; +use crate::planner::sql::plan_select; +use crate::relational::ResultSet; use super::language::{Command, parse_script}; use super::provenance::explain_atom; @@ -59,6 +63,7 @@ impl Session { self.rules.push(rule.clone()); Ok(format!("Added rule #{}: {}", self.rules.len(), rule)) } + Command::Sql(select) => self.run_sql(&select), Command::Run => Ok(self.run_chase()), Command::Query(query) => Ok(self.run_query(&query)), Command::Explain(query) => Ok(self.explain_query(&query)), @@ -123,6 +128,14 @@ impl Session { rendered.join("\n") } + fn run_sql(&self, select: &crate::sql::ast::Select) -> Result { + let instance = self.active_instance(); + let catalog = PredicateCatalog::from_instance(instance).map_err(|err| err.to_string())?; + let plan = plan_select(select, &catalog).map_err(|err| err.to_string())?; + let result = execute(&plan, instance).map_err(|err| err.to_string())?; + Ok(render_result_set(&result)) + } + fn explain_query(&self, query: &[Atom]) -> String { let instance = self.active_instance(); let matches = find_matches(instance, query); @@ -198,6 +211,7 @@ fn help_text() -> &'static str { "Commands: fact Parent(alice, bob). rule Parent(?X, ?Y) -> Ancestor(?X, ?Y). +sql SELECT * FROM Parent; run. query Ancestor(?X, ?Y)? explain Ancestor(alice, bob)? @@ -216,6 +230,39 @@ where rendered } +fn render_result_set(result: &ResultSet) -> String { + let mut lines = Vec::new(); + lines.push(format!("{} row(s)", result.rows().len())); + + if result.schema().is_empty() { + return lines.join("\n"); + } + + let header = result + .schema() + .fields() + .iter() + .map(|field| field.name().to_string()) + .collect::>() + .join(" | "); + lines.push(header); + + let mut rows = result + .rows() + .iter() + .map(|row| { + row.values() + .iter() + .map(ToString::to_string) + .collect::>() + .join(" | ") + }) + .collect::>(); + rows.sort(); + lines.extend(rows); + lines.join("\n") +} + fn query_variables(query: &[Atom]) -> Vec { let mut variables = query .iter() @@ -286,4 +333,37 @@ mod tests { assert!(output.contains("premise: Ancestor(alice, bob)")); assert!(output.contains("input fact")); } + + #[test] + fn session_runs_sql_query() { + let mut session = Session::new(); + let output = session + .execute_script( + "fact Parent(alice, bob).\n\ + fact Parent(bob, carol).\n\ + sql SELECT c0 FROM Parent WHERE c1 = 'bob';", + ) + .unwrap(); + + assert!(output.contains("1 row(s)")); + assert!(output.contains("c0")); + assert!(output.contains("alice")); + } + + #[test] + fn session_runs_sql_query_with_alias_and_literal_projection() { + let mut session = Session::new(); + let output = session + .execute_script( + "fact Parent(alice, bob).\n\ + fact Parent(bob, carol).\n\ + sql SELECT c0 AS parent_name, 'seed' AS label FROM Parent;", + ) + .unwrap(); + + assert!(output.contains("2 row(s)")); + assert!(output.contains("parent_name | label")); + assert!(output.contains("alice | seed")); + assert!(output.contains("bob | seed")); + } } diff --git a/src/planner/logical.rs b/src/planner/logical.rs index 3d36766..bd4a757 100644 --- a/src/planner/logical.rs +++ b/src/planner/logical.rs @@ -1,28 +1,36 @@ -use crate::relational::{ResultSet, Schema, Value}; +use crate::relational::{Schema, Value}; +/// A logical expression over relational data. #[derive(Debug, Clone, PartialEq, Eq)] pub enum LogicalExpr { + /// A column reference. Column(String), + /// A literal value. Literal(Value), + /// Equality. Eq(Box, Box), } +/// A named output expression in a projection. #[derive(Debug, Clone, PartialEq, Eq)] pub struct NamedExpr { + /// Output column name. pub name: String, + /// Expression to evaluate. pub expr: LogicalExpr, } +/// A logical plan in the current execution subset. #[derive(Debug, Clone, PartialEq, Eq)] pub enum LogicalPlan { - Scan { - table: String, - schema: Schema, - }, + /// Read all facts for one predicate-backed table. + Scan { table: String, schema: Schema }, + /// Filter rows by a predicate. Filter { input: Box, predicate: LogicalExpr, }, + /// Project a new output schema. Project { input: Box, expressions: Vec, @@ -31,6 +39,7 @@ pub enum LogicalPlan { } impl LogicalPlan { + /// Return the schema produced by this logical plan. pub fn output_schema(&self) -> &Schema { match self { Self::Scan { schema, .. } => schema, @@ -39,9 +48,3 @@ impl LogicalPlan { } } } - -impl From for LogicalPlan { - fn from(_: ResultSet) -> Self { - unreachable!("result sets are execution output, not logical plans") - } -} diff --git a/src/planner/mod.rs b/src/planner/mod.rs index 018b572..84f6f62 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -1,4 +1,12 @@ //! Logical planning scaffolding. +//! +//! The planner is split into: +//! +//! - [`logical`]: plan and expression data structures +//! - [`sql`]: translation from SQL AST into the current logical-plan subset +//! +//! At the moment this is intentionally small and only covers the first +//! single-table SQL slice. pub mod logical; pub mod sql; diff --git a/src/planner/sql.rs b/src/planner/sql.rs index 9f5cd4b..b30903b 100644 --- a/src/planner/sql.rs +++ b/src/planner/sql.rs @@ -3,14 +3,16 @@ use std::fmt; use crate::catalog::{CatalogError, PredicateCatalog}; use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr}; -use crate::relational::{Field, Schema, Value}; +use crate::relational::{DataType, Field, Schema, Value}; use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; +/// Errors returned when translating SQL AST into a logical plan. #[derive(Debug)] pub enum PlannerError { + /// Catalog lookup failed. Catalog(CatalogError), + /// A referenced column does not exist in the input schema. UnknownColumn(String), - UnsupportedProjection, } impl fmt::Display for PlannerError { @@ -18,9 +20,6 @@ impl fmt::Display for PlannerError { match self { Self::Catalog(err) => write!(f, "catalog error: {}", err), Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column), - Self::UnsupportedProjection => { - write!(f, "only wildcard and column projections are supported") - } } } } @@ -29,7 +28,7 @@ impl Error for PlannerError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::Catalog(err) => Some(err), - Self::UnknownColumn(_) | Self::UnsupportedProjection => None, + Self::UnknownColumn(_) => None, } } } @@ -40,6 +39,7 @@ impl From for PlannerError { } } +/// Plan a parsed `SELECT` statement into the current logical plan subset. pub fn plan_select( select: &Select, catalog: &PredicateCatalog, @@ -64,28 +64,21 @@ pub fn plan_select( let mut expressions = Vec::new(); let mut fields = Vec::new(); - for item in &select.projection { + for (index, item) in select.projection.iter().enumerate() { match item { - SelectItem::Expr { expr, alias } => match expr { - Expr::Identifier(name) => { - let index = scan_schema - .index_of(name) - .ok_or_else(|| PlannerError::UnknownColumn(name.clone()))?; - let input_field = &scan_schema.fields()[index]; - let output_name = alias.clone().unwrap_or_else(|| name.clone()); - expressions.push(NamedExpr { - name: output_name.clone(), - expr: LogicalExpr::Column(name.clone()), - }); - fields.push(Field::new( - output_name, - input_field.data_type().clone(), - input_field.nullable(), - )); - } - _ => return Err(PlannerError::UnsupportedProjection), - }, - SelectItem::Wildcard => return Err(PlannerError::UnsupportedProjection), + SelectItem::Expr { expr, alias } => { + let planned_expr = plan_expr(expr, &scan_schema)?; + let output_name = alias + .clone() + .unwrap_or_else(|| default_projection_name(expr, index + 1)); + let (data_type, nullable) = projection_metadata(expr, &scan_schema)?; + expressions.push(NamedExpr { + name: output_name.clone(), + expr: planned_expr, + }); + fields.push(Field::new(output_name, data_type, nullable)); + } + SelectItem::Wildcard => unreachable!("wildcard projections are handled earlier"), } } @@ -125,6 +118,28 @@ fn plan_literal(literal: &Literal) -> Value { } } +fn projection_metadata(expr: &Expr, schema: &Schema) -> Result<(DataType, bool), PlannerError> { + match expr { + Expr::Identifier(name) => { + let index = schema + .index_of(name) + .ok_or_else(|| PlannerError::UnknownColumn(name.clone()))?; + let field = &schema.fields()[index]; + Ok((field.data_type().clone(), field.nullable())) + } + Expr::Literal(Literal::String(_)) => Ok((DataType::Text, false)), + Expr::Literal(Literal::Null) => Ok((DataType::Text, true)), + Expr::Binary { .. } => Ok((DataType::Boolean, true)), + } +} + +fn default_projection_name(expr: &Expr, ordinal: usize) -> String { + match expr { + Expr::Identifier(name) => name.clone(), + Expr::Literal(_) | Expr::Binary { .. } => format!("expr{}", ordinal), + } +} + #[cfg(test)] mod tests { use super::*; @@ -146,4 +161,25 @@ mod tests { let plan = plan_select(&select, &catalog).unwrap(); assert_eq!(plan.output_schema().len(), 1); } + + #[test] + fn plans_aliases_and_literal_projection() { + let instance: Instance = vec![Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + )] + .into_iter() + .collect(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = + parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let schema = plan.output_schema(); + assert_eq!(schema.len(), 3); + assert_eq!(schema.fields()[0].name(), "parent_name"); + assert_eq!(schema.fields()[1].name(), "label"); + assert_eq!(schema.fields()[2].name(), "expr3"); + assert_eq!(schema.fields()[1].data_type(), &DataType::Text); + } } diff --git a/src/relational/mod.rs b/src/relational/mod.rs index 8bd29b5..1de19d6 100644 --- a/src/relational/mod.rs +++ b/src/relational/mod.rs @@ -1,4 +1,10 @@ -//! Relational data model scaffolding for future SQL and planner work. +//! Relational data model scaffolding for SQL and planner work. +//! +//! This module provides the current relational execution vocabulary: +//! +//! - [`Schema`] and [`Field`] for column metadata +//! - [`Value`] for scalar values +//! - [`Row`] and [`ResultSet`] for execution output mod row; mod schema; diff --git a/src/relational/row.rs b/src/relational/row.rs index d347ed0..359f45a 100644 --- a/src/relational/row.rs +++ b/src/relational/row.rs @@ -1,24 +1,29 @@ use super::{Schema, Value}; +/// One row of relational output values. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Row { values: Vec, } impl Row { + /// Create a row from ordered values. pub fn new(values: Vec) -> Self { Self { values } } + /// Return all values in order. pub fn values(&self) -> &[Value] { &self.values } + /// Return the value at a column index. pub fn get(&self, index: usize) -> Option<&Value> { self.values.get(index) } } +/// A relational query result with a schema and ordered rows. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ResultSet { schema: Schema, @@ -26,14 +31,17 @@ pub struct ResultSet { } impl ResultSet { + /// Create a result set from a schema and rows. pub fn new(schema: Schema, rows: Vec) -> Self { Self { schema, rows } } + /// Return the output schema. pub fn schema(&self) -> &Schema { &self.schema } + /// Return all output rows. pub fn rows(&self) -> &[Row] { &self.rows } diff --git a/src/relational/schema.rs b/src/relational/schema.rs index 38279b9..7f83c98 100644 --- a/src/relational/schema.rs +++ b/src/relational/schema.rs @@ -1,11 +1,15 @@ use std::fmt; +/// A scalar type supported by the current relational layer. #[derive(Debug, Clone, PartialEq, Eq)] pub enum DataType { + /// UTF-8 text values. Text, + /// Boolean values. Boolean, } +/// Metadata for one output column in a schema. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Field { name: String, @@ -14,6 +18,7 @@ pub struct Field { } impl Field { + /// Create a field definition. pub fn new(name: impl Into, data_type: DataType, nullable: bool) -> Self { Self { name: name.into(), @@ -22,41 +27,50 @@ impl Field { } } + /// Return the field name. pub fn name(&self) -> &str { &self.name } + /// Return the field data type. pub fn data_type(&self) -> &DataType { &self.data_type } + /// Return whether the field may contain null values. pub fn nullable(&self) -> bool { self.nullable } } +/// An ordered set of named output fields. #[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct Schema { fields: Vec, } impl Schema { + /// Create a schema from a list of fields. pub fn new(fields: Vec) -> Self { Self { fields } } + /// Return all fields in order. pub fn fields(&self) -> &[Field] { &self.fields } + /// Return the number of fields. pub fn len(&self) -> usize { self.fields.len() } + /// Return whether the schema has no fields. pub fn is_empty(&self) -> bool { self.fields.is_empty() } + /// Look up a field index by column name. pub fn index_of(&self, name: &str) -> Option { self.fields.iter().position(|field| field.name() == name) } diff --git a/src/relational/value.rs b/src/relational/value.rs index 0d37b6c..a750d50 100644 --- a/src/relational/value.rs +++ b/src/relational/value.rs @@ -1,21 +1,30 @@ use std::fmt; +/// A scalar value in the current relational execution layer. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Value { + /// Textual data. Text(String), + /// Boolean data. Boolean(bool), + /// SQL-style null. Null, } impl Value { + /// Create a text value. pub fn text(value: impl Into) -> Self { Self::Text(value.into()) } + /// Return whether this value is null. pub fn is_null(&self) -> bool { matches!(self, Self::Null) } + /// Evaluate SQL equality semantics for two scalar values. + /// + /// Returns `None` when either side is `NULL`. pub fn sql_eq(&self, other: &Self) -> Option { match (self, other) { (Self::Null, _) | (_, Self::Null) => None, diff --git a/src/sql/ast.rs b/src/sql/ast.rs index 5643624..6c5c8fb 100644 --- a/src/sql/ast.rs +++ b/src/sql/ast.rs @@ -1,20 +1,31 @@ +/// A parsed `SELECT-FROM-WHERE` statement in the current SQL subset. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Select { + /// Output expressions requested by the query. pub projection: Vec, + /// Source table name. pub from: String, + /// Optional filter predicate. pub selection: Option, } +/// One item in a `SELECT` projection list. #[derive(Debug, Clone, PartialEq, Eq)] pub enum SelectItem { + /// `*` Wildcard, + /// A projected expression, optionally renamed with `AS`. Expr { expr: Expr, alias: Option }, } +/// A SQL expression in the current subset. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Expr { + /// A column reference. Identifier(String), + /// A literal value. Literal(Literal), + /// A binary expression. Binary { left: Box, op: BinaryOp, @@ -22,13 +33,18 @@ pub enum Expr { }, } +/// A SQL literal in the current subset. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Literal { + /// A string literal. String(String), + /// The `NULL` literal. Null, } +/// A binary operator in the current subset. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BinaryOp { + /// Equality. Eq, } diff --git a/src/sql/mod.rs b/src/sql/mod.rs index e0ab5b1..3e9f3f8 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -1,4 +1,12 @@ //! Minimal SQL front-end scaffolding. +//! +//! The current SQL layer supports a narrow `SELECT-FROM-WHERE` subset over one +//! predicate-backed table. It provides: +//! +//! - a small AST in [`ast`] +//! - a parser in [`parser`] +//! +//! The resulting AST is translated into logical plans by `crate::planner::sql`. pub mod ast; pub mod parser; diff --git a/src/sql/parser.rs b/src/sql/parser.rs index 6960e93..fc893e4 100644 --- a/src/sql/parser.rs +++ b/src/sql/parser.rs @@ -3,6 +3,7 @@ use std::fmt; use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; +/// Errors returned by the minimal SQL parser. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParseError { UnexpectedEnd, @@ -31,6 +32,7 @@ enum Token { Select, From, Where, + As, Null, Identifier(String), String(String), @@ -39,6 +41,7 @@ enum Token { Eq, } +/// Parse a `SELECT-FROM-WHERE` query in the current SQL subset. pub fn parse_select(input: &str) -> Result { let tokens = tokenize(input)?; let mut parser = Parser::new(tokens); @@ -82,13 +85,21 @@ impl Parser { let mut items = Vec::new(); loop { - let item = match self.next().ok_or(ParseError::UnexpectedEnd)? { - Token::Star => SelectItem::Wildcard, - Token::Identifier(name) => SelectItem::Expr { - expr: Expr::Identifier(name), - alias: None, - }, - other => return Err(ParseError::UnexpectedToken(render_token(&other))), + let item = match self.peek().ok_or(ParseError::UnexpectedEnd)? { + Token::Star => { + self.index += 1; + SelectItem::Wildcard + } + _ => { + let expr = self.parse_operand()?; + let alias = if self.peek() == Some(&Token::As) { + self.index += 1; + Some(self.expect_identifier()?) + } else { + None + }; + SelectItem::Expr { expr, alias } + } }; items.push(item); @@ -185,6 +196,7 @@ fn tokenize(input: &str) -> Result, ParseError> { "SELECT" => Token::Select, "FROM" => Token::From, "WHERE" => Token::Where, + "AS" => Token::As, "NULL" => Token::Null, _ => Token::Identifier(ident), }; @@ -251,6 +263,7 @@ fn render_token(token: &Token) -> String { Token::Select => "SELECT".to_string(), Token::From => "FROM".to_string(), Token::Where => "WHERE".to_string(), + Token::As => "AS".to_string(), Token::Null => "NULL".to_string(), Token::Identifier(name) => name.clone(), Token::String(value) => format!("'{}'", value), @@ -272,4 +285,33 @@ mod tests { assert_eq!(select.projection.len(), 1); assert!(select.selection.is_some()); } + + #[test] + fn parses_projection_aliases_and_literals() { + let select = + parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap(); + + assert_eq!(select.projection.len(), 3); + assert_eq!( + select.projection[0], + SelectItem::Expr { + expr: Expr::Identifier("c0".to_string()), + alias: Some("parent_name".to_string()), + } + ); + assert_eq!( + select.projection[1], + SelectItem::Expr { + expr: Expr::Literal(Literal::String("seed".to_string())), + alias: Some("label".to_string()), + } + ); + assert_eq!( + select.projection[2], + SelectItem::Expr { + expr: Expr::Literal(Literal::Null), + alias: None, + } + ); + } } diff --git a/tests/sql_pipeline_tests.rs b/tests/sql_pipeline_tests.rs index 92e798b..cabbe29 100644 --- a/tests/sql_pipeline_tests.rs +++ b/tests/sql_pipeline_tests.rs @@ -65,3 +65,21 @@ fn select_where_filters_rows() { assert_eq!(result.rows().len(), 1); assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice"); } + +#[test] +fn select_alias_and_literal_projection_shape_output() { + let instance = parent_instance(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = + parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &instance).unwrap(); + + assert_eq!(result.schema().fields()[0].name(), "parent_name"); + assert_eq!(result.schema().fields()[1].name(), "label"); + assert_eq!(result.schema().fields()[2].name(), "expr3"); + assert_eq!(result.rows().len(), 2); + assert_eq!(format!("{}", result.rows()[0].values()[1]), "seed"); + assert_eq!(format!("{}", result.rows()[0].values()[2]), "NULL"); +}