From 685804e60fdf3dc2901400f58058a9cc2a1371de Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Fri, 10 Apr 2026 09:56:18 +0200 Subject: [PATCH] Add SQL table aliases for self-joins --- README.md | 12 +++++- ROADMAP.md | 1 + src/frontend/language.rs | 27 ++++++++++--- src/frontend/session.rs | 20 ++++++++++ src/planner/sql.rs | 77 ++++++++++++++++++++++++++++--------- src/sql/ast.rs | 13 ++++++- src/sql/parser.rs | 55 +++++++++++++++++++++++--- tests/sql_pipeline_tests.rs | 48 +++++++++++++++++++++++ 8 files changed, 221 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 4ba261a..72612e5 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,9 @@ SELECT c0 AS parent_name, 'seed' AS label FROM Parent SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent +SELECT p.parent, q.child +FROM Parent AS p, Parent AS q +WHERE p.child = q.parent ``` In the REPL or script runner, use the `sql` command and end the statement with @@ -147,13 +150,20 @@ schema Ancestor(parent, child). sql SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent; ``` +For self-joins or shorter qualification, use table aliases: + +```text +schema Parent(parent, child). +sql SELECT p.parent, q.child FROM Parent AS p, Parent AS q WHERE p.child = q.parent; +``` + Current limits: - default column names are positional such as `c0`, `c1` - stable names require explicit catalog registration or `schema ...` in the frontend - joins currently use comma-separated tables plus `WHERE` filtering - multi-table queries require qualified column names such as `Parent.child` -- no table aliases yet +- table aliases only via `AS`, such as `FROM Parent AS p` - no aggregates - projection aliases only via `AS` diff --git a/ROADMAP.md b/ROADMAP.md index 3e9e53c..3ec28ba 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -30,6 +30,7 @@ This document tracks the current state and next steps for the repository. 
- [x] Logical-plan execution for the first SQL slice - [x] `SELECT-FROM-WHERE` support with positional or named columns - [x] Basic multi-table SQL joins via qualified-column filtering +- [x] Table aliases for self-joins and qualified references ### Near-Term Cleanup diff --git a/src/frontend/language.rs b/src/frontend/language.rs index e55e955..8aef025 100644 --- a/src/frontend/language.rs +++ b/src/frontend/language.rs @@ -399,7 +399,9 @@ mod tests { let command = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap(); match command { Command::Sql(select) => { - assert_eq!(select.from, vec!["Parent".to_string()]); + assert_eq!(select.from.len(), 1); + assert_eq!(select.from[0].name, "Parent"); + assert_eq!(select.from[0].alias, None); assert!(select.selection.is_some()); } other => panic!("unexpected command: {:?}", other), @@ -414,10 +416,25 @@ mod tests { .unwrap(); match command { Command::Sql(select) => { - assert_eq!( - select.from, - vec!["Parent".to_string(), "Ancestor".to_string()] - ); + assert_eq!(select.from.len(), 2); + assert_eq!(select.from[0].name, "Parent"); + assert_eq!(select.from[1].name, "Ancestor"); + } + other => panic!("unexpected command: {:?}", other), + } + } + + #[test] + fn parse_sql_join_command_with_aliases() { + let command = parse_command( + "sql SELECT p.parent FROM Parent AS p, Parent AS q WHERE p.child = q.parent;", + ) + .unwrap(); + match command { + Command::Sql(select) => { + assert_eq!(select.from.len(), 2); + assert_eq!(select.from[0].alias.as_deref(), Some("p")); + assert_eq!(select.from[1].alias.as_deref(), Some("q")); } other => panic!("unexpected command: {:?}", other), } diff --git a/src/frontend/session.rs b/src/frontend/session.rs index fb8d83a..4048b83 100644 --- a/src/frontend/session.rs +++ b/src/frontend/session.rs @@ -423,4 +423,24 @@ mod tests { assert!(output.contains("alice | carol")); assert!(output.contains("bob | dave")); } + + #[test] + fn session_runs_sql_self_join_with_aliases() { + let 
mut session = Session::new(); + let output = session + .execute_script( + "fact Parent(alice, bob).\n\ + fact Parent(bob, carol).\n\ + fact Parent(carol, dave).\n\ + schema Parent(parent, child).\n\ + sql SELECT p.parent, q.child FROM Parent AS p, Parent AS q \ + WHERE p.child = q.parent;", + ) + .unwrap(); + + assert!(output.contains("2 row(s)")); + assert!(output.contains("p.parent | q.child")); + assert!(output.contains("alice | carol")); + assert!(output.contains("bob | dave")); + } } diff --git a/src/planner/sql.rs b/src/planner/sql.rs index 45290f6..3f514ce 100644 --- a/src/planner/sql.rs +++ b/src/planner/sql.rs @@ -5,7 +5,7 @@ use std::fmt; use crate::catalog::{CatalogError, PredicateCatalog}; use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr}; use crate::relational::{DataType, Field, Schema, Value}; -use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; +use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef}; /// Errors returned when translating SQL AST into a logical plan. #[derive(Debug)] @@ -14,8 +14,8 @@ pub enum PlannerError { Catalog(CatalogError), /// A referenced column does not exist in the input schema. UnknownColumn(String), - /// The same table name appears more than once without alias support. - DuplicateTable(String), + /// A table or alias name appears more than once in one query. 
+ DuplicateSourceName(String), } impl fmt::Display for PlannerError { @@ -23,12 +23,8 @@ impl fmt::Display for PlannerError { match self { Self::Catalog(err) => write!(f, "catalog error: {}", err), Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column), - Self::DuplicateTable(table) => { - write!( - f, - "table `{}` appears more than once; aliases are not supported", - table - ) + Self::DuplicateSourceName(name) => { + write!(f, "source name `{}` appears more than once", name) } } } @@ -38,7 +34,7 @@ impl Error for PlannerError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::Catalog(err) => Some(err), - Self::UnknownColumn(_) | Self::DuplicateTable(_) => None, + Self::UnknownColumn(_) | Self::DuplicateSourceName(_) => None, } } } @@ -100,7 +96,7 @@ fn is_wildcard_projection(items: &[SelectItem]) -> bool { } fn plan_from_tables( - tables: &[String], + tables: &[TableRef], catalog: &PredicateCatalog, ) -> Result<(LogicalPlan, Schema), PlannerError> { let mut seen = HashSet::new(); @@ -109,23 +105,28 @@ fn plan_from_tables( PlannerError::Catalog(CatalogError::UnknownTable("".to_string())) })?; - seen.insert(first.clone()); + let first_name = source_name(first); + if !seen.insert(first_name.clone()) { + return Err(PlannerError::DuplicateSourceName(first_name)); + } + let first_schema = input_schema_for_table(first, catalog, tables.len() > 1)?; let mut plan = LogicalPlan::Scan { - table: first.clone(), + table: first.name.clone(), schema: first_schema.clone(), }; let mut combined_schema = first_schema; for table in table_iter { - if !seen.insert(table.clone()) { - return Err(PlannerError::DuplicateTable(table.clone())); + let qualified_name = source_name(table); + if !seen.insert(qualified_name.clone()) { + return Err(PlannerError::DuplicateSourceName(qualified_name)); } let right_schema = input_schema_for_table(table, catalog, tables.len() > 1)?; let join_schema = combine_schemas(&combined_schema, &right_schema); let 
right_plan = LogicalPlan::Scan { - table: table.clone(), + table: table.name.clone(), schema: right_schema.clone(), }; plan = LogicalPlan::CrossJoin { @@ -187,21 +188,23 @@ fn default_projection_name(expr: &Expr, ordinal: usize) -> String { } fn input_schema_for_table( - table: &str, + table: &TableRef, catalog: &PredicateCatalog, qualify_columns: bool, ) -> Result { - let schema = catalog.schema_for(table)?.clone(); + let schema = catalog.schema_for(&table.name)?.clone(); if !qualify_columns { return Ok(schema); } + let qualifier = source_name(table); + let fields = schema .fields() .iter() .map(|field| { Field::new( - format!("{}.{}", table, field.name()), + format!("{}.{}", qualifier, field.name()), field.data_type().clone(), field.nullable(), ) @@ -210,6 +213,10 @@ fn input_schema_for_table( Ok(Schema::new(fields)) } +fn source_name(table: &TableRef) -> String { + table.alias.clone().unwrap_or_else(|| table.name.clone()) +} + fn combine_schemas(left: &Schema, right: &Schema) -> Schema { let mut fields = left.fields().to_vec(); fields.extend_from_slice(right.fields()); @@ -293,4 +300,36 @@ mod tests { assert_eq!(schema.fields()[0].name(), "Parent.parent"); assert_eq!(schema.fields()[1].name(), "Ancestor.child"); } + + #[test] + fn plans_self_join_with_table_aliases() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + ] + .into_iter() + .collect(); + let mut catalog = PredicateCatalog::from_instance(&instance).unwrap(); + catalog + .rename_columns("Parent", ["parent", "child"]) + .unwrap(); + + let select = parse_select( + "SELECT p.parent, q.child FROM Parent AS p, Parent AS q \ + WHERE p.child = q.parent", + ) + .unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let schema = plan.output_schema(); + assert_eq!(schema.len(), 2); + assert_eq!(schema.fields()[0].name(), "p.parent"); + 
assert_eq!(schema.fields()[1].name(), "q.child"); + } } diff --git a/src/sql/ast.rs b/src/sql/ast.rs index 5003b2f..57c7684 100644 --- a/src/sql/ast.rs +++ b/src/sql/ast.rs @@ -3,12 +3,21 @@ pub struct Select { /// Output expressions requested by the query. pub projection: Vec, - /// Source table names. - pub from: Vec, + /// Source tables and their optional aliases. + pub from: Vec, /// Optional filter predicate. pub selection: Option, } +/// One source entry in a `FROM` list. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TableRef { + /// The predicate-backed table name. + pub name: String, + /// Optional table alias used for qualification. + pub alias: Option, +} + /// One item in a `SELECT` projection list. #[derive(Debug, Clone, PartialEq, Eq)] pub enum SelectItem { diff --git a/src/sql/parser.rs b/src/sql/parser.rs index 2db7f50..185388b 100644 --- a/src/sql/parser.rs +++ b/src/sql/parser.rs @@ -1,7 +1,7 @@ use std::error::Error; use std::fmt; -use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; +use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef}; /// Errors returned by the minimal SQL parser. #[derive(Debug, Clone, PartialEq, Eq)] @@ -113,11 +113,18 @@ impl Parser { Ok(items) } - fn parse_from_list(&mut self) -> Result, ParseError> { + fn parse_from_list(&mut self) -> Result, ParseError> { let mut tables = Vec::new(); loop { - tables.push(self.expect_identifier()?); + let name = self.expect_identifier()?; + let alias = if self.peek() == Some(&Token::As) { + self.index += 1; + Some(self.expect_identifier()?) 
+ } else { + None + }; + tables.push(TableRef { name, alias }); if self.peek() == Some(&Token::Comma) { self.index += 1; continue; @@ -296,7 +303,13 @@ mod tests { fn parses_select_with_filter() { let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap(); - assert_eq!(select.from, vec!["Parent".to_string()]); + assert_eq!( + select.from, + vec![TableRef { + name: "Parent".to_string(), + alias: None, + }] + ); assert_eq!(select.projection.len(), 1); assert!(select.selection.is_some()); } @@ -340,7 +353,16 @@ mod tests { assert_eq!( select.from, - vec!["Parent".to_string(), "Ancestor".to_string()] + vec![ + TableRef { + name: "Parent".to_string(), + alias: None, + }, + TableRef { + name: "Ancestor".to_string(), + alias: None, + } + ] ); assert_eq!( select.projection[0], @@ -350,4 +372,27 @@ mod tests { } ); } + + #[test] + fn parses_table_aliases() { + let select = parse_select( + "SELECT p.parent, a.child FROM Parent AS p, Ancestor AS a \ + WHERE p.child = a.parent", + ) + .unwrap(); + + assert_eq!( + select.from, + vec![ + TableRef { + name: "Parent".to_string(), + alias: Some("p".to_string()), + }, + TableRef { + name: "Ancestor".to_string(), + alias: Some("a".to_string()), + } + ] + ); + } } diff --git a/tests/sql_pipeline_tests.rs b/tests/sql_pipeline_tests.rs index eb8230e..0f5a106 100644 --- a/tests/sql_pipeline_tests.rs +++ b/tests/sql_pipeline_tests.rs @@ -156,3 +156,51 @@ fn select_join_filters_cross_product_by_qualified_columns() { vec!["alice -> carol".to_string(), "bob -> dave".to_string()] ); } + +#[test] +fn select_self_join_uses_table_aliases() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + Atom::new( + "Parent", + vec![Term::constant("carol"), Term::constant("dave")], + ), + ] + .into_iter() + .collect(); + + let mut catalog = 
PredicateCatalog::from_instance(&instance).unwrap(); + catalog + .rename_columns("Parent", ["parent", "child"]) + .unwrap(); + + let select = parse_select( + "SELECT p.parent, q.child FROM Parent AS p, Parent AS q \ + WHERE p.child = q.parent", + ) + .unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &instance).unwrap(); + + assert_eq!(result.schema().fields()[0].name(), "p.parent"); + assert_eq!(result.schema().fields()[1].name(), "q.child"); + + let mut rows = result + .rows() + .iter() + .map(|row| format!("{} -> {}", row.values()[0], row.values()[1])) + .collect::>(); + rows.sort(); + assert_eq!( + rows, + vec!["alice -> carol".to_string(), "bob -> dave".to_string()] + ); +}