Add SQL table aliases for self-joins

This commit is contained in:
Hassan Abedi 2026-04-10 09:56:18 +02:00
parent 4535d4a918
commit 685804e60f
8 changed files with 221 additions and 32 deletions

View File

@ -122,6 +122,9 @@ SELECT c0 AS parent_name, 'seed' AS label FROM Parent
SELECT Parent.parent, Ancestor.child SELECT Parent.parent, Ancestor.child
FROM Parent, Ancestor FROM Parent, Ancestor
WHERE Parent.child = Ancestor.parent WHERE Parent.child = Ancestor.parent
SELECT p.parent, q.child
FROM Parent AS p, Parent AS q
WHERE p.child = q.parent
``` ```
In the REPL or script runner, use the `sql` command and end the statement with In the REPL or script runner, use the `sql` command and end the statement with
@ -147,13 +150,20 @@ schema Ancestor(parent, child).
sql SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent; sql SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent;
``` ```
For self-joins or shorter qualification, use table aliases:
```text
schema Parent(parent, child).
sql SELECT p.parent, q.child FROM Parent AS p, Parent AS q WHERE p.child = q.parent;
```
Current limits: Current limits:
- default column names are positional such as `c0`, `c1` - default column names are positional such as `c0`, `c1`
- stable names require explicit catalog registration or `schema ...` in the frontend - stable names require explicit catalog registration or `schema ...` in the frontend
- joins currently use comma-separated tables plus `WHERE` filtering - joins currently use comma-separated tables plus `WHERE` filtering
- multi-table queries require qualified column names such as `Parent.child` - multi-table queries require qualified column names such as `Parent.child`
- no table aliases yet - table aliases are supported via `FROM Parent AS p`
- no aggregates - no aggregates
- projection aliases only via `AS` - projection aliases only via `AS`

View File

@ -30,6 +30,7 @@ This document tracks the current state and next steps for the repository.
- [x] Logical-plan execution for the first SQL slice - [x] Logical-plan execution for the first SQL slice
- [x] `SELECT-FROM-WHERE` support with positional or named columns - [x] `SELECT-FROM-WHERE` support with positional or named columns
- [x] Basic multi-table SQL joins via qualified-column filtering - [x] Basic multi-table SQL joins via qualified-column filtering
- [x] Table aliases for self-joins and qualified references
### Near-Term Cleanup ### Near-Term Cleanup

View File

@ -399,7 +399,9 @@ mod tests {
let command = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap(); let command = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap();
match command { match command {
Command::Sql(select) => { Command::Sql(select) => {
assert_eq!(select.from, vec!["Parent".to_string()]); assert_eq!(select.from.len(), 1);
assert_eq!(select.from[0].name, "Parent");
assert_eq!(select.from[0].alias, None);
assert!(select.selection.is_some()); assert!(select.selection.is_some());
} }
other => panic!("unexpected command: {:?}", other), other => panic!("unexpected command: {:?}", other),
@ -414,10 +416,25 @@ mod tests {
.unwrap(); .unwrap();
match command { match command {
Command::Sql(select) => { Command::Sql(select) => {
assert_eq!( assert_eq!(select.from.len(), 2);
select.from, assert_eq!(select.from[0].name, "Parent");
vec!["Parent".to_string(), "Ancestor".to_string()] assert_eq!(select.from[1].name, "Ancestor");
); }
other => panic!("unexpected command: {:?}", other),
}
}
#[test]
fn parse_sql_join_command_with_aliases() {
let command = parse_command(
"sql SELECT p.parent FROM Parent AS p, Parent AS q WHERE p.child = q.parent;",
)
.unwrap();
match command {
Command::Sql(select) => {
assert_eq!(select.from.len(), 2);
assert_eq!(select.from[0].alias.as_deref(), Some("p"));
assert_eq!(select.from[1].alias.as_deref(), Some("q"));
} }
other => panic!("unexpected command: {:?}", other), other => panic!("unexpected command: {:?}", other),
} }

View File

@ -423,4 +423,24 @@ mod tests {
assert!(output.contains("alice | carol")); assert!(output.contains("alice | carol"));
assert!(output.contains("bob | dave")); assert!(output.contains("bob | dave"));
} }
/// Runs a full script through a session: three `Parent` facts, a named
/// schema, and an aliased self-join, then checks the rendered output for the
/// row count, the alias-qualified header, and both grandparent pairs.
#[test]
fn session_runs_sql_self_join_with_aliases() {
    let script = concat!(
        "fact Parent(alice, bob).\n",
        "fact Parent(bob, carol).\n",
        "fact Parent(carol, dave).\n",
        "schema Parent(parent, child).\n",
        "sql SELECT p.parent, q.child FROM Parent AS p, Parent AS q ",
        "WHERE p.child = q.parent;",
    );

    let mut session = Session::new();
    let rendered = session.execute_script(script).unwrap();

    // Each fragment must appear somewhere in the rendered result.
    let expected_fragments = [
        "2 row(s)",
        "p.parent | q.child",
        "alice | carol",
        "bob | dave",
    ];
    for fragment in expected_fragments.iter() {
        assert!(rendered.contains(fragment));
    }
}
} }

View File

@ -5,7 +5,7 @@ use std::fmt;
use crate::catalog::{CatalogError, PredicateCatalog}; use crate::catalog::{CatalogError, PredicateCatalog};
use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr}; use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr};
use crate::relational::{DataType, Field, Schema, Value}; use crate::relational::{DataType, Field, Schema, Value};
use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef};
/// Errors returned when translating SQL AST into a logical plan. /// Errors returned when translating SQL AST into a logical plan.
#[derive(Debug)] #[derive(Debug)]
@ -14,8 +14,8 @@ pub enum PlannerError {
Catalog(CatalogError), Catalog(CatalogError),
/// A referenced column does not exist in the input schema. /// A referenced column does not exist in the input schema.
UnknownColumn(String), UnknownColumn(String),
/// The same table name appears more than once without alias support. /// A table or alias name appears more than once in one query.
DuplicateTable(String), DuplicateSourceName(String),
} }
impl fmt::Display for PlannerError { impl fmt::Display for PlannerError {
@ -23,12 +23,8 @@ impl fmt::Display for PlannerError {
match self { match self {
Self::Catalog(err) => write!(f, "catalog error: {}", err), Self::Catalog(err) => write!(f, "catalog error: {}", err),
Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column), Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column),
Self::DuplicateTable(table) => { Self::DuplicateSourceName(name) => {
write!( write!(f, "source name `{}` appears more than once", name)
f,
"table `{}` appears more than once; aliases are not supported",
table
)
} }
} }
} }
@ -38,7 +34,7 @@ impl Error for PlannerError {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match self { match self {
Self::Catalog(err) => Some(err), Self::Catalog(err) => Some(err),
Self::UnknownColumn(_) | Self::DuplicateTable(_) => None, Self::UnknownColumn(_) | Self::DuplicateSourceName(_) => None,
} }
} }
} }
@ -100,7 +96,7 @@ fn is_wildcard_projection(items: &[SelectItem]) -> bool {
} }
fn plan_from_tables( fn plan_from_tables(
tables: &[String], tables: &[TableRef],
catalog: &PredicateCatalog, catalog: &PredicateCatalog,
) -> Result<(LogicalPlan, Schema), PlannerError> { ) -> Result<(LogicalPlan, Schema), PlannerError> {
let mut seen = HashSet::new(); let mut seen = HashSet::new();
@ -109,23 +105,28 @@ fn plan_from_tables(
PlannerError::Catalog(CatalogError::UnknownTable("<missing>".to_string())) PlannerError::Catalog(CatalogError::UnknownTable("<missing>".to_string()))
})?; })?;
seen.insert(first.clone()); let first_name = source_name(first);
if !seen.insert(first_name.clone()) {
return Err(PlannerError::DuplicateSourceName(first_name));
}
let first_schema = input_schema_for_table(first, catalog, tables.len() > 1)?; let first_schema = input_schema_for_table(first, catalog, tables.len() > 1)?;
let mut plan = LogicalPlan::Scan { let mut plan = LogicalPlan::Scan {
table: first.clone(), table: first.name.clone(),
schema: first_schema.clone(), schema: first_schema.clone(),
}; };
let mut combined_schema = first_schema; let mut combined_schema = first_schema;
for table in table_iter { for table in table_iter {
if !seen.insert(table.clone()) { let qualified_name = source_name(table);
return Err(PlannerError::DuplicateTable(table.clone())); if !seen.insert(qualified_name.clone()) {
return Err(PlannerError::DuplicateSourceName(qualified_name));
} }
let right_schema = input_schema_for_table(table, catalog, tables.len() > 1)?; let right_schema = input_schema_for_table(table, catalog, tables.len() > 1)?;
let join_schema = combine_schemas(&combined_schema, &right_schema); let join_schema = combine_schemas(&combined_schema, &right_schema);
let right_plan = LogicalPlan::Scan { let right_plan = LogicalPlan::Scan {
table: table.clone(), table: table.name.clone(),
schema: right_schema.clone(), schema: right_schema.clone(),
}; };
plan = LogicalPlan::CrossJoin { plan = LogicalPlan::CrossJoin {
@ -187,21 +188,23 @@ fn default_projection_name(expr: &Expr, ordinal: usize) -> String {
} }
fn input_schema_for_table( fn input_schema_for_table(
table: &str, table: &TableRef,
catalog: &PredicateCatalog, catalog: &PredicateCatalog,
qualify_columns: bool, qualify_columns: bool,
) -> Result<Schema, PlannerError> { ) -> Result<Schema, PlannerError> {
let schema = catalog.schema_for(table)?.clone(); let schema = catalog.schema_for(&table.name)?.clone();
if !qualify_columns { if !qualify_columns {
return Ok(schema); return Ok(schema);
} }
let qualifier = source_name(table);
let fields = schema let fields = schema
.fields() .fields()
.iter() .iter()
.map(|field| { .map(|field| {
Field::new( Field::new(
format!("{}.{}", table, field.name()), format!("{}.{}", qualifier, field.name()),
field.data_type().clone(), field.data_type().clone(),
field.nullable(), field.nullable(),
) )
@ -210,6 +213,10 @@ fn input_schema_for_table(
Ok(Schema::new(fields)) Ok(Schema::new(fields))
} }
/// Returns the name under which a `FROM` entry is visible inside the query:
/// its alias when one was given, otherwise the table name itself.
fn source_name(table: &TableRef) -> String {
    let visible = match table.alias.as_deref() {
        Some(alias) => alias,
        None => table.name.as_str(),
    };
    visible.to_owned()
}
fn combine_schemas(left: &Schema, right: &Schema) -> Schema { fn combine_schemas(left: &Schema, right: &Schema) -> Schema {
let mut fields = left.fields().to_vec(); let mut fields = left.fields().to_vec();
fields.extend_from_slice(right.fields()); fields.extend_from_slice(right.fields());
@ -293,4 +300,36 @@ mod tests {
assert_eq!(schema.fields()[0].name(), "Parent.parent"); assert_eq!(schema.fields()[0].name(), "Parent.parent");
assert_eq!(schema.fields()[1].name(), "Ancestor.child"); assert_eq!(schema.fields()[1].name(), "Ancestor.child");
} }
/// Planning `Parent AS p, Parent AS q` must succeed and expose the projected
/// columns under their alias-qualified names.
#[test]
fn plans_self_join_with_table_aliases() {
    let edges = [("alice", "bob"), ("bob", "carol")];
    let instance: Instance = edges
        .iter()
        .map(|&(parent, child)| {
            Atom::new(
                "Parent",
                vec![Term::constant(parent), Term::constant(child)],
            )
        })
        .collect();

    let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
    catalog
        .rename_columns("Parent", ["parent", "child"])
        .unwrap();

    let query = "SELECT p.parent, q.child FROM Parent AS p, Parent AS q \
                 WHERE p.child = q.parent";
    let select = parse_select(query).unwrap();
    let plan = plan_select(&select, &catalog).unwrap();

    let schema = plan.output_schema();
    assert_eq!(schema.len(), 2);
    assert_eq!(schema.fields()[0].name(), "p.parent");
    assert_eq!(schema.fields()[1].name(), "q.child");
}
} }

View File

@ -3,12 +3,21 @@
pub struct Select { pub struct Select {
/// Output expressions requested by the query. /// Output expressions requested by the query.
pub projection: Vec<SelectItem>, pub projection: Vec<SelectItem>,
/// Source table names. /// Source tables and their optional aliases.
pub from: Vec<String>, pub from: Vec<TableRef>,
/// Optional filter predicate. /// Optional filter predicate.
pub selection: Option<Expr>, pub selection: Option<Expr>,
} }
/// One source entry in a `FROM` list, such as `Parent` or `Parent AS p`.
///
/// The same table may be listed more than once as long as every entry is
/// visible under a distinct name, which is how self-joins are written.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableRef {
/// The predicate-backed table name, used to look the table up in the catalog.
pub name: String,
/// Optional table alias introduced with `AS`. When present it replaces
/// `name` as the qualifier for this source's columns (for example `p.parent`).
pub alias: Option<String>,
}
/// One item in a `SELECT` projection list. /// One item in a `SELECT` projection list.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum SelectItem { pub enum SelectItem {

View File

@ -1,7 +1,7 @@
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef};
/// Errors returned by the minimal SQL parser. /// Errors returned by the minimal SQL parser.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
@ -113,11 +113,18 @@ impl Parser {
Ok(items) Ok(items)
} }
fn parse_from_list(&mut self) -> Result<Vec<String>, ParseError> { fn parse_from_list(&mut self) -> Result<Vec<TableRef>, ParseError> {
let mut tables = Vec::new(); let mut tables = Vec::new();
loop { loop {
tables.push(self.expect_identifier()?); let name = self.expect_identifier()?;
let alias = if self.peek() == Some(&Token::As) {
self.index += 1;
Some(self.expect_identifier()?)
} else {
None
};
tables.push(TableRef { name, alias });
if self.peek() == Some(&Token::Comma) { if self.peek() == Some(&Token::Comma) {
self.index += 1; self.index += 1;
continue; continue;
@ -296,7 +303,13 @@ mod tests {
fn parses_select_with_filter() { fn parses_select_with_filter() {
let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap(); let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap();
assert_eq!(select.from, vec!["Parent".to_string()]); assert_eq!(
select.from,
vec![TableRef {
name: "Parent".to_string(),
alias: None,
}]
);
assert_eq!(select.projection.len(), 1); assert_eq!(select.projection.len(), 1);
assert!(select.selection.is_some()); assert!(select.selection.is_some());
} }
@ -340,7 +353,16 @@ mod tests {
assert_eq!( assert_eq!(
select.from, select.from,
vec!["Parent".to_string(), "Ancestor".to_string()] vec![
TableRef {
name: "Parent".to_string(),
alias: None,
},
TableRef {
name: "Ancestor".to_string(),
alias: None,
}
]
); );
assert_eq!( assert_eq!(
select.projection[0], select.projection[0],
@ -350,4 +372,27 @@ mod tests {
} }
); );
} }
/// Every `FROM` entry written as `Table AS alias` must parse into a
/// `TableRef` carrying both the table name and the alias, in source order.
#[test]
fn parses_table_aliases() {
    let query = "SELECT p.parent, a.child FROM Parent AS p, Ancestor AS a \
                 WHERE p.child = a.parent";
    let select = parse_select(query).unwrap();

    let expected: Vec<TableRef> = [("Parent", "p"), ("Ancestor", "a")]
        .iter()
        .map(|&(name, alias)| TableRef {
            name: name.to_string(),
            alias: Some(alias.to_string()),
        })
        .collect();

    assert_eq!(select.from, expected);
}
} }

View File

@ -156,3 +156,51 @@ fn select_join_filters_cross_product_by_qualified_columns() {
vec!["alice -> carol".to_string(), "bob -> dave".to_string()] vec!["alice -> carol".to_string(), "bob -> dave".to_string()]
); );
} }
/// End-to-end check of an aliased self-join: parse, plan, and execute
/// `Parent AS p, Parent AS q` over a three-link chain, expecting the two
/// grandparent pairs under alias-qualified column names.
#[test]
fn select_self_join_uses_table_aliases() {
    let edges = [("alice", "bob"), ("bob", "carol"), ("carol", "dave")];
    let instance: Instance = edges
        .iter()
        .map(|&(parent, child)| {
            Atom::new(
                "Parent",
                vec![Term::constant(parent), Term::constant(child)],
            )
        })
        .collect();

    let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
    catalog
        .rename_columns("Parent", ["parent", "child"])
        .unwrap();

    let query = "SELECT p.parent, q.child FROM Parent AS p, Parent AS q \
                 WHERE p.child = q.parent";
    let select = parse_select(query).unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    assert_eq!(result.schema().fields()[0].name(), "p.parent");
    assert_eq!(result.schema().fields()[1].name(), "q.child");

    // Row order is not guaranteed, so render each row and sort before comparing.
    let mut pairs: Vec<String> = Vec::new();
    for row in result.rows().iter() {
        pairs.push(format!("{} -> {}", row.values()[0], row.values()[1]));
    }
    pairs.sort();

    assert_eq!(
        pairs,
        vec!["alice -> carol".to_string(), "bob -> dave".to_string()]
    );
}