Add SQL table aliases for self-joins

This commit is contained in:
Hassan Abedi 2026-04-10 09:56:18 +02:00
parent 4535d4a918
commit 685804e60f
8 changed files with 221 additions and 32 deletions

View File

@ -122,6 +122,9 @@ SELECT c0 AS parent_name, 'seed' AS label FROM Parent
SELECT Parent.parent, Ancestor.child
FROM Parent, Ancestor
WHERE Parent.child = Ancestor.parent
SELECT p.parent, q.child
FROM Parent AS p, Parent AS q
WHERE p.child = q.parent
```
In the REPL or script runner, use the `sql` command and end the statement with
@ -147,13 +150,20 @@ schema Ancestor(parent, child).
sql SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent;
```
For self-joins or shorter qualification, use table aliases:
```text
schema Parent(parent, child).
sql SELECT p.parent, q.child FROM Parent AS p, Parent AS q WHERE p.child = q.parent;
```
Current limits:
- default column names are positional such as `c0`, `c1`
- stable names require explicit catalog registration or `schema ...` in the frontend
- joins currently use comma-separated tables plus `WHERE` filtering
- multi-table queries require qualified column names such as `Parent.child`
- table aliases require the explicit `AS` keyword, as in `FROM Parent AS p`; the bare form `FROM Parent p` is not recognized
- no aggregates
- projection aliases only via `AS`

View File

@ -30,6 +30,7 @@ This document tracks the current state and next steps for the repository.
- [x] Logical-plan execution for the first SQL slice
- [x] `SELECT-FROM-WHERE` support with positional or named columns
- [x] Basic multi-table SQL joins via qualified-column filtering
- [x] Table aliases for self-joins and qualified references
### Near-Term Cleanup

View File

@ -399,7 +399,9 @@ mod tests {
let command = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap();
match command {
Command::Sql(select) => {
assert_eq!(select.from, vec!["Parent".to_string()]);
assert_eq!(select.from.len(), 1);
assert_eq!(select.from[0].name, "Parent");
assert_eq!(select.from[0].alias, None);
assert!(select.selection.is_some());
}
other => panic!("unexpected command: {:?}", other),
@ -414,10 +416,25 @@ mod tests {
.unwrap();
match command {
Command::Sql(select) => {
assert_eq!(
select.from,
vec!["Parent".to_string(), "Ancestor".to_string()]
);
assert_eq!(select.from.len(), 2);
assert_eq!(select.from[0].name, "Parent");
assert_eq!(select.from[1].name, "Ancestor");
}
other => panic!("unexpected command: {:?}", other),
}
}
#[test]
fn parse_sql_join_command_with_aliases() {
let command = parse_command(
"sql SELECT p.parent FROM Parent AS p, Parent AS q WHERE p.child = q.parent;",
)
.unwrap();
match command {
Command::Sql(select) => {
assert_eq!(select.from.len(), 2);
assert_eq!(select.from[0].alias.as_deref(), Some("p"));
assert_eq!(select.from[1].alias.as_deref(), Some("q"));
}
other => panic!("unexpected command: {:?}", other),
}

View File

@ -423,4 +423,24 @@ mod tests {
assert!(output.contains("alice | carol"));
assert!(output.contains("bob | dave"));
}
// End-to-end check: feed a small `Parent` chain through the session script
// runner and confirm that an aliased self-join is rendered as expected.
#[test]
fn session_runs_sql_self_join_with_aliases() {
    // One piece per script line; the pieces concatenate to the exact text the
    // session receives.
    let script = concat!(
        "fact Parent(alice, bob).\n",
        "fact Parent(bob, carol).\n",
        "fact Parent(carol, dave).\n",
        "schema Parent(parent, child).\n",
        "sql SELECT p.parent, q.child FROM Parent AS p, Parent AS q ",
        "WHERE p.child = q.parent;",
    );

    let mut repl = Session::new();
    let rendered = repl.execute_script(script).unwrap();

    // Row count, then the header built from the alias-qualified column names.
    assert!(rendered.contains("2 row(s)"));
    assert!(rendered.contains("p.parent | q.child"));
    // The two rows where one fact's `child` equals another fact's `parent`.
    assert!(rendered.contains("alice | carol"));
    assert!(rendered.contains("bob | dave"));
}
}

View File

@ -5,7 +5,7 @@ use std::fmt;
use crate::catalog::{CatalogError, PredicateCatalog};
use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr};
use crate::relational::{DataType, Field, Schema, Value};
use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem};
use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef};
/// Errors returned when translating SQL AST into a logical plan.
#[derive(Debug)]
@ -14,8 +14,8 @@ pub enum PlannerError {
Catalog(CatalogError),
/// A referenced column does not exist in the input schema.
UnknownColumn(String),
/// The same table name appears more than once without alias support.
DuplicateTable(String),
/// A table or alias name appears more than once in one query.
DuplicateSourceName(String),
}
impl fmt::Display for PlannerError {
@ -23,12 +23,8 @@ impl fmt::Display for PlannerError {
match self {
Self::Catalog(err) => write!(f, "catalog error: {}", err),
Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column),
Self::DuplicateTable(table) => {
write!(
f,
"table `{}` appears more than once; aliases are not supported",
table
)
Self::DuplicateSourceName(name) => {
write!(f, "source name `{}` appears more than once", name)
}
}
}
@ -38,7 +34,7 @@ impl Error for PlannerError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Catalog(err) => Some(err),
Self::UnknownColumn(_) | Self::DuplicateTable(_) => None,
Self::UnknownColumn(_) | Self::DuplicateSourceName(_) => None,
}
}
}
@ -100,7 +96,7 @@ fn is_wildcard_projection(items: &[SelectItem]) -> bool {
}
fn plan_from_tables(
tables: &[String],
tables: &[TableRef],
catalog: &PredicateCatalog,
) -> Result<(LogicalPlan, Schema), PlannerError> {
let mut seen = HashSet::new();
@ -109,23 +105,28 @@ fn plan_from_tables(
PlannerError::Catalog(CatalogError::UnknownTable("<missing>".to_string()))
})?;
seen.insert(first.clone());
let first_name = source_name(first);
if !seen.insert(first_name.clone()) {
return Err(PlannerError::DuplicateSourceName(first_name));
}
let first_schema = input_schema_for_table(first, catalog, tables.len() > 1)?;
let mut plan = LogicalPlan::Scan {
table: first.clone(),
table: first.name.clone(),
schema: first_schema.clone(),
};
let mut combined_schema = first_schema;
for table in table_iter {
if !seen.insert(table.clone()) {
return Err(PlannerError::DuplicateTable(table.clone()));
let qualified_name = source_name(table);
if !seen.insert(qualified_name.clone()) {
return Err(PlannerError::DuplicateSourceName(qualified_name));
}
let right_schema = input_schema_for_table(table, catalog, tables.len() > 1)?;
let join_schema = combine_schemas(&combined_schema, &right_schema);
let right_plan = LogicalPlan::Scan {
table: table.clone(),
table: table.name.clone(),
schema: right_schema.clone(),
};
plan = LogicalPlan::CrossJoin {
@ -187,21 +188,23 @@ fn default_projection_name(expr: &Expr, ordinal: usize) -> String {
}
fn input_schema_for_table(
table: &str,
table: &TableRef,
catalog: &PredicateCatalog,
qualify_columns: bool,
) -> Result<Schema, PlannerError> {
let schema = catalog.schema_for(table)?.clone();
let schema = catalog.schema_for(&table.name)?.clone();
if !qualify_columns {
return Ok(schema);
}
let qualifier = source_name(table);
let fields = schema
.fields()
.iter()
.map(|field| {
Field::new(
format!("{}.{}", table, field.name()),
format!("{}.{}", qualifier, field.name()),
field.data_type().clone(),
field.nullable(),
)
@ -210,6 +213,10 @@ fn input_schema_for_table(
Ok(Schema::new(fields))
}
/// Returns the name under which `table` is referenced inside a query: its
/// alias when one was given, otherwise the table's own name.
fn source_name(table: &TableRef) -> String {
    match &table.alias {
        Some(alias) => alias.clone(),
        None => table.name.clone(),
    }
}
fn combine_schemas(left: &Schema, right: &Schema) -> Schema {
let mut fields = left.fields().to_vec();
fields.extend_from_slice(right.fields());
@ -293,4 +300,36 @@ mod tests {
assert_eq!(schema.fields()[0].name(), "Parent.parent");
assert_eq!(schema.fields()[1].name(), "Ancestor.child");
}
// Planning a self-join over `Parent` must expose output columns qualified by
// the table aliases rather than by the shared table name.
#[test]
fn plans_self_join_with_table_aliases() {
    let instance: Instance = [("alice", "bob"), ("bob", "carol")]
        .iter()
        .map(|&(parent, child)| {
            Atom::new(
                "Parent",
                vec![Term::constant(parent), Term::constant(child)],
            )
        })
        .collect();

    let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
    catalog
        .rename_columns("Parent", ["parent", "child"])
        .unwrap();

    let sql = "SELECT p.parent, q.child FROM Parent AS p, Parent AS q \
               WHERE p.child = q.parent";
    let select = parse_select(sql).unwrap();
    let plan = plan_select(&select, &catalog).unwrap();

    // Exactly the two projected columns, each named after its alias.
    let schema = plan.output_schema();
    assert_eq!(schema.len(), 2);
    let fields = schema.fields();
    assert_eq!(fields[0].name(), "p.parent");
    assert_eq!(fields[1].name(), "q.child");
}
}

View File

@ -3,12 +3,21 @@
pub struct Select {
/// Output expressions requested by the query.
pub projection: Vec<SelectItem>,
/// Source table names.
pub from: Vec<String>,
/// Source tables and their optional aliases.
pub from: Vec<TableRef>,
/// Optional filter predicate.
pub selection: Option<Expr>,
}
/// One source entry in a `FROM` list.
///
/// Written in SQL either as a bare table name (`Parent`) or as a table name
/// followed by an alias (`Parent AS p`). Two entries may share the same
/// `name` and differ only in `alias`, which is how a self-join is expressed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableRef {
/// The predicate-backed table name.
pub name: String,
/// Optional table alias used for qualification; `None` when the entry was
/// written without an `AS` clause.
pub alias: Option<String>,
}
/// One item in a `SELECT` projection list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SelectItem {

View File

@ -1,7 +1,7 @@
use std::error::Error;
use std::fmt;
use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem};
use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef};
/// Errors returned by the minimal SQL parser.
#[derive(Debug, Clone, PartialEq, Eq)]
@ -113,11 +113,18 @@ impl Parser {
Ok(items)
}
fn parse_from_list(&mut self) -> Result<Vec<String>, ParseError> {
fn parse_from_list(&mut self) -> Result<Vec<TableRef>, ParseError> {
let mut tables = Vec::new();
loop {
tables.push(self.expect_identifier()?);
let name = self.expect_identifier()?;
let alias = if self.peek() == Some(&Token::As) {
self.index += 1;
Some(self.expect_identifier()?)
} else {
None
};
tables.push(TableRef { name, alias });
if self.peek() == Some(&Token::Comma) {
self.index += 1;
continue;
@ -296,7 +303,13 @@ mod tests {
fn parses_select_with_filter() {
let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap();
assert_eq!(select.from, vec!["Parent".to_string()]);
assert_eq!(
select.from,
vec![TableRef {
name: "Parent".to_string(),
alias: None,
}]
);
assert_eq!(select.projection.len(), 1);
assert!(select.selection.is_some());
}
@ -340,7 +353,16 @@ mod tests {
assert_eq!(
select.from,
vec!["Parent".to_string(), "Ancestor".to_string()]
vec![
TableRef {
name: "Parent".to_string(),
alias: None,
},
TableRef {
name: "Ancestor".to_string(),
alias: None,
}
]
);
assert_eq!(
select.projection[0],
@ -350,4 +372,27 @@ mod tests {
}
);
}
// Each `FROM` entry written as `<table> AS <alias>` must come back as a
// `TableRef` carrying both the table name and the alias, in source order.
#[test]
fn parses_table_aliases() {
    let sql = "SELECT p.parent, a.child FROM Parent AS p, Ancestor AS a \
               WHERE p.child = a.parent";

    let expected: Vec<TableRef> = [("Parent", "p"), ("Ancestor", "a")]
        .iter()
        .map(|&(name, alias)| TableRef {
            name: name.to_string(),
            alias: Some(alias.to_string()),
        })
        .collect();

    let select = parse_select(sql).unwrap();
    assert_eq!(select.from, expected);
}
}

View File

@ -156,3 +156,51 @@ fn select_join_filters_cross_product_by_qualified_columns() {
vec!["alice -> carol".to_string(), "bob -> dave".to_string()]
);
}
// Parses, plans and executes an aliased self-join over a three-link `Parent`
// chain, then checks both the output column names and the joined rows.
#[test]
fn select_self_join_uses_table_aliases() {
    let instance: Instance = [("alice", "bob"), ("bob", "carol"), ("carol", "dave")]
        .iter()
        .map(|&(parent, child)| {
            Atom::new(
                "Parent",
                vec![Term::constant(parent), Term::constant(child)],
            )
        })
        .collect();

    let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
    catalog
        .rename_columns("Parent", ["parent", "child"])
        .unwrap();

    let sql = "SELECT p.parent, q.child FROM Parent AS p, Parent AS q \
               WHERE p.child = q.parent";
    let select = parse_select(sql).unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    // Output columns are qualified by alias, not by the shared table name.
    let schema = result.schema();
    assert_eq!(schema.fields()[0].name(), "p.parent");
    assert_eq!(schema.fields()[1].name(), "q.child");

    // Render every row as "first -> second" and sort, so the comparison does
    // not depend on the order in which rows are produced.
    let mut pairs: Vec<String> = Vec::new();
    for row in result.rows().iter() {
        let cells = row.values();
        pairs.push(format!("{} -> {}", cells[0], cells[1]));
    }
    pairs.sort();

    assert_eq!(pairs, ["alice -> carol", "bob -> dave"]);
}