2026-04-10 09:51:01 +02:00
|
|
|
use std::collections::HashSet;
|
2026-04-09 12:38:43 +02:00
|
|
|
use std::error::Error;
|
|
|
|
|
use std::fmt;
|
|
|
|
|
|
|
|
|
|
use crate::catalog::{CatalogError, PredicateCatalog};
|
|
|
|
|
use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr};
|
2026-04-09 12:50:06 +02:00
|
|
|
use crate::relational::{DataType, Field, Schema, Value};
|
2026-04-09 12:38:43 +02:00
|
|
|
use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem};
|
|
|
|
|
|
2026-04-09 12:50:06 +02:00
|
|
|
/// Errors returned when translating SQL AST into a logical plan.
|
2026-04-09 12:38:43 +02:00
|
|
|
#[derive(Debug)]
|
|
|
|
|
pub enum PlannerError {
|
2026-04-09 12:50:06 +02:00
|
|
|
/// Catalog lookup failed.
|
2026-04-09 12:38:43 +02:00
|
|
|
Catalog(CatalogError),
|
2026-04-09 12:50:06 +02:00
|
|
|
/// A referenced column does not exist in the input schema.
|
2026-04-09 12:38:43 +02:00
|
|
|
UnknownColumn(String),
|
2026-04-10 09:51:01 +02:00
|
|
|
/// The same table name appears more than once without alias support.
|
|
|
|
|
DuplicateTable(String),
|
2026-04-09 12:38:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl fmt::Display for PlannerError {
|
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
|
|
match self {
|
|
|
|
|
Self::Catalog(err) => write!(f, "catalog error: {}", err),
|
|
|
|
|
Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column),
|
2026-04-10 09:51:01 +02:00
|
|
|
Self::DuplicateTable(table) => {
|
|
|
|
|
write!(
|
|
|
|
|
f,
|
|
|
|
|
"table `{}` appears more than once; aliases are not supported",
|
|
|
|
|
table
|
|
|
|
|
)
|
|
|
|
|
}
|
2026-04-09 12:38:43 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Error for PlannerError {
|
|
|
|
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
|
|
|
|
match self {
|
|
|
|
|
Self::Catalog(err) => Some(err),
|
2026-04-10 09:51:01 +02:00
|
|
|
Self::UnknownColumn(_) | Self::DuplicateTable(_) => None,
|
2026-04-09 12:38:43 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl From<CatalogError> for PlannerError {
|
|
|
|
|
fn from(value: CatalogError) -> Self {
|
|
|
|
|
Self::Catalog(value)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 12:50:06 +02:00
|
|
|
/// Plan a parsed `SELECT` statement into the current logical plan subset.
|
2026-04-09 12:38:43 +02:00
|
|
|
pub fn plan_select(
|
|
|
|
|
select: &Select,
|
|
|
|
|
catalog: &PredicateCatalog,
|
|
|
|
|
) -> Result<LogicalPlan, PlannerError> {
|
2026-04-10 09:51:01 +02:00
|
|
|
let (mut plan, input_schema) = plan_from_tables(&select.from, catalog)?;
|
2026-04-09 12:38:43 +02:00
|
|
|
|
|
|
|
|
if let Some(selection) = &select.selection {
|
2026-04-10 09:51:01 +02:00
|
|
|
let predicate = plan_expr(selection, &input_schema)?;
|
2026-04-09 12:38:43 +02:00
|
|
|
plan = LogicalPlan::Filter {
|
|
|
|
|
input: Box::new(plan),
|
|
|
|
|
predicate,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if is_wildcard_projection(&select.projection) {
|
|
|
|
|
return Ok(plan);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut expressions = Vec::new();
|
|
|
|
|
let mut fields = Vec::new();
|
2026-04-09 12:50:06 +02:00
|
|
|
for (index, item) in select.projection.iter().enumerate() {
|
2026-04-09 12:38:43 +02:00
|
|
|
match item {
|
2026-04-09 12:50:06 +02:00
|
|
|
SelectItem::Expr { expr, alias } => {
|
2026-04-10 09:51:01 +02:00
|
|
|
let planned_expr = plan_expr(expr, &input_schema)?;
|
2026-04-09 12:50:06 +02:00
|
|
|
let output_name = alias
|
|
|
|
|
.clone()
|
|
|
|
|
.unwrap_or_else(|| default_projection_name(expr, index + 1));
|
2026-04-10 09:51:01 +02:00
|
|
|
let (data_type, nullable) = projection_metadata(expr, &input_schema)?;
|
2026-04-09 12:50:06 +02:00
|
|
|
expressions.push(NamedExpr {
|
|
|
|
|
name: output_name.clone(),
|
|
|
|
|
expr: planned_expr,
|
|
|
|
|
});
|
|
|
|
|
fields.push(Field::new(output_name, data_type, nullable));
|
|
|
|
|
}
|
|
|
|
|
SelectItem::Wildcard => unreachable!("wildcard projections are handled earlier"),
|
2026-04-09 12:38:43 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(LogicalPlan::Project {
|
|
|
|
|
input: Box::new(plan),
|
|
|
|
|
expressions,
|
|
|
|
|
schema: Schema::new(fields),
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn is_wildcard_projection(items: &[SelectItem]) -> bool {
|
|
|
|
|
matches!(items, [SelectItem::Wildcard])
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-10 09:51:01 +02:00
|
|
|
fn plan_from_tables(
|
|
|
|
|
tables: &[String],
|
|
|
|
|
catalog: &PredicateCatalog,
|
|
|
|
|
) -> Result<(LogicalPlan, Schema), PlannerError> {
|
|
|
|
|
let mut seen = HashSet::new();
|
|
|
|
|
let mut table_iter = tables.iter();
|
|
|
|
|
let first = table_iter.next().ok_or_else(|| {
|
|
|
|
|
PlannerError::Catalog(CatalogError::UnknownTable("<missing>".to_string()))
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
seen.insert(first.clone());
|
|
|
|
|
let first_schema = input_schema_for_table(first, catalog, tables.len() > 1)?;
|
|
|
|
|
let mut plan = LogicalPlan::Scan {
|
|
|
|
|
table: first.clone(),
|
|
|
|
|
schema: first_schema.clone(),
|
|
|
|
|
};
|
|
|
|
|
let mut combined_schema = first_schema;
|
|
|
|
|
|
|
|
|
|
for table in table_iter {
|
|
|
|
|
if !seen.insert(table.clone()) {
|
|
|
|
|
return Err(PlannerError::DuplicateTable(table.clone()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let right_schema = input_schema_for_table(table, catalog, tables.len() > 1)?;
|
|
|
|
|
let join_schema = combine_schemas(&combined_schema, &right_schema);
|
|
|
|
|
let right_plan = LogicalPlan::Scan {
|
|
|
|
|
table: table.clone(),
|
|
|
|
|
schema: right_schema.clone(),
|
|
|
|
|
};
|
|
|
|
|
plan = LogicalPlan::CrossJoin {
|
|
|
|
|
left: Box::new(plan),
|
|
|
|
|
right: Box::new(right_plan),
|
|
|
|
|
schema: join_schema.clone(),
|
|
|
|
|
};
|
|
|
|
|
combined_schema = join_schema;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok((plan, combined_schema))
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 12:38:43 +02:00
|
|
|
fn plan_expr(expr: &Expr, schema: &Schema) -> Result<LogicalExpr, PlannerError> {
|
|
|
|
|
match expr {
|
|
|
|
|
Expr::Identifier(name) => {
|
|
|
|
|
if schema.index_of(name).is_none() {
|
|
|
|
|
return Err(PlannerError::UnknownColumn(name.clone()));
|
|
|
|
|
}
|
|
|
|
|
Ok(LogicalExpr::Column(name.clone()))
|
|
|
|
|
}
|
|
|
|
|
Expr::Literal(literal) => Ok(LogicalExpr::Literal(plan_literal(literal))),
|
|
|
|
|
Expr::Binary { left, op, right } => match op {
|
|
|
|
|
BinaryOp::Eq => Ok(LogicalExpr::Eq(
|
|
|
|
|
Box::new(plan_expr(left, schema)?),
|
|
|
|
|
Box::new(plan_expr(right, schema)?),
|
|
|
|
|
)),
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn plan_literal(literal: &Literal) -> Value {
|
|
|
|
|
match literal {
|
|
|
|
|
Literal::String(value) => Value::text(value.clone()),
|
|
|
|
|
Literal::Null => Value::Null,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 12:50:06 +02:00
|
|
|
fn projection_metadata(expr: &Expr, schema: &Schema) -> Result<(DataType, bool), PlannerError> {
|
|
|
|
|
match expr {
|
|
|
|
|
Expr::Identifier(name) => {
|
|
|
|
|
let index = schema
|
|
|
|
|
.index_of(name)
|
|
|
|
|
.ok_or_else(|| PlannerError::UnknownColumn(name.clone()))?;
|
|
|
|
|
let field = &schema.fields()[index];
|
|
|
|
|
Ok((field.data_type().clone(), field.nullable()))
|
|
|
|
|
}
|
|
|
|
|
Expr::Literal(Literal::String(_)) => Ok((DataType::Text, false)),
|
|
|
|
|
Expr::Literal(Literal::Null) => Ok((DataType::Text, true)),
|
|
|
|
|
Expr::Binary { .. } => Ok((DataType::Boolean, true)),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn default_projection_name(expr: &Expr, ordinal: usize) -> String {
|
|
|
|
|
match expr {
|
|
|
|
|
Expr::Identifier(name) => name.clone(),
|
|
|
|
|
Expr::Literal(_) | Expr::Binary { .. } => format!("expr{}", ordinal),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-10 09:51:01 +02:00
|
|
|
fn input_schema_for_table(
|
|
|
|
|
table: &str,
|
|
|
|
|
catalog: &PredicateCatalog,
|
|
|
|
|
qualify_columns: bool,
|
|
|
|
|
) -> Result<Schema, PlannerError> {
|
|
|
|
|
let schema = catalog.schema_for(table)?.clone();
|
|
|
|
|
if !qualify_columns {
|
|
|
|
|
return Ok(schema);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let fields = schema
|
|
|
|
|
.fields()
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|field| {
|
|
|
|
|
Field::new(
|
|
|
|
|
format!("{}.{}", table, field.name()),
|
|
|
|
|
field.data_type().clone(),
|
|
|
|
|
field.nullable(),
|
|
|
|
|
)
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
Ok(Schema::new(fields))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn combine_schemas(left: &Schema, right: &Schema) -> Schema {
|
|
|
|
|
let mut fields = left.fields().to_vec();
|
|
|
|
|
fields.extend_from_slice(right.fields());
|
|
|
|
|
Schema::new(fields)
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 12:38:43 +02:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::catalog::PredicateCatalog;
    use crate::chase::{Atom, Instance, Term};
    use crate::sql::parser::parse_select;

    /// Builds a binary fact `predicate(first, second)` over constants.
    fn fact(predicate: &'static str, first: &'static str, second: &'static str) -> Atom {
        Atom::new(
            predicate,
            vec![Term::constant(first), Term::constant(second)],
        )
    }

    /// Collects the atoms into an instance and derives a catalog from it.
    fn catalog_of(atoms: Vec<Atom>) -> PredicateCatalog {
        let instance: Instance = atoms.into_iter().collect();
        PredicateCatalog::from_instance(&instance).unwrap()
    }

    #[test]
    fn plans_projection_and_filter() {
        let catalog = catalog_of(vec![fact("Parent", "alice", "bob")]);
        let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap();

        let plan = plan_select(&select, &catalog).unwrap();
        assert_eq!(plan.output_schema().len(), 1);
    }

    #[test]
    fn plans_aliases_and_literal_projection() {
        let catalog = catalog_of(vec![fact("Parent", "alice", "bob")]);
        let select =
            parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap();

        let plan = plan_select(&select, &catalog).unwrap();
        let schema = plan.output_schema();
        assert_eq!(schema.len(), 3);
        assert_eq!(schema.fields()[0].name(), "parent_name");
        assert_eq!(schema.fields()[1].name(), "label");
        // The unaliased NULL literal is named after its 1-based position.
        assert_eq!(schema.fields()[2].name(), "expr3");
        assert_eq!(schema.fields()[1].data_type(), &DataType::Text);
    }

    #[test]
    fn plans_multi_table_select_with_qualified_columns() {
        let mut catalog = catalog_of(vec![
            fact("Parent", "alice", "bob"),
            fact("Ancestor", "bob", "carol"),
        ]);
        // Both tables get the same column names, so only qualified names can resolve.
        catalog
            .rename_columns("Parent", ["parent", "child"])
            .unwrap();
        catalog
            .rename_columns("Ancestor", ["parent", "child"])
            .unwrap();

        let select = parse_select(
            "SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \
             WHERE Parent.child = Ancestor.parent",
        )
        .unwrap();

        let plan = plan_select(&select, &catalog).unwrap();
        let schema = plan.output_schema();
        assert_eq!(schema.len(), 2);
        assert_eq!(schema.fields()[0].name(), "Parent.parent");
        assert_eq!(schema.fields()[1].name(), "Ancestor.child");
    }
}
|