diff --git a/README.md b/README.md index 33f6f8d..bbf5d68 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ SELECT * FROM Parent SELECT c0 FROM Parent SELECT c0 FROM Parent WHERE c1 = 'bob' SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice' +SELECT c0 FROM Parent ORDER BY c0 DESC SELECT c0 AS parent_name, 'seed' AS label FROM Parent SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor @@ -166,6 +167,7 @@ Current limits: - multi-table queries require qualified column names such as `Parent.child` - table aliases are supported via `FROM Parent AS p` - `WHERE` supports equality predicates combined with `AND` +- `ORDER BY` supports output-column ordering with `ASC`/`DESC` - no aggregates - projection aliases only via `AS` diff --git a/ROADMAP.md b/ROADMAP.md index 3ec28ba..e211f0a 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -31,6 +31,7 @@ This document tracks the current state and next steps for the repository. - [x] `SELECT-FROM-WHERE` support with positional or named columns - [x] Basic multi-table SQL joins via qualified-column filtering - [x] Table aliases for self-joins and qualified references +- [x] Basic `ORDER BY` support over output columns ### Near-Term Cleanup diff --git a/src/execution/mod.rs b/src/execution/mod.rs index a30f41c..8fe09ca 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -1,10 +1,11 @@ //! Minimal execution support for the first SQL slice. +use std::cmp::Ordering; use std::error::Error; use std::fmt; use crate::chase::{Instance, Term}; -use crate::planner::logical::{LogicalExpr, LogicalPlan}; +use crate::planner::logical::{LogicalExpr, LogicalPlan, SortDirection, SortKey}; use crate::relational::{ResultSet, Row, Value}; /// Errors returned by the current logical-plan executor. @@ -89,6 +90,17 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result { + let result = execute(input, instance)?; + let mut rows = result.rows().to_vec(); + let resolved_keys = resolve_sort_keys(keys, result.schema())?; + rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys)); + Ok(ResultSet::new(schema.clone(), rows)) + } } } @@ -139,3 +151,45 @@ fn value_from_term(term: &Term) -> Result { Term::Variable(_) => Err(ExecutionError::NonGroundScanTerm), } } + +fn resolve_sort_keys( + keys: &[SortKey], + schema: &crate::relational::Schema, +) -> Result, ExecutionError> { + keys.iter() + .map(|key| { + let index = schema + .index_of(&key.column) + .ok_or_else(|| ExecutionError::UnknownColumn(key.column.clone()))?; + Ok((index, key.direction)) + }) + .collect() +} + +fn compare_rows(left: &Row, right: &Row, keys: &[(usize, SortDirection)]) -> Ordering { + for (index, direction) in keys { + let left_value = left.get(*index).unwrap_or(&Value::Null); + let right_value = right.get(*index).unwrap_or(&Value::Null); + let ordering = compare_values(left_value, right_value); + if ordering != Ordering::Equal { + return match direction { + SortDirection::Asc => ordering, + SortDirection::Desc => ordering.reverse(), + }; + } + } + + Ordering::Equal +} + +fn compare_values(left: &Value, right: &Value) -> Ordering { + match (left, right) { + (Value::Null, Value::Null) => Ordering::Equal, + (Value::Null, _) => Ordering::Greater, + (_, Value::Null) => Ordering::Less, + (Value::Text(left), Value::Text(right)) => left.cmp(right), + (Value::Boolean(left), Value::Boolean(right)) => left.cmp(right), + (Value::Text(_), Value::Boolean(_)) => Ordering::Less, + (Value::Boolean(_), Value::Text(_)) => Ordering::Greater, + } +} diff --git a/src/frontend/language.rs b/src/frontend/language.rs index a99ecb6..2f0a67a 100644 --- a/src/frontend/language.rs +++ b/src/frontend/language.rs @@ -452,6 +452,17 @@ mod tests { } } + #[test] + fn parse_sql_command_with_order_by() { + let command = parse_command("sql SELECT c0 FROM Parent ORDER BY c0 DESC;").unwrap(); + match command { + Command::Sql(select) => { + assert_eq!(select.order_by.len(), 1); + } + other => panic!("unexpected command: {:?}", other), + } + } + #[test] fn parse_schema_command() { let command = parse_command("schema Parent(parent, child).").unwrap(); diff --git a/src/frontend/session.rs b/src/frontend/session.rs index aa1d979..abeaeec 100644 --- a/src/frontend/session.rs +++ b/src/frontend/session.rs @@ -264,7 +264,7 @@ fn render_result_set(result: &ResultSet) -> String { .join(" | "); lines.push(header); - let mut rows = result + let rows = result .rows() .iter() .map(|row| { @@ -275,7 +275,6 @@ fn render_result_set(result: &ResultSet) -> String { .join(" | ") }) .collect::>(); - rows.sort(); lines.extend(rows); lines.join("\n") } @@ -458,4 +457,25 @@ mod tests { assert!(output.contains("1 row(s)")); assert!(output.contains("alice")); } + + #[test] + fn session_runs_sql_query_with_order_by() { + let mut session = Session::new(); + let output = session + .execute_script( + "fact Parent(alice, bob).\n\ + fact Parent(bob, carol).\n\ + fact Parent(carol, dave).\n\ + sql SELECT c0 FROM Parent ORDER BY c0 DESC;", + ) + .unwrap(); + + let lines = output.lines().collect::>(); + let tail = &lines[lines.len() - 5..]; + assert_eq!(tail[0], "3 row(s)"); + assert_eq!(tail[1], "c0"); + assert_eq!(tail[2], "carol"); + assert_eq!(tail[3], "bob"); + assert_eq!(tail[4], "alice"); + } } diff --git a/src/planner/logical.rs b/src/planner/logical.rs index d4e8f0e..aca23e2 100644 --- a/src/planner/logical.rs +++ b/src/planner/logical.rs @@ -1,5 +1,14 @@ use crate::relational::{Schema, Value}; +/// Sort direction for the logical `Sort` operator. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SortDirection { + /// Ascending order. + Asc, + /// Descending order. + Desc, +} + /// A logical expression over relational data. #[derive(Debug, Clone, PartialEq, Eq)] pub enum LogicalExpr { @@ -22,6 +31,15 @@ pub struct NamedExpr { pub expr: LogicalExpr, } +/// One sort key in a logical `Sort` operator. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SortKey { + /// Output column name to sort by. + pub column: String, + /// Sort direction. + pub direction: SortDirection, +} + /// A logical plan in the current execution subset. #[derive(Debug, Clone, PartialEq, Eq)] pub enum LogicalPlan { @@ -38,6 +56,12 @@ pub enum LogicalPlan { input: Box, predicate: LogicalExpr, }, + /// Sort rows by one or more output columns. + Sort { + input: Box, + keys: Vec, + schema: Schema, + }, /// Project a new output schema. Project { input: Box, @@ -53,6 +77,7 @@ impl LogicalPlan { Self::Scan { schema, .. } => schema, Self::CrossJoin { schema, .. } => schema, Self::Filter { input, .. } => input.output_schema(), + Self::Sort { schema, .. } => schema, Self::Project { schema, .. } => schema, } } diff --git a/src/planner/sql.rs b/src/planner/sql.rs index dcbe40b..3564181 100644 --- a/src/planner/sql.rs +++ b/src/planner/sql.rs @@ -3,9 +3,13 @@ use std::error::Error; use std::fmt; use crate::catalog::{CatalogError, PredicateCatalog}; -use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr}; +use crate::planner::logical::{ + LogicalExpr, LogicalPlan, NamedExpr, SortDirection as LogicalSortDirection, SortKey, +}; use crate::relational::{DataType, Field, Schema, Value}; -use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef}; +use crate::sql::ast::{ + BinaryOp, Expr, Literal, OrderByItem, Select, SelectItem, SortDirection, TableRef, +}; /// Errors returned when translating SQL AST into a logical plan. #[derive(Debug)] @@ -16,6 +20,8 @@ pub enum PlannerError { UnknownColumn(String), /// A table or alias name appears more than once in one query. DuplicateSourceName(String), + /// The current `ORDER BY` subset only supports output column names. + UnsupportedOrderBy, } impl fmt::Display for PlannerError { @@ -26,6 +32,9 @@ impl fmt::Display for PlannerError { Self::DuplicateSourceName(name) => { write!(f, "source name `{}` appears more than once", name) } + Self::UnsupportedOrderBy => { + write!(f, "only output column names are supported in ORDER BY") + } } } } @@ -34,7 +43,9 @@ impl Error for PlannerError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::Catalog(err) => Some(err), - Self::UnknownColumn(_) | Self::DuplicateSourceName(_) => None, + Self::UnknownColumn(_) | Self::DuplicateSourceName(_) | Self::UnsupportedOrderBy => { + None + } } } } @@ -61,7 +72,8 @@ pub fn plan_select( } if is_wildcard_projection(&select.projection) { - return Ok(plan); + let output_schema = plan.output_schema().clone(); + return maybe_apply_sort(plan, output_schema, &select.order_by); } let mut expressions = Vec::new(); @@ -84,11 +96,14 @@ pub fn plan_select( } } - Ok(LogicalPlan::Project { + let plan = LogicalPlan::Project { input: Box::new(plan), expressions, schema: Schema::new(fields), - }) + }; + + let output_schema = plan.output_schema().clone(); + maybe_apply_sort(plan, output_schema, &select.order_by) } fn is_wildcard_projection(items: &[SelectItem]) -> bool { @@ -162,6 +177,40 @@ fn plan_expr(expr: &Expr, schema: &Schema) -> Result } } +fn maybe_apply_sort( + plan: LogicalPlan, + schema: Schema, + order_by: &[OrderByItem], +) -> Result { + if order_by.is_empty() { + return Ok(plan); + } + + let mut keys = Vec::new(); + for item in order_by { + let column = match &item.expr { + Expr::Identifier(name) => name.clone(), + _ => return Err(PlannerError::UnsupportedOrderBy), + }; + if schema.index_of(&column).is_none() { + return Err(PlannerError::UnknownColumn(column)); + } + keys.push(SortKey { + column, + direction: match item.direction { + SortDirection::Asc => LogicalSortDirection::Asc, + SortDirection::Desc => LogicalSortDirection::Desc, + }, + }); + } + + Ok(LogicalPlan::Sort { + input: Box::new(plan), + keys, + schema, + }) +} + fn plan_literal(literal: &Literal) -> Value { match literal { Literal::String(value) => Value::text(value.clone()), @@ -360,4 +409,33 @@ mod tests { other => panic!("unexpected plan: {:?}", other), } } + + #[test] + fn plans_order_by_after_projection() { + let instance: Instance = vec![ + Atom::new( + "Parent", + vec![Term::constant("alice"), Term::constant("bob")], + ), + Atom::new( + "Parent", + vec![Term::constant("bob"), Term::constant("carol")], + ), + ] + .into_iter() + .collect(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + match plan { + LogicalPlan::Sort { keys, input, .. } => { + assert_eq!(keys.len(), 1); + assert_eq!(keys[0].column, "c0"); + assert!(matches!(keys[0].direction, LogicalSortDirection::Desc)); + assert!(matches!(*input, LogicalPlan::Project { .. })); + } + other => panic!("unexpected plan: {:?}", other), + } + } } diff --git a/src/sql/ast.rs b/src/sql/ast.rs index 01e0269..8fe150a 100644 --- a/src/sql/ast.rs +++ b/src/sql/ast.rs @@ -7,6 +7,8 @@ pub struct Select { pub from: Vec, /// Optional filter predicate. pub selection: Option, + /// Optional output ordering. + pub order_by: Vec, } /// One source entry in a `FROM` list. @@ -18,6 +20,15 @@ pub struct TableRef { pub alias: Option, } +/// One item in an `ORDER BY` clause. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct OrderByItem { + /// Output column name to sort by. + pub expr: Expr, + /// Sort direction. + pub direction: SortDirection, +} + /// One item in a `SELECT` projection list. #[derive(Debug, Clone, PartialEq, Eq)] pub enum SelectItem { @@ -59,3 +70,12 @@ pub enum BinaryOp { /// Boolean conjunction. And, } + +/// Sort direction for `ORDER BY`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SortDirection { + /// Ascending order. + Asc, + /// Descending order. + Desc, +} diff --git a/src/sql/parser.rs b/src/sql/parser.rs index ab8c4cd..df32fca 100644 --- a/src/sql/parser.rs +++ b/src/sql/parser.rs @@ -1,7 +1,9 @@ use std::error::Error; use std::fmt; -use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef}; +use super::ast::{ + BinaryOp, Expr, Literal, OrderByItem, Select, SelectItem, SortDirection, TableRef, +}; /// Errors returned by the minimal SQL parser. #[derive(Debug, Clone, PartialEq, Eq)] @@ -34,6 +36,10 @@ enum Token { Where, As, And, + Order, + By, + Asc, + Desc, Null, Identifier(String), String(String), @@ -70,6 +76,13 @@ impl Parser { } else { None }; + let order_by = if self.peek() == Some(&Token::Order) { + self.index += 1; + self.expect_keyword(Token::By, "BY")?; + self.parse_order_by()? + } else { + Vec::new() + }; if let Some(token) = self.peek() { return Err(ParseError::UnexpectedToken(render_token(token))); @@ -79,6 +92,7 @@ impl Parser { projection, from, selection, + order_by, }) } @@ -152,6 +166,34 @@ impl Parser { Ok(expr) } + fn parse_order_by(&mut self) -> Result, ParseError> { + let mut items = Vec::new(); + + loop { + let expr = self.parse_operand()?; + let direction = match self.peek() { + Some(Token::Asc) => { + self.index += 1; + SortDirection::Asc + } + Some(Token::Desc) => { + self.index += 1; + SortDirection::Desc + } + _ => SortDirection::Asc, + }; + items.push(OrderByItem { expr, direction }); + + if self.peek() == Some(&Token::Comma) { + self.index += 1; + continue; + } + break; + } + + Ok(items) + } + fn parse_equality(&mut self) -> Result { let left = self.parse_operand()?; match self.next().ok_or(ParseError::UnexpectedEnd)? { @@ -237,6 +279,10 @@ fn tokenize(input: &str) -> Result, ParseError> { "WHERE" => Token::Where, "AS" => Token::As, "AND" => Token::And, + "ORDER" => Token::Order, + "BY" => Token::By, + "ASC" => Token::Asc, + "DESC" => Token::Desc, "NULL" => Token::Null, _ => Token::Identifier(ident), }; @@ -305,6 +351,10 @@ fn render_token(token: &Token) -> String { Token::Where => "WHERE".to_string(), Token::As => "AS".to_string(), Token::And => "AND".to_string(), + Token::Order => "ORDER".to_string(), + Token::By => "BY".to_string(), + Token::Asc => "ASC".to_string(), + Token::Desc => "DESC".to_string(), Token::Null => "NULL".to_string(), Token::Identifier(name) => name.clone(), Token::String(value) => format!("'{}'", value), @@ -331,6 +381,7 @@ mod tests { ); assert_eq!(select.projection.len(), 1); assert!(select.selection.is_some()); + assert!(select.order_by.is_empty()); } #[test] @@ -437,4 +488,23 @@ mod tests { }) ); } + + #[test] + fn parses_order_by_clause() { + let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC, c1 ASC").unwrap(); + + assert_eq!( + select.order_by, + vec![ + OrderByItem { + expr: Expr::Identifier("c0".to_string()), + direction: SortDirection::Desc, + }, + OrderByItem { + expr: Expr::Identifier("c1".to_string()), + direction: SortDirection::Asc, + }, + ] + ); + } } diff --git a/tests/sql_pipeline_tests.rs b/tests/sql_pipeline_tests.rs index 2d7e3bb..0e12bf7 100644 --- a/tests/sql_pipeline_tests.rs +++ b/tests/sql_pipeline_tests.rs @@ -217,3 +217,17 @@ fn select_where_and_applies_multiple_filters() { assert_eq!(result.rows().len(), 1); assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice"); } + +#[test] +fn select_order_by_desc_sorts_rows() { + let instance = parent_instance(); + let catalog = PredicateCatalog::from_instance(&instance).unwrap(); + let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC").unwrap(); + + let plan = plan_select(&select, &catalog).unwrap(); + let result = execute(&plan, &instance).unwrap(); + + assert_eq!(result.rows().len(), 2); + assert_eq!(format!("{}", result.rows()[0].values()[0]), "bob"); + assert_eq!(format!("{}", result.rows()[1].values()[0]), "alice"); +}