Add basic SQL ORDER BY support

This commit is contained in:
Hassan Abedi 2026-04-10 10:10:46 +02:00
parent 77ef8c5ae9
commit eaeb2092d2
10 changed files with 305 additions and 10 deletions

View File

@ -119,6 +119,7 @@ SELECT * FROM Parent
SELECT c0 FROM Parent SELECT c0 FROM Parent
SELECT c0 FROM Parent WHERE c1 = 'bob' SELECT c0 FROM Parent WHERE c1 = 'bob'
SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice' SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice'
SELECT c0 FROM Parent ORDER BY c0 DESC
SELECT c0 AS parent_name, 'seed' AS label FROM Parent SELECT c0 AS parent_name, 'seed' AS label FROM Parent
SELECT Parent.parent, Ancestor.child SELECT Parent.parent, Ancestor.child
FROM Parent, Ancestor FROM Parent, Ancestor
@ -166,6 +167,7 @@ Current limits:
- multi-table queries require qualified column names such as `Parent.child` - multi-table queries require qualified column names such as `Parent.child`
- table aliases are supported via `FROM Parent AS p` - table aliases are supported via `FROM Parent AS p`
- `WHERE` supports equality predicates combined with `AND` - `WHERE` supports equality predicates combined with `AND`
- `ORDER BY` supports output-column ordering with `ASC`/`DESC`
- no aggregates - no aggregates
- projection aliases only via `AS` - projection aliases only via `AS`

View File

@ -31,6 +31,7 @@ This document tracks the current state and next steps for the repository.
- [x] `SELECT-FROM-WHERE` support with positional or named columns - [x] `SELECT-FROM-WHERE` support with positional or named columns
- [x] Basic multi-table SQL joins via qualified-column filtering - [x] Basic multi-table SQL joins via qualified-column filtering
- [x] Table aliases for self-joins and qualified references - [x] Table aliases for self-joins and qualified references
- [x] Basic `ORDER BY` support over output columns
### Near-Term Cleanup ### Near-Term Cleanup

View File

@ -1,10 +1,11 @@
//! Minimal execution support for the first SQL slice. //! Minimal execution support for the first SQL slice.
use std::cmp::Ordering;
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use crate::chase::{Instance, Term}; use crate::chase::{Instance, Term};
use crate::planner::logical::{LogicalExpr, LogicalPlan}; use crate::planner::logical::{LogicalExpr, LogicalPlan, SortDirection, SortKey};
use crate::relational::{ResultSet, Row, Value}; use crate::relational::{ResultSet, Row, Value};
/// Errors returned by the current logical-plan executor. /// Errors returned by the current logical-plan executor.
@ -89,6 +90,17 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
} }
Ok(ResultSet::new(schema.clone(), rows)) Ok(ResultSet::new(schema.clone(), rows))
} }
LogicalPlan::Sort {
input,
keys,
schema,
} => {
let result = execute(input, instance)?;
let mut rows = result.rows().to_vec();
let resolved_keys = resolve_sort_keys(keys, result.schema())?;
rows.sort_by(|left, right| compare_rows(left, right, &resolved_keys));
Ok(ResultSet::new(schema.clone(), rows))
}
} }
} }
@ -139,3 +151,45 @@ fn value_from_term(term: &Term) -> Result<Value, ExecutionError> {
Term::Variable(_) => Err(ExecutionError::NonGroundScanTerm), Term::Variable(_) => Err(ExecutionError::NonGroundScanTerm),
} }
} }
/// Translates name-based sort keys into `(column index, direction)` pairs.
///
/// Each key's column name is looked up in `schema`. The pairs come back in the
/// same order as `keys`, so the first entry is the most significant sort key.
///
/// # Errors
///
/// Returns `ExecutionError::UnknownColumn` carrying the offending name for the
/// first key whose column is not present in `schema`.
fn resolve_sort_keys(
    keys: &[SortKey],
    schema: &crate::relational::Schema,
) -> Result<Vec<(usize, SortDirection)>, ExecutionError> {
    let mut resolved = Vec::with_capacity(keys.len());
    for key in keys {
        match schema.index_of(&key.column) {
            Some(position) => resolved.push((position, key.direction)),
            None => return Err(ExecutionError::UnknownColumn(key.column.clone())),
        }
    }
    Ok(resolved)
}
/// Orders two rows according to a list of resolved sort keys.
///
/// Keys are consulted from first to last; the first key on which the rows
/// differ decides the result. A column index that is missing from a row is
/// treated as `Value::Null`. Rows that agree on every key compare as equal.
fn compare_rows(left: &Row, right: &Row, keys: &[(usize, SortDirection)]) -> Ordering {
    keys.iter()
        .map(|&(column, direction)| {
            let lhs = left.get(column).unwrap_or(&Value::Null);
            let rhs = right.get(column).unwrap_or(&Value::Null);
            let natural = compare_values(lhs, rhs);
            // Reversing `Equal` yields `Equal`, so applying the direction
            // before the tie check does not change which key decides.
            match direction {
                SortDirection::Asc => natural,
                SortDirection::Desc => natural.reverse(),
            }
        })
        // `map` is lazy, so later keys are only compared while earlier ones tie.
        .find(|ordering| *ordering != Ordering::Equal)
        .unwrap_or(Ordering::Equal)
}
/// Defines the total order used when sorting values.
///
/// * `Null` compares greater than every non-null value and equal to itself.
/// * Two `Text` values, or two `Boolean` values, use their inner ordering.
/// * Across kinds, `Text` sorts before `Boolean`.
fn compare_values(left: &Value, right: &Value) -> Ordering {
    match left {
        Value::Null => match right {
            Value::Null => Ordering::Equal,
            _ => Ordering::Greater,
        },
        Value::Text(lhs) => match right {
            Value::Null => Ordering::Less,
            Value::Text(rhs) => lhs.cmp(rhs),
            Value::Boolean(_) => Ordering::Less,
        },
        Value::Boolean(lhs) => match right {
            Value::Null => Ordering::Less,
            Value::Text(_) => Ordering::Greater,
            Value::Boolean(rhs) => lhs.cmp(rhs),
        },
    }
}

View File

@ -452,6 +452,17 @@ mod tests {
} }
} }
#[test]
fn parse_sql_command_with_order_by() {
    // A `sql` command with a single ORDER BY key must parse into a SQL
    // command whose select carries exactly one ordering item.
    let parsed = parse_command("sql SELECT c0 FROM Parent ORDER BY c0 DESC;").unwrap();
    if let Command::Sql(select) = &parsed {
        assert_eq!(select.order_by.len(), 1);
    } else {
        panic!("unexpected command: {:?}", parsed);
    }
}
#[test] #[test]
fn parse_schema_command() { fn parse_schema_command() {
let command = parse_command("schema Parent(parent, child).").unwrap(); let command = parse_command("schema Parent(parent, child).").unwrap();

View File

@ -264,7 +264,7 @@ fn render_result_set(result: &ResultSet) -> String {
.join(" | "); .join(" | ");
lines.push(header); lines.push(header);
let mut rows = result let rows = result
.rows() .rows()
.iter() .iter()
.map(|row| { .map(|row| {
@ -275,7 +275,6 @@ fn render_result_set(result: &ResultSet) -> String {
.join(" | ") .join(" | ")
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
rows.sort();
lines.extend(rows); lines.extend(rows);
lines.join("\n") lines.join("\n")
} }
@ -458,4 +457,25 @@ mod tests {
assert!(output.contains("1 row(s)")); assert!(output.contains("1 row(s)"));
assert!(output.contains("alice")); assert!(output.contains("alice"));
} }
#[test]
fn session_runs_sql_query_with_order_by() {
    // Three facts followed by a descending query on the first column.
    let script = "fact Parent(alice, bob).\n\
                  fact Parent(bob, carol).\n\
                  fact Parent(carol, dave).\n\
                  sql SELECT c0 FROM Parent ORDER BY c0 DESC;";

    let mut session = Session::new();
    let output = session.execute_script(script).unwrap();

    // Only the last five lines belong to the query result: the row count,
    // the header, and the three rows in descending order.
    let lines: Vec<&str> = output.lines().collect();
    let tail = &lines[lines.len() - 5..];
    let expected: [&str; 5] = ["3 row(s)", "c0", "carol", "bob", "alice"];
    assert_eq!(tail, &expected[..]);
}
} }

View File

@ -1,5 +1,14 @@
use crate::relational::{Schema, Value}; use crate::relational::{Schema, Value};
/// Sort direction for the logical `Sort` operator.
///
/// The executor applies the direction to the outcome of comparing two rows on
/// one key: `Asc` uses the comparison unchanged and `Desc` reverses it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SortDirection {
/// Ascending order: the key comparison is used as-is.
Asc,
/// Descending order: the key comparison is reversed.
Desc,
}
/// A logical expression over relational data. /// A logical expression over relational data.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalExpr { pub enum LogicalExpr {
@ -22,6 +31,15 @@ pub struct NamedExpr {
pub expr: LogicalExpr, pub expr: LogicalExpr,
} }
/// One sort key in a logical `Sort` operator.
///
/// When a `Sort` has several keys they are consulted in order; a later key
/// only matters for rows that compare equal on every earlier key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SortKey {
/// Output column name to sort by. The executor resolves this name to a
/// column index against the sorted input's schema.
pub column: String,
/// Sort direction applied to this key.
pub direction: SortDirection,
}
/// A logical plan in the current execution subset. /// A logical plan in the current execution subset.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalPlan { pub enum LogicalPlan {
@ -38,6 +56,12 @@ pub enum LogicalPlan {
input: Box<LogicalPlan>, input: Box<LogicalPlan>,
predicate: LogicalExpr, predicate: LogicalExpr,
}, },
/// Sort rows by one or more output columns.
Sort {
input: Box<LogicalPlan>,
keys: Vec<SortKey>,
schema: Schema,
},
/// Project a new output schema. /// Project a new output schema.
Project { Project {
input: Box<LogicalPlan>, input: Box<LogicalPlan>,
@ -53,6 +77,7 @@ impl LogicalPlan {
Self::Scan { schema, .. } => schema, Self::Scan { schema, .. } => schema,
Self::CrossJoin { schema, .. } => schema, Self::CrossJoin { schema, .. } => schema,
Self::Filter { input, .. } => input.output_schema(), Self::Filter { input, .. } => input.output_schema(),
Self::Sort { schema, .. } => schema,
Self::Project { schema, .. } => schema, Self::Project { schema, .. } => schema,
} }
} }

View File

@ -3,9 +3,13 @@ use std::error::Error;
use std::fmt; use std::fmt;
use crate::catalog::{CatalogError, PredicateCatalog}; use crate::catalog::{CatalogError, PredicateCatalog};
use crate::planner::logical::{LogicalExpr, LogicalPlan, NamedExpr}; use crate::planner::logical::{
LogicalExpr, LogicalPlan, NamedExpr, SortDirection as LogicalSortDirection, SortKey,
};
use crate::relational::{DataType, Field, Schema, Value}; use crate::relational::{DataType, Field, Schema, Value};
use crate::sql::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef}; use crate::sql::ast::{
BinaryOp, Expr, Literal, OrderByItem, Select, SelectItem, SortDirection, TableRef,
};
/// Errors returned when translating SQL AST into a logical plan. /// Errors returned when translating SQL AST into a logical plan.
#[derive(Debug)] #[derive(Debug)]
@ -16,6 +20,8 @@ pub enum PlannerError {
UnknownColumn(String), UnknownColumn(String),
/// A table or alias name appears more than once in one query. /// A table or alias name appears more than once in one query.
DuplicateSourceName(String), DuplicateSourceName(String),
/// The current `ORDER BY` subset only supports output column names.
UnsupportedOrderBy,
} }
impl fmt::Display for PlannerError { impl fmt::Display for PlannerError {
@ -26,6 +32,9 @@ impl fmt::Display for PlannerError {
Self::DuplicateSourceName(name) => { Self::DuplicateSourceName(name) => {
write!(f, "source name `{}` appears more than once", name) write!(f, "source name `{}` appears more than once", name)
} }
Self::UnsupportedOrderBy => {
write!(f, "only output column names are supported in ORDER BY")
}
} }
} }
} }
@ -34,7 +43,9 @@ impl Error for PlannerError {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match self { match self {
Self::Catalog(err) => Some(err), Self::Catalog(err) => Some(err),
Self::UnknownColumn(_) | Self::DuplicateSourceName(_) => None, Self::UnknownColumn(_) | Self::DuplicateSourceName(_) | Self::UnsupportedOrderBy => {
None
}
} }
} }
} }
@ -61,7 +72,8 @@ pub fn plan_select(
} }
if is_wildcard_projection(&select.projection) { if is_wildcard_projection(&select.projection) {
return Ok(plan); let output_schema = plan.output_schema().clone();
return maybe_apply_sort(plan, output_schema, &select.order_by);
} }
let mut expressions = Vec::new(); let mut expressions = Vec::new();
@ -84,11 +96,14 @@ pub fn plan_select(
} }
} }
Ok(LogicalPlan::Project { let plan = LogicalPlan::Project {
input: Box::new(plan), input: Box::new(plan),
expressions, expressions,
schema: Schema::new(fields), schema: Schema::new(fields),
}) };
let output_schema = plan.output_schema().clone();
maybe_apply_sort(plan, output_schema, &select.order_by)
} }
fn is_wildcard_projection(items: &[SelectItem]) -> bool { fn is_wildcard_projection(items: &[SelectItem]) -> bool {
@ -162,6 +177,40 @@ fn plan_expr(expr: &Expr, schema: &Schema) -> Result<LogicalExpr, PlannerError>
} }
} }
/// Wraps `plan` in a `LogicalPlan::Sort` when the query has an `ORDER BY`.
///
/// `schema` is the output schema of `plan`. It is used to validate the
/// ordering columns and becomes the schema of the resulting `Sort` node.
/// With an empty `order_by` the plan is returned untouched.
///
/// # Errors
///
/// * `PlannerError::UnsupportedOrderBy` if an item is anything other than a
///   plain identifier.
/// * `PlannerError::UnknownColumn` if an identifier does not name a column of
///   `schema`.
///
/// The error reported is the one for the first offending item.
fn maybe_apply_sort(
    plan: LogicalPlan,
    schema: Schema,
    order_by: &[OrderByItem],
) -> Result<LogicalPlan, PlannerError> {
    if order_by.is_empty() {
        return Ok(plan);
    }

    // Collecting into `Result` stops at the first item that fails to plan.
    let keys = order_by
        .iter()
        .map(|item| match &item.expr {
            Expr::Identifier(name) if schema.index_of(name).is_some() => Ok(SortKey {
                column: name.clone(),
                direction: match item.direction {
                    SortDirection::Asc => LogicalSortDirection::Asc,
                    SortDirection::Desc => LogicalSortDirection::Desc,
                },
            }),
            Expr::Identifier(name) => Err(PlannerError::UnknownColumn(name.clone())),
            _ => Err(PlannerError::UnsupportedOrderBy),
        })
        .collect::<Result<Vec<_>, _>>()?;

    Ok(LogicalPlan::Sort {
        input: Box::new(plan),
        keys,
        schema,
    })
}
fn plan_literal(literal: &Literal) -> Value { fn plan_literal(literal: &Literal) -> Value {
match literal { match literal {
Literal::String(value) => Value::text(value.clone()), Literal::String(value) => Value::text(value.clone()),
@ -360,4 +409,33 @@ mod tests {
other => panic!("unexpected plan: {:?}", other), other => panic!("unexpected plan: {:?}", other),
} }
} }
#[test]
fn plans_order_by_after_projection() {
    // Two `Parent` facts are enough for the catalog to learn the relation.
    let facts = vec![
        Atom::new(
            "Parent",
            vec![Term::constant("alice"), Term::constant("bob")],
        ),
        Atom::new(
            "Parent",
            vec![Term::constant("bob"), Term::constant("carol")],
        ),
    ];
    let instance: Instance = facts.into_iter().collect();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();

    // The sort must sit on top of the projection, not underneath it.
    match &plan {
        LogicalPlan::Sort { keys, input, .. } => {
            assert_eq!(keys.len(), 1);
            assert_eq!(keys[0].column, "c0");
            assert!(matches!(keys[0].direction, LogicalSortDirection::Desc));
            assert!(matches!(**input, LogicalPlan::Project { .. }));
        }
        other => panic!("unexpected plan: {:?}", other),
    }
}
} }

View File

@ -7,6 +7,8 @@ pub struct Select {
pub from: Vec<TableRef>, pub from: Vec<TableRef>,
/// Optional filter predicate. /// Optional filter predicate.
pub selection: Option<Expr>, pub selection: Option<Expr>,
/// Optional output ordering.
pub order_by: Vec<OrderByItem>,
} }
/// One source entry in a `FROM` list. /// One source entry in a `FROM` list.
@ -18,6 +20,15 @@ pub struct TableRef {
pub alias: Option<String>, pub alias: Option<String>,
} }
/// One item in an `ORDER BY` clause.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OrderByItem {
/// Expression to sort by. The parser stores the operand it read here; the
/// planner currently accepts only `Expr::Identifier` naming an output column.
pub expr: Expr,
/// Sort direction. The parser uses `SortDirection::Asc` when neither `ASC`
/// nor `DESC` follows the expression.
pub direction: SortDirection,
}
/// One item in a `SELECT` projection list. /// One item in a `SELECT` projection list.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum SelectItem { pub enum SelectItem {
@ -59,3 +70,12 @@ pub enum BinaryOp {
/// Boolean conjunction. /// Boolean conjunction.
And, And,
} }
/// Sort direction for `ORDER BY`.
///
/// This is the AST-level direction produced by the parser; the planner maps it
/// onto the logical plan's own sort-direction type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SortDirection {
/// Ascending order (`ASC`, also used when no direction keyword is written).
Asc,
/// Descending order (`DESC`).
Desc,
}

View File

@ -1,7 +1,9 @@
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem, TableRef}; use super::ast::{
BinaryOp, Expr, Literal, OrderByItem, Select, SelectItem, SortDirection, TableRef,
};
/// Errors returned by the minimal SQL parser. /// Errors returned by the minimal SQL parser.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
@ -34,6 +36,10 @@ enum Token {
Where, Where,
As, As,
And, And,
Order,
By,
Asc,
Desc,
Null, Null,
Identifier(String), Identifier(String),
String(String), String(String),
@ -70,6 +76,13 @@ impl Parser {
} else { } else {
None None
}; };
let order_by = if self.peek() == Some(&Token::Order) {
self.index += 1;
self.expect_keyword(Token::By, "BY")?;
self.parse_order_by()?
} else {
Vec::new()
};
if let Some(token) = self.peek() { if let Some(token) = self.peek() {
return Err(ParseError::UnexpectedToken(render_token(token))); return Err(ParseError::UnexpectedToken(render_token(token)));
@ -79,6 +92,7 @@ impl Parser {
projection, projection,
from, from,
selection, selection,
order_by,
}) })
} }
@ -152,6 +166,34 @@ impl Parser {
Ok(expr) Ok(expr)
} }
/// Parses the comma-separated item list that follows `ORDER BY`.
///
/// Each item is one operand optionally followed by `ASC` or `DESC`; an item
/// without a direction keyword is ascending. Parsing stops at the first token
/// after an item that is not a comma, leaving that token unconsumed.
///
/// # Errors
///
/// Propagates any error from parsing an operand.
fn parse_order_by(&mut self) -> Result<Vec<OrderByItem>, ParseError> {
    let mut items = Vec::new();

    loop {
        let expr = self.parse_operand()?;

        // Only step past the next token if it really is a direction keyword.
        let explicit = match self.peek() {
            Some(Token::Asc) => Some(SortDirection::Asc),
            Some(Token::Desc) => Some(SortDirection::Desc),
            _ => None,
        };
        if explicit.is_some() {
            self.index += 1;
        }
        let direction = explicit.unwrap_or(SortDirection::Asc);

        items.push(OrderByItem { expr, direction });

        if self.peek() != Some(&Token::Comma) {
            return Ok(items);
        }
        self.index += 1;
    }
}
fn parse_equality(&mut self) -> Result<Expr, ParseError> { fn parse_equality(&mut self) -> Result<Expr, ParseError> {
let left = self.parse_operand()?; let left = self.parse_operand()?;
match self.next().ok_or(ParseError::UnexpectedEnd)? { match self.next().ok_or(ParseError::UnexpectedEnd)? {
@ -237,6 +279,10 @@ fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
"WHERE" => Token::Where, "WHERE" => Token::Where,
"AS" => Token::As, "AS" => Token::As,
"AND" => Token::And, "AND" => Token::And,
"ORDER" => Token::Order,
"BY" => Token::By,
"ASC" => Token::Asc,
"DESC" => Token::Desc,
"NULL" => Token::Null, "NULL" => Token::Null,
_ => Token::Identifier(ident), _ => Token::Identifier(ident),
}; };
@ -305,6 +351,10 @@ fn render_token(token: &Token) -> String {
Token::Where => "WHERE".to_string(), Token::Where => "WHERE".to_string(),
Token::As => "AS".to_string(), Token::As => "AS".to_string(),
Token::And => "AND".to_string(), Token::And => "AND".to_string(),
Token::Order => "ORDER".to_string(),
Token::By => "BY".to_string(),
Token::Asc => "ASC".to_string(),
Token::Desc => "DESC".to_string(),
Token::Null => "NULL".to_string(), Token::Null => "NULL".to_string(),
Token::Identifier(name) => name.clone(), Token::Identifier(name) => name.clone(),
Token::String(value) => format!("'{}'", value), Token::String(value) => format!("'{}'", value),
@ -331,6 +381,7 @@ mod tests {
); );
assert_eq!(select.projection.len(), 1); assert_eq!(select.projection.len(), 1);
assert!(select.selection.is_some()); assert!(select.selection.is_some());
assert!(select.order_by.is_empty());
} }
#[test] #[test]
@ -437,4 +488,23 @@ mod tests {
}) })
); );
} }
#[test]
fn parses_order_by_clause() {
    let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC, c1 ASC").unwrap();

    // Expected items, in clause order, as (column name, direction) pairs.
    let expected: Vec<OrderByItem> = [("c0", SortDirection::Desc), ("c1", SortDirection::Asc)]
        .iter()
        .map(|(name, direction)| OrderByItem {
            expr: Expr::Identifier(name.to_string()),
            direction: *direction,
        })
        .collect();

    assert_eq!(select.order_by, expected);
}
} }

View File

@ -217,3 +217,17 @@ fn select_where_and_applies_multiple_filters() {
assert_eq!(result.rows().len(), 1); assert_eq!(result.rows().len(), 1);
assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice"); assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice");
} }
#[test]
fn select_order_by_desc_sorts_rows() {
    let instance = parent_instance();
    let catalog = PredicateCatalog::from_instance(&instance).unwrap();

    let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC").unwrap();
    let plan = plan_select(&select, &catalog).unwrap();
    let result = execute(&plan, &instance).unwrap();

    // Render the first column of every row, keeping the executor's order.
    let first_column: Vec<String> = result
        .rows()
        .iter()
        .map(|row| row.values()[0].to_string())
        .collect();
    assert_eq!(first_column, vec!["bob", "alice"]);
}