query-engine/src/sql/parser.rs

511 lines
14 KiB
Rust
Raw Normal View History

2026-04-09 12:38:43 +02:00
use std::error::Error;
use std::fmt;
2026-04-10 10:10:46 +02:00
use super::ast::{
BinaryOp, Expr, Literal, OrderByItem, Select, SelectItem, SortDirection, TableRef,
};
2026-04-09 12:38:43 +02:00
/// Errors returned by the minimal SQL parser.
2026-04-09 12:38:43 +02:00
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The token stream ended while more input was required.
    UnexpectedEnd,
    /// A specific keyword or symbol (named by the payload) was required.
    ExpectedToken(&'static str),
    /// An identifier was required but a different token was found.
    ExpectedIdentifier,
    /// A token or character (rendered as text in the payload) is not valid here.
    UnexpectedToken(String),
    /// A string literal was opened with `'` but never closed.
    UnterminatedString,
}

impl fmt::Display for ParseError {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Payload-free variants are fixed strings, so write them directly.
            ParseError::UnexpectedEnd => f.write_str("unexpected end of input"),
            ParseError::ExpectedIdentifier => f.write_str("expected identifier"),
            ParseError::UnterminatedString => f.write_str("unterminated string literal"),
            ParseError::ExpectedToken(expected) => write!(f, "expected `{}`", expected),
            ParseError::UnexpectedToken(found) => write!(f, "unexpected token `{}`", found),
        }
    }
}

impl Error for ParseError {}
/// Lexical tokens produced by `tokenize` and consumed by `Parser`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    // Keywords; the tokenizer recognizes them case-insensitively.
    Select,
    From,
    Where,
    As,
    And,
    Order,
    By,
    Asc,
    Desc,
    Null,
    /// A bare word that is not a keyword. May contain `.` (e.g. `Parent.child`).
    Identifier(String),
    /// Contents of a single-quoted string literal, with `''` already decoded to `'`.
    String(String),
    /// The `*` symbol.
    Star,
    /// The `,` symbol.
    Comma,
    /// The `=` symbol.
    Eq,
}
/// Parse a `SELECT ... FROM ... [WHERE ...] [ORDER BY ...]` query in the current SQL subset.
2026-04-09 12:38:43 +02:00
///
/// The input is tokenized in full first; the resulting tokens are then handed to a
/// fresh `Parser`.
///
/// # Errors
///
/// Returns a [`ParseError`] if tokenization fails or if the tokens do not form a
/// query in the supported grammar.
pub fn parse_select(input: &str) -> Result<Select, ParseError> {
    Parser::new(tokenize(input)?).parse_select()
}
/// Parser state: the full token list plus a cursor into it.
struct Parser {
// Every token of the query, in source order.
tokens: Vec<Token>,
// Position in `tokens` of the next token to be read.
index: usize,
}
impl Parser {
fn new(tokens: Vec<Token>) -> Self {
Self { tokens, index: 0 }
}
fn parse_select(&mut self) -> Result<Select, ParseError> {
self.expect_keyword(Token::Select, "SELECT")?;
let projection = self.parse_projection()?;
self.expect_keyword(Token::From, "FROM")?;
let from = self.parse_from_list()?;
2026-04-09 12:38:43 +02:00
let selection = if self.peek() == Some(&Token::Where) {
self.index += 1;
Some(self.parse_expr()?)
} else {
None
};
2026-04-10 10:10:46 +02:00
let order_by = if self.peek() == Some(&Token::Order) {
self.index += 1;
self.expect_keyword(Token::By, "BY")?;
self.parse_order_by()?
} else {
Vec::new()
};
2026-04-09 12:38:43 +02:00
if let Some(token) = self.peek() {
return Err(ParseError::UnexpectedToken(render_token(token)));
}
Ok(Select {
projection,
from,
selection,
2026-04-10 10:10:46 +02:00
order_by,
2026-04-09 12:38:43 +02:00
})
}
fn parse_projection(&mut self) -> Result<Vec<SelectItem>, ParseError> {
let mut items = Vec::new();
loop {
let item = match self.peek().ok_or(ParseError::UnexpectedEnd)? {
Token::Star => {
self.index += 1;
SelectItem::Wildcard
}
_ => {
let expr = self.parse_operand()?;
let alias = if self.peek() == Some(&Token::As) {
self.index += 1;
Some(self.expect_identifier()?)
} else {
None
};
SelectItem::Expr { expr, alias }
}
2026-04-09 12:38:43 +02:00
};
items.push(item);
if self.peek() == Some(&Token::Comma) {
self.index += 1;
continue;
}
break;
}
Ok(items)
}
2026-04-10 09:56:18 +02:00
fn parse_from_list(&mut self) -> Result<Vec<TableRef>, ParseError> {
let mut tables = Vec::new();
loop {
2026-04-10 09:56:18 +02:00
let name = self.expect_identifier()?;
let alias = if self.peek() == Some(&Token::As) {
self.index += 1;
Some(self.expect_identifier()?)
} else {
None
};
tables.push(TableRef { name, alias });
if self.peek() == Some(&Token::Comma) {
self.index += 1;
continue;
}
break;
}
Ok(tables)
}
2026-04-09 12:38:43 +02:00
fn parse_expr(&mut self) -> Result<Expr, ParseError> {
let mut expr = self.parse_equality()?;
while self.peek() == Some(&Token::And) {
self.index += 1;
let right = self.parse_equality()?;
expr = Expr::Binary {
left: Box::new(expr),
op: BinaryOp::And,
right: Box::new(right),
};
}
Ok(expr)
}
2026-04-10 10:10:46 +02:00
fn parse_order_by(&mut self) -> Result<Vec<OrderByItem>, ParseError> {
let mut items = Vec::new();
loop {
let expr = self.parse_operand()?;
let direction = match self.peek() {
Some(Token::Asc) => {
self.index += 1;
SortDirection::Asc
}
Some(Token::Desc) => {
self.index += 1;
SortDirection::Desc
}
_ => SortDirection::Asc,
};
items.push(OrderByItem { expr, direction });
if self.peek() == Some(&Token::Comma) {
self.index += 1;
continue;
}
break;
}
Ok(items)
}
fn parse_equality(&mut self) -> Result<Expr, ParseError> {
2026-04-09 12:38:43 +02:00
let left = self.parse_operand()?;
match self.next().ok_or(ParseError::UnexpectedEnd)? {
Token::Eq => {
let right = self.parse_operand()?;
Ok(Expr::Binary {
left: Box::new(left),
op: BinaryOp::Eq,
right: Box::new(right),
})
}
other => Err(ParseError::UnexpectedToken(render_token(&other))),
}
}
fn parse_operand(&mut self) -> Result<Expr, ParseError> {
match self.next().ok_or(ParseError::UnexpectedEnd)? {
Token::Identifier(name) => Ok(Expr::Identifier(name)),
Token::String(value) => Ok(Expr::Literal(Literal::String(value))),
Token::Null => Ok(Expr::Literal(Literal::Null)),
other => Err(ParseError::UnexpectedToken(render_token(&other))),
}
}
fn expect_keyword(&mut self, token: Token, label: &'static str) -> Result<(), ParseError> {
let next = self.next().ok_or(ParseError::UnexpectedEnd)?;
if next == token {
Ok(())
} else {
Err(ParseError::ExpectedToken(label))
}
}
fn expect_identifier(&mut self) -> Result<String, ParseError> {
match self.next().ok_or(ParseError::UnexpectedEnd)? {
Token::Identifier(name) => Ok(name),
_ => Err(ParseError::ExpectedIdentifier),
}
}
fn peek(&self) -> Option<&Token> {
self.tokens.get(self.index)
}
fn next(&mut self) -> Option<Token> {
let token = self.tokens.get(self.index).cloned();
if token.is_some() {
self.index += 1;
}
token
}
}
/// Splits `input` into tokens, skipping whitespace.
///
/// Words are matched against the keyword list case-insensitively; any other word
/// becomes a `Token::Identifier` with its original spelling preserved.
///
/// # Errors
///
/// Returns `ParseError::UnexpectedToken` for a character that cannot start any
/// token, and propagates the error from `parse_string` for a malformed string
/// literal.
fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
    let mut chars = input.chars().peekable();
    let mut tokens = Vec::new();

    while let Some(&ch) = chars.peek() {
        if ch.is_whitespace() {
            chars.next();
            continue;
        }

        let token = match ch {
            '*' => {
                chars.next();
                Token::Star
            }
            ',' => {
                chars.next();
                Token::Comma
            }
            '=' => {
                chars.next();
                Token::Eq
            }
            // `parse_string` consumes the opening quote itself.
            '\'' => Token::String(parse_string(&mut chars)?),
            _ if is_identifier_start(ch) => {
                let ident = parse_identifier(&mut chars);
                match ident.to_ascii_uppercase().as_str() {
                    "SELECT" => Token::Select,
                    "FROM" => Token::From,
                    "WHERE" => Token::Where,
                    "AS" => Token::As,
                    "AND" => Token::And,
                    "ORDER" => Token::Order,
                    "BY" => Token::By,
                    "ASC" => Token::Asc,
                    "DESC" => Token::Desc,
                    "NULL" => Token::Null,
                    _ => Token::Identifier(ident),
                }
            }
            other => return Err(ParseError::UnexpectedToken(other.to_string())),
        };
        tokens.push(token);
    }

    Ok(tokens)
}
/// Reads a single-quoted string literal from `chars`, starting at the opening quote.
///
/// A doubled quote (`''`) inside the literal stands for one literal `'`. On
/// success the iterator is left just past the closing quote.
///
/// # Errors
///
/// Returns `ParseError::ExpectedToken("'")` if the next character is not `'`, and
/// `ParseError::UnterminatedString` if the input ends before the closing quote.
fn parse_string<I>(chars: &mut std::iter::Peekable<I>) -> Result<String, ParseError>
where
    I: Iterator<Item = char>,
{
    const QUOTE: char = '\'';

    match chars.next() {
        Some(QUOTE) => {}
        _ => return Err(ParseError::ExpectedToken("'")),
    }

    let mut literal = String::new();
    loop {
        match chars.next() {
            None => return Err(ParseError::UnterminatedString),
            Some(QUOTE) => {
                // A quote ends the literal unless it is immediately doubled.
                if chars.peek() != Some(&QUOTE) {
                    return Ok(literal);
                }
                chars.next();
                literal.push(QUOTE);
            }
            Some(other) => literal.push(other),
        }
    }
}
/// Consumes the longest run of identifier characters at the front of `chars`.
///
/// Stops, without consuming, at the first character rejected by
/// `is_identifier_part` or at end of input. Returns an empty string if the very
/// first character is rejected.
fn parse_identifier<I>(chars: &mut std::iter::Peekable<I>) -> String
where
    I: Iterator<Item = char>,
{
    let mut name = String::new();
    loop {
        match chars.peek() {
            Some(&c) if is_identifier_part(c) => {
                name.push(c);
                chars.next();
            }
            _ => return name,
        }
    }
}
/// Returns `true` if `ch` may begin an identifier: an ASCII letter or `_`.
fn is_identifier_start(ch: char) -> bool {
    matches!(ch, 'A'..='Z' | 'a'..='z' | '_')
}
/// Returns `true` if `ch` may continue an identifier: an ASCII letter or digit,
/// `_`, or `.`.
///
/// Accepting `.` makes a qualified name such as `Parent.child` lex as a single
/// identifier.
fn is_identifier_part(ch: char) -> bool {
    ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.')
}
/// Renders `token` as SQL text for use in error messages.
///
/// Keywords are rendered in upper case and identifiers with their original
/// spelling. String literals are re-quoted, with each embedded `'` doubled so the
/// rendered text mirrors the escaping the tokenizer accepts.
fn render_token(token: &Token) -> String {
    let text = match token {
        Token::Select => "SELECT",
        Token::From => "FROM",
        Token::Where => "WHERE",
        Token::As => "AS",
        Token::And => "AND",
        Token::Order => "ORDER",
        Token::By => "BY",
        Token::Asc => "ASC",
        Token::Desc => "DESC",
        Token::Null => "NULL",
        Token::Identifier(name) => name.as_str(),
        // The tokenizer decodes `''` to `'`; reverse that here so the literal
        // is shown unambiguously.
        Token::String(value) => return format!("'{}'", value.replace('\'', "''")),
        Token::Star => "*",
        Token::Comma => ",",
        Token::Eq => "=",
    };
    text.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a table reference with an optional alias.
    fn table(name: &str, alias: Option<&str>) -> TableRef {
        TableRef {
            name: name.to_string(),
            alias: alias.map(str::to_string),
        }
    }

    /// Builds an identifier expression.
    fn ident(name: &str) -> Expr {
        Expr::Identifier(name.to_string())
    }

    /// Builds a string-literal expression.
    fn string(value: &str) -> Expr {
        Expr::Literal(Literal::String(value.to_string()))
    }

    /// Builds a binary expression from its two sides and operator.
    fn binary(left: Expr, op: BinaryOp, right: Expr) -> Expr {
        Expr::Binary {
            left: Box::new(left),
            op,
            right: Box::new(right),
        }
    }

    #[test]
    fn parses_select_with_filter() {
        let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap();
        assert_eq!(select.from, vec![table("Parent", None)]);
        assert_eq!(select.projection.len(), 1);
        assert!(select.selection.is_some());
        assert!(select.order_by.is_empty());
    }

    #[test]
    fn parses_projection_aliases_and_literals() {
        let select =
            parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap();
        assert_eq!(select.projection.len(), 3);
        assert_eq!(
            select.projection[0],
            SelectItem::Expr {
                expr: ident("c0"),
                alias: Some("parent_name".to_string()),
            }
        );
        assert_eq!(
            select.projection[1],
            SelectItem::Expr {
                expr: string("seed"),
                alias: Some("label".to_string()),
            }
        );
        assert_eq!(
            select.projection[2],
            SelectItem::Expr {
                expr: Expr::Literal(Literal::Null),
                alias: None,
            }
        );
    }

    #[test]
    fn parses_multi_table_select_with_qualified_columns() {
        let select = parse_select(
            "SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \
             WHERE Parent.child = Ancestor.parent",
        )
        .unwrap();
        assert_eq!(
            select.from,
            vec![table("Parent", None), table("Ancestor", None)]
        );
        assert_eq!(
            select.projection[0],
            SelectItem::Expr {
                expr: ident("Parent.parent"),
                alias: None,
            }
        );
    }

    #[test]
    fn parses_table_aliases() {
        let select = parse_select(
            "SELECT p.parent, a.child FROM Parent AS p, Ancestor AS a \
             WHERE p.child = a.parent",
        )
        .unwrap();
        assert_eq!(
            select.from,
            vec![table("Parent", Some("p")), table("Ancestor", Some("a"))]
        );
    }

    #[test]
    fn parses_conjunctive_where_clause() {
        let select =
            parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice'").unwrap();
        assert_eq!(
            select.selection,
            Some(binary(
                binary(ident("c1"), BinaryOp::Eq, string("bob")),
                BinaryOp::And,
                binary(ident("c0"), BinaryOp::Eq, string("alice")),
            ))
        );
    }

    #[test]
    fn parses_order_by_clause() {
        let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC, c1 ASC").unwrap();
        assert_eq!(
            select.order_by,
            vec![
                OrderByItem {
                    expr: ident("c0"),
                    direction: SortDirection::Desc,
                },
                OrderByItem {
                    expr: ident("c1"),
                    direction: SortDirection::Asc,
                },
            ]
        );
    }

    #[test]
    fn order_by_defaults_to_ascending_and_keywords_ignore_case() {
        let select = parse_select("select c0 from Parent order by c0").unwrap();
        assert_eq!(
            select.order_by,
            vec![OrderByItem {
                expr: ident("c0"),
                direction: SortDirection::Asc,
            }]
        );
    }

    #[test]
    fn rejects_missing_table_name() {
        assert_eq!(
            parse_select("SELECT c0 FROM"),
            Err(ParseError::UnexpectedEnd)
        );
    }

    #[test]
    fn rejects_trailing_tokens() {
        assert_eq!(
            parse_select("SELECT c0 FROM Parent extra"),
            Err(ParseError::UnexpectedToken("extra".to_string()))
        );
    }

    #[test]
    fn rejects_unterminated_string_literal() {
        assert_eq!(
            parse_select("SELECT 'abc FROM Parent"),
            Err(ParseError::UnterminatedString)
        );
    }
}