use std::error::Error; use std::fmt; use super::ast::{ BinaryOp, Expr, Literal, OrderByItem, Select, SelectItem, SortDirection, TableRef, }; /// Errors returned by the minimal SQL parser. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParseError { UnexpectedEnd, ExpectedToken(&'static str), ExpectedIdentifier, UnexpectedToken(String), UnterminatedString, } impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::UnexpectedEnd => write!(f, "unexpected end of input"), Self::ExpectedToken(token) => write!(f, "expected `{}`", token), Self::ExpectedIdentifier => write!(f, "expected identifier"), Self::UnexpectedToken(token) => write!(f, "unexpected token `{}`", token), Self::UnterminatedString => write!(f, "unterminated string literal"), } } } impl Error for ParseError {} #[derive(Debug, Clone, PartialEq, Eq)] enum Token { Select, From, Where, As, And, Order, By, Asc, Desc, Null, Identifier(String), String(String), Star, Comma, Eq, } /// Parse a `SELECT-FROM-WHERE` query in the current SQL subset. pub fn parse_select(input: &str) -> Result { let tokens = tokenize(input)?; let mut parser = Parser::new(tokens); parser.parse_select() } struct Parser { tokens: Vec, index: usize, } impl Parser { fn new(tokens: Vec) -> Self { Self { tokens, index: 0 } } fn parse_select(&mut self) -> Result { self.expect_keyword(Token::Select, "SELECT")?; let projection = self.parse_projection()?; self.expect_keyword(Token::From, "FROM")?; let from = self.parse_from_list()?; let selection = if self.peek() == Some(&Token::Where) { self.index += 1; Some(self.parse_expr()?) } else { None }; let order_by = if self.peek() == Some(&Token::Order) { self.index += 1; self.expect_keyword(Token::By, "BY")?; self.parse_order_by()? } else { Vec::new() }; if let Some(token) = self.peek() { return Err(ParseError::UnexpectedToken(render_token(token))); } Ok(Select { projection, from, selection, order_by, }) } fn parse_projection(&mut self) -> Result, ParseError> { let mut items = Vec::new(); loop { let item = match self.peek().ok_or(ParseError::UnexpectedEnd)? { Token::Star => { self.index += 1; SelectItem::Wildcard } _ => { let expr = self.parse_operand()?; let alias = if self.peek() == Some(&Token::As) { self.index += 1; Some(self.expect_identifier()?) } else { None }; SelectItem::Expr { expr, alias } } }; items.push(item); if self.peek() == Some(&Token::Comma) { self.index += 1; continue; } break; } Ok(items) } fn parse_from_list(&mut self) -> Result, ParseError> { let mut tables = Vec::new(); loop { let name = self.expect_identifier()?; let alias = if self.peek() == Some(&Token::As) { self.index += 1; Some(self.expect_identifier()?) } else { None }; tables.push(TableRef { name, alias }); if self.peek() == Some(&Token::Comma) { self.index += 1; continue; } break; } Ok(tables) } fn parse_expr(&mut self) -> Result { let mut expr = self.parse_equality()?; while self.peek() == Some(&Token::And) { self.index += 1; let right = self.parse_equality()?; expr = Expr::Binary { left: Box::new(expr), op: BinaryOp::And, right: Box::new(right), }; } Ok(expr) } fn parse_order_by(&mut self) -> Result, ParseError> { let mut items = Vec::new(); loop { let expr = self.parse_operand()?; let direction = match self.peek() { Some(Token::Asc) => { self.index += 1; SortDirection::Asc } Some(Token::Desc) => { self.index += 1; SortDirection::Desc } _ => SortDirection::Asc, }; items.push(OrderByItem { expr, direction }); if self.peek() == Some(&Token::Comma) { self.index += 1; continue; } break; } Ok(items) } fn parse_equality(&mut self) -> Result { let left = self.parse_operand()?; match self.next().ok_or(ParseError::UnexpectedEnd)? { Token::Eq => { let right = self.parse_operand()?; Ok(Expr::Binary { left: Box::new(left), op: BinaryOp::Eq, right: Box::new(right), }) } other => Err(ParseError::UnexpectedToken(render_token(&other))), } } fn parse_operand(&mut self) -> Result { match self.next().ok_or(ParseError::UnexpectedEnd)? { Token::Identifier(name) => Ok(Expr::Identifier(name)), Token::String(value) => Ok(Expr::Literal(Literal::String(value))), Token::Null => Ok(Expr::Literal(Literal::Null)), other => Err(ParseError::UnexpectedToken(render_token(&other))), } } fn expect_keyword(&mut self, token: Token, label: &'static str) -> Result<(), ParseError> { let next = self.next().ok_or(ParseError::UnexpectedEnd)?; if next == token { Ok(()) } else { Err(ParseError::ExpectedToken(label)) } } fn expect_identifier(&mut self) -> Result { match self.next().ok_or(ParseError::UnexpectedEnd)? { Token::Identifier(name) => Ok(name), _ => Err(ParseError::ExpectedIdentifier), } } fn peek(&self) -> Option<&Token> { self.tokens.get(self.index) } fn next(&mut self) -> Option { let token = self.tokens.get(self.index).cloned(); if token.is_some() { self.index += 1; } token } } fn tokenize(input: &str) -> Result, ParseError> { let mut chars = input.chars().peekable(); let mut tokens = Vec::new(); while let Some(ch) = chars.peek().copied() { if ch.is_whitespace() { chars.next(); continue; } match ch { '*' => { chars.next(); tokens.push(Token::Star); } ',' => { chars.next(); tokens.push(Token::Comma); } '=' => { chars.next(); tokens.push(Token::Eq); } '\'' => tokens.push(Token::String(parse_string(&mut chars)?)), ch if is_identifier_start(ch) => { let ident = parse_identifier(&mut chars); let token = match ident.to_ascii_uppercase().as_str() { "SELECT" => Token::Select, "FROM" => Token::From, "WHERE" => Token::Where, "AS" => Token::As, "AND" => Token::And, "ORDER" => Token::Order, "BY" => Token::By, "ASC" => Token::Asc, "DESC" => Token::Desc, "NULL" => Token::Null, _ => Token::Identifier(ident), }; tokens.push(token); } other => return Err(ParseError::UnexpectedToken(other.to_string())), } } Ok(tokens) } fn parse_string(chars: &mut std::iter::Peekable) -> Result where I: Iterator, { let mut value = String::new(); let quote = chars.next(); if quote != Some('\'') { return Err(ParseError::ExpectedToken("'")); } while let Some(ch) = chars.next() { if ch == '\'' { if chars.peek() == Some(&'\'') { chars.next(); value.push('\''); continue; } return Ok(value); } value.push(ch); } Err(ParseError::UnterminatedString) } fn parse_identifier(chars: &mut std::iter::Peekable) -> String where I: Iterator, { let mut ident = String::new(); while let Some(ch) = chars.peek().copied() { if is_identifier_part(ch) { ident.push(ch); chars.next(); } else { break; } } ident } fn is_identifier_start(ch: char) -> bool { ch.is_ascii_alphabetic() || ch == '_' } fn is_identifier_part(ch: char) -> bool { ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.') } fn render_token(token: &Token) -> String { match token { Token::Select => "SELECT".to_string(), Token::From => "FROM".to_string(), Token::Where => "WHERE".to_string(), Token::As => "AS".to_string(), Token::And => "AND".to_string(), Token::Order => "ORDER".to_string(), Token::By => "BY".to_string(), Token::Asc => "ASC".to_string(), Token::Desc => "DESC".to_string(), Token::Null => "NULL".to_string(), Token::Identifier(name) => name.clone(), Token::String(value) => format!("'{}'", value), Token::Star => "*".to_string(), Token::Comma => ",".to_string(), Token::Eq => "=".to_string(), } } #[cfg(test)] mod tests { use super::*; #[test] fn parses_select_with_filter() { let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap(); assert_eq!( select.from, vec![TableRef { name: "Parent".to_string(), alias: None, }] ); assert_eq!(select.projection.len(), 1); assert!(select.selection.is_some()); assert!(select.order_by.is_empty()); } #[test] fn parses_projection_aliases_and_literals() { let select = parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap(); assert_eq!(select.projection.len(), 3); assert_eq!( select.projection[0], SelectItem::Expr { expr: Expr::Identifier("c0".to_string()), alias: Some("parent_name".to_string()), } ); assert_eq!( select.projection[1], SelectItem::Expr { expr: Expr::Literal(Literal::String("seed".to_string())), alias: Some("label".to_string()), } ); assert_eq!( select.projection[2], SelectItem::Expr { expr: Expr::Literal(Literal::Null), alias: None, } ); } #[test] fn parses_multi_table_select_with_qualified_columns() { let select = parse_select( "SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \ WHERE Parent.child = Ancestor.parent", ) .unwrap(); assert_eq!( select.from, vec![ TableRef { name: "Parent".to_string(), alias: None, }, TableRef { name: "Ancestor".to_string(), alias: None, } ] ); assert_eq!( select.projection[0], SelectItem::Expr { expr: Expr::Identifier("Parent.parent".to_string()), alias: None, } ); } #[test] fn parses_table_aliases() { let select = parse_select( "SELECT p.parent, a.child FROM Parent AS p, Ancestor AS a \ WHERE p.child = a.parent", ) .unwrap(); assert_eq!( select.from, vec![ TableRef { name: "Parent".to_string(), alias: Some("p".to_string()), }, TableRef { name: "Ancestor".to_string(), alias: Some("a".to_string()), } ] ); } #[test] fn parses_conjunctive_where_clause() { let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob' AND c0 = 'alice'").unwrap(); assert_eq!( select.selection, Some(Expr::Binary { left: Box::new(Expr::Binary { left: Box::new(Expr::Identifier("c1".to_string())), op: BinaryOp::Eq, right: Box::new(Expr::Literal(Literal::String("bob".to_string()))), }), op: BinaryOp::And, right: Box::new(Expr::Binary { left: Box::new(Expr::Identifier("c0".to_string())), op: BinaryOp::Eq, right: Box::new(Expr::Literal(Literal::String("alice".to_string()))), }), }) ); } #[test] fn parses_order_by_clause() { let select = parse_select("SELECT c0 FROM Parent ORDER BY c0 DESC, c1 ASC").unwrap(); assert_eq!( select.order_by, vec![ OrderByItem { expr: Expr::Identifier("c0".to_string()), direction: SortDirection::Desc, }, OrderByItem { expr: Expr::Identifier("c1".to_string()), direction: SortDirection::Asc, }, ] ); } }