use std::error::Error; use std::fmt; use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem}; /// Errors returned by the minimal SQL parser. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParseError { UnexpectedEnd, ExpectedToken(&'static str), ExpectedIdentifier, UnexpectedToken(String), UnterminatedString, } impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::UnexpectedEnd => write!(f, "unexpected end of input"), Self::ExpectedToken(token) => write!(f, "expected `{}`", token), Self::ExpectedIdentifier => write!(f, "expected identifier"), Self::UnexpectedToken(token) => write!(f, "unexpected token `{}`", token), Self::UnterminatedString => write!(f, "unterminated string literal"), } } } impl Error for ParseError {} #[derive(Debug, Clone, PartialEq, Eq)] enum Token { Select, From, Where, As, Null, Identifier(String), String(String), Star, Comma, Eq, } /// Parse a `SELECT-FROM-WHERE` query in the current SQL subset. pub fn parse_select(input: &str) -> Result { let tokens = tokenize(input)?; let mut parser = Parser::new(tokens); parser.parse_select() } struct Parser { tokens: Vec, index: usize, } impl Parser { fn new(tokens: Vec) -> Self { Self { tokens, index: 0 } } fn parse_select(&mut self) -> Result { self.expect_keyword(Token::Select, "SELECT")?; let projection = self.parse_projection()?; self.expect_keyword(Token::From, "FROM")?; let from = self.parse_from_list()?; let selection = if self.peek() == Some(&Token::Where) { self.index += 1; Some(self.parse_expr()?) } else { None }; if let Some(token) = self.peek() { return Err(ParseError::UnexpectedToken(render_token(token))); } Ok(Select { projection, from, selection, }) } fn parse_projection(&mut self) -> Result, ParseError> { let mut items = Vec::new(); loop { let item = match self.peek().ok_or(ParseError::UnexpectedEnd)? { Token::Star => { self.index += 1; SelectItem::Wildcard } _ => { let expr = self.parse_operand()?; let alias = if self.peek() == Some(&Token::As) { self.index += 1; Some(self.expect_identifier()?) } else { None }; SelectItem::Expr { expr, alias } } }; items.push(item); if self.peek() == Some(&Token::Comma) { self.index += 1; continue; } break; } Ok(items) } fn parse_from_list(&mut self) -> Result, ParseError> { let mut tables = Vec::new(); loop { tables.push(self.expect_identifier()?); if self.peek() == Some(&Token::Comma) { self.index += 1; continue; } break; } Ok(tables) } fn parse_expr(&mut self) -> Result { let left = self.parse_operand()?; match self.next().ok_or(ParseError::UnexpectedEnd)? { Token::Eq => { let right = self.parse_operand()?; Ok(Expr::Binary { left: Box::new(left), op: BinaryOp::Eq, right: Box::new(right), }) } other => Err(ParseError::UnexpectedToken(render_token(&other))), } } fn parse_operand(&mut self) -> Result { match self.next().ok_or(ParseError::UnexpectedEnd)? { Token::Identifier(name) => Ok(Expr::Identifier(name)), Token::String(value) => Ok(Expr::Literal(Literal::String(value))), Token::Null => Ok(Expr::Literal(Literal::Null)), other => Err(ParseError::UnexpectedToken(render_token(&other))), } } fn expect_keyword(&mut self, token: Token, label: &'static str) -> Result<(), ParseError> { let next = self.next().ok_or(ParseError::UnexpectedEnd)?; if next == token { Ok(()) } else { Err(ParseError::ExpectedToken(label)) } } fn expect_identifier(&mut self) -> Result { match self.next().ok_or(ParseError::UnexpectedEnd)? { Token::Identifier(name) => Ok(name), _ => Err(ParseError::ExpectedIdentifier), } } fn peek(&self) -> Option<&Token> { self.tokens.get(self.index) } fn next(&mut self) -> Option { let token = self.tokens.get(self.index).cloned(); if token.is_some() { self.index += 1; } token } } fn tokenize(input: &str) -> Result, ParseError> { let mut chars = input.chars().peekable(); let mut tokens = Vec::new(); while let Some(ch) = chars.peek().copied() { if ch.is_whitespace() { chars.next(); continue; } match ch { '*' => { chars.next(); tokens.push(Token::Star); } ',' => { chars.next(); tokens.push(Token::Comma); } '=' => { chars.next(); tokens.push(Token::Eq); } '\'' => tokens.push(Token::String(parse_string(&mut chars)?)), ch if is_identifier_start(ch) => { let ident = parse_identifier(&mut chars); let token = match ident.to_ascii_uppercase().as_str() { "SELECT" => Token::Select, "FROM" => Token::From, "WHERE" => Token::Where, "AS" => Token::As, "NULL" => Token::Null, _ => Token::Identifier(ident), }; tokens.push(token); } other => return Err(ParseError::UnexpectedToken(other.to_string())), } } Ok(tokens) } fn parse_string(chars: &mut std::iter::Peekable) -> Result where I: Iterator, { let mut value = String::new(); let quote = chars.next(); if quote != Some('\'') { return Err(ParseError::ExpectedToken("'")); } while let Some(ch) = chars.next() { if ch == '\'' { if chars.peek() == Some(&'\'') { chars.next(); value.push('\''); continue; } return Ok(value); } value.push(ch); } Err(ParseError::UnterminatedString) } fn parse_identifier(chars: &mut std::iter::Peekable) -> String where I: Iterator, { let mut ident = String::new(); while let Some(ch) = chars.peek().copied() { if is_identifier_part(ch) { ident.push(ch); chars.next(); } else { break; } } ident } fn is_identifier_start(ch: char) -> bool { ch.is_ascii_alphabetic() || ch == '_' } fn is_identifier_part(ch: char) -> bool { ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.') } fn render_token(token: &Token) -> String { match token { Token::Select => "SELECT".to_string(), Token::From => "FROM".to_string(), Token::Where => "WHERE".to_string(), Token::As => "AS".to_string(), Token::Null => "NULL".to_string(), Token::Identifier(name) => name.clone(), Token::String(value) => format!("'{}'", value), Token::Star => "*".to_string(), Token::Comma => ",".to_string(), Token::Eq => "=".to_string(), } } #[cfg(test)] mod tests { use super::*; #[test] fn parses_select_with_filter() { let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap(); assert_eq!(select.from, vec!["Parent".to_string()]); assert_eq!(select.projection.len(), 1); assert!(select.selection.is_some()); } #[test] fn parses_projection_aliases_and_literals() { let select = parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap(); assert_eq!(select.projection.len(), 3); assert_eq!( select.projection[0], SelectItem::Expr { expr: Expr::Identifier("c0".to_string()), alias: Some("parent_name".to_string()), } ); assert_eq!( select.projection[1], SelectItem::Expr { expr: Expr::Literal(Literal::String("seed".to_string())), alias: Some("label".to_string()), } ); assert_eq!( select.projection[2], SelectItem::Expr { expr: Expr::Literal(Literal::Null), alias: None, } ); } #[test] fn parses_multi_table_select_with_qualified_columns() { let select = parse_select( "SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \ WHERE Parent.child = Ancestor.parent", ) .unwrap(); assert_eq!( select.from, vec!["Parent".to_string(), "Ancestor".to_string()] ); assert_eq!( select.projection[0], SelectItem::Expr { expr: Expr::Identifier("Parent.parent".to_string()), alias: None, } ); } }