query-engine/src/sql/parser.rs

318 lines
8.8 KiB
Rust

use std::error::Error;
use std::fmt;
use super::ast::{BinaryOp, Expr, Literal, Select, SelectItem};
/// Errors returned by the minimal SQL parser.
///
/// Raised both while splitting the input into tokens and while matching
/// those tokens against the supported `SELECT` grammar.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
/// The input ended while another token was still required.
UnexpectedEnd,
/// A specific keyword or symbol was required; the payload is its display
/// label (for example `"SELECT"`, `"FROM"` or `"'"`).
ExpectedToken(&'static str),
/// An identifier was required but some other token was found.
ExpectedIdentifier,
/// A token, or a raw character rejected by the tokenizer, that is not
/// valid at its position; the payload is its textual rendering.
UnexpectedToken(String),
/// A single-quoted string literal was opened but never closed.
UnterminatedString,
}
impl fmt::Display for ParseError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Variants with a payload interpolate it between backticks.
            Self::ExpectedToken(token) => write!(f, "expected `{}`", token),
            Self::UnexpectedToken(token) => write!(f, "unexpected token `{}`", token),
            // The remaining variants are fixed text.
            Self::UnexpectedEnd => f.write_str("unexpected end of input"),
            Self::ExpectedIdentifier => f.write_str("expected identifier"),
            Self::UnterminatedString => f.write_str("unterminated string literal"),
        }
    }
}
// The default `Error` methods suffice: no variant wraps another error value.
impl Error for ParseError {}
/// Lexical tokens produced by `tokenize` and consumed by `Parser`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
/// The `SELECT` keyword (recognised in any ASCII letter case).
Select,
/// The `FROM` keyword (recognised in any ASCII letter case).
From,
/// The `WHERE` keyword (recognised in any ASCII letter case).
Where,
/// The `AS` keyword (recognised in any ASCII letter case).
As,
/// The `NULL` keyword (recognised in any ASCII letter case).
Null,
/// Any other word of ASCII letters, digits and `_`, with its original
/// casing preserved.
Identifier(String),
/// The contents of a single-quoted literal, with each doubled quote
/// (`''`) already collapsed to a single `'`.
String(String),
/// The `*` symbol.
Star,
/// The `,` symbol.
Comma,
/// The `=` symbol.
Eq,
}
/// Parse a `SELECT-FROM-WHERE` query in the current SQL subset.
///
/// # Errors
///
/// Returns a [`ParseError`] when `input` cannot be split into tokens or when
/// the tokens do not form `SELECT <items> FROM <table> [WHERE <a> = <b>]`.
pub fn parse_select(input: &str) -> Result<Select, ParseError> {
    // Lex first; only a fully tokenized input is handed to the parser.
    tokenize(input).and_then(|tokens| Parser::new(tokens).parse_select())
}
/// Cursor over a token list; holds the state of a single parse.
struct Parser {
/// The tokens being parsed, in the order they were supplied.
tokens: Vec<Token>,
/// Position in `tokens` of the next token to be read.
index: usize,
}
impl Parser {
    /// Creates a parser positioned before the first token.
    fn new(tokens: Vec<Token>) -> Self {
        Self { index: 0, tokens }
    }

    /// Parses `SELECT <projection> FROM <identifier> [WHERE <expr>]`.
    ///
    /// Any token left over after the statement is reported as
    /// `ParseError::UnexpectedToken`.
    fn parse_select(&mut self) -> Result<Select, ParseError> {
        self.expect_keyword(Token::Select, "SELECT")?;
        let projection = self.parse_projection()?;
        self.expect_keyword(Token::From, "FROM")?;
        let from = self.expect_identifier()?;

        // The filter is optional and only parsed after an explicit WHERE.
        let selection = if self.consume_if(&Token::Where) {
            Some(self.parse_expr()?)
        } else {
            None
        };

        match self.peek() {
            Some(trailing) => Err(ParseError::UnexpectedToken(render_token(trailing))),
            None => Ok(Select {
                projection,
                from,
                selection,
            }),
        }
    }

    /// Parses a comma-separated, non-empty list of projection items.
    ///
    /// Each item is either `*` or an operand with an optional `AS <alias>`.
    fn parse_projection(&mut self) -> Result<Vec<SelectItem>, ParseError> {
        let mut items = Vec::new();
        loop {
            // Exhausted input surfaces as `UnexpectedEnd` from `parse_operand`.
            let item = if self.consume_if(&Token::Star) {
                SelectItem::Wildcard
            } else {
                let expr = self.parse_operand()?;
                let alias = if self.consume_if(&Token::As) {
                    Some(self.expect_identifier()?)
                } else {
                    None
                };
                SelectItem::Expr { expr, alias }
            };
            items.push(item);

            // Only a comma keeps the list going.
            if !self.consume_if(&Token::Comma) {
                return Ok(items);
            }
        }
    }

    /// Parses `<operand> = <operand>`, the only expression form supported.
    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
        let left = self.parse_operand()?;

        let operator = self.next().ok_or(ParseError::UnexpectedEnd)?;
        if operator != Token::Eq {
            return Err(ParseError::UnexpectedToken(render_token(&operator)));
        }

        let right = self.parse_operand()?;
        Ok(Expr::Binary {
            left: Box::new(left),
            op: BinaryOp::Eq,
            right: Box::new(right),
        })
    }

    /// Parses a single identifier, string literal or `NULL`.
    fn parse_operand(&mut self) -> Result<Expr, ParseError> {
        let token = self.next().ok_or(ParseError::UnexpectedEnd)?;
        let operand = match token {
            Token::Identifier(name) => Expr::Identifier(name),
            Token::String(value) => Expr::Literal(Literal::String(value)),
            Token::Null => Expr::Literal(Literal::Null),
            other => return Err(ParseError::UnexpectedToken(render_token(&other))),
        };
        Ok(operand)
    }

    /// Consumes the next token and checks that it equals `token`.
    ///
    /// `label` is the text carried by `ParseError::ExpectedToken` on mismatch.
    fn expect_keyword(&mut self, token: Token, label: &'static str) -> Result<(), ParseError> {
        match self.next() {
            None => Err(ParseError::UnexpectedEnd),
            Some(found) if found == token => Ok(()),
            Some(_) => Err(ParseError::ExpectedToken(label)),
        }
    }

    /// Consumes the next token and returns its name if it is an identifier.
    fn expect_identifier(&mut self) -> Result<String, ParseError> {
        match self.next() {
            Some(Token::Identifier(name)) => Ok(name),
            Some(_) => Err(ParseError::ExpectedIdentifier),
            None => Err(ParseError::UnexpectedEnd),
        }
    }

    /// Returns the next token without consuming it, or `None` at the end.
    fn peek(&self) -> Option<&Token> {
        self.tokens.get(self.index)
    }

    /// Returns a copy of the next token and advances past it; `None` at the end.
    fn next(&mut self) -> Option<Token> {
        let token = self.peek().cloned()?;
        self.index += 1;
        Some(token)
    }

    /// Advances past the next token when it equals `expected`; reports whether it did.
    fn consume_if(&mut self, expected: &Token) -> bool {
        let matched = self.peek() == Some(expected);
        if matched {
            self.index += 1;
        }
        matched
    }
}
/// Splits `input` into tokens, skipping whitespace.
///
/// Recognises `*`, `,`, `=`, single-quoted strings, and words made of ASCII
/// letters, digits and `_`. Words equal to `SELECT`, `FROM`, `WHERE`, `AS` or
/// `NULL` in any ASCII case become keyword tokens; other words become
/// identifiers with their casing kept. Any other character fails with
/// `ParseError::UnexpectedToken` carrying that character.
fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
    let mut tokens = Vec::new();
    let mut chars = input.chars().peekable();

    while let Some(&current) = chars.peek() {
        if current.is_whitespace() {
            chars.next();
            continue;
        }

        // Single-character symbols map straight to a token.
        let punctuation = match current {
            '*' => Some(Token::Star),
            ',' => Some(Token::Comma),
            '=' => Some(Token::Eq),
            _ => None,
        };

        let token = if let Some(symbol) = punctuation {
            chars.next();
            symbol
        } else if current == '\'' {
            // `parse_string` consumes the opening quote itself.
            Token::String(parse_string(&mut chars)?)
        } else if is_identifier_start(current) {
            let word = parse_identifier(&mut chars);
            match word.to_ascii_uppercase().as_str() {
                "SELECT" => Token::Select,
                "FROM" => Token::From,
                "WHERE" => Token::Where,
                "AS" => Token::As,
                "NULL" => Token::Null,
                _ => Token::Identifier(word),
            }
        } else {
            return Err(ParseError::UnexpectedToken(current.to_string()));
        };
        tokens.push(token);
    }

    Ok(tokens)
}
/// Reads a single-quoted string literal from `chars` and returns its contents.
///
/// The iterator must be positioned on the opening `'`. Inside the literal a
/// doubled quote (`''`) stands for one `'`; the first undoubled `'` ends the
/// literal and is consumed.
///
/// # Errors
///
/// * `ParseError::ExpectedToken("'")` if the next character is not `'`.
/// * `ParseError::UnterminatedString` if the input ends before the closing quote.
fn parse_string<I>(chars: &mut std::iter::Peekable<I>) -> Result<String, ParseError>
where
    I: Iterator<Item = char>,
{
    if chars.next() != Some('\'') {
        return Err(ParseError::ExpectedToken("'"));
    }

    let mut value = String::new();
    loop {
        match chars.next() {
            None => return Err(ParseError::UnterminatedString),
            Some('\'') => {
                // A quote not followed by another quote closes the literal.
                if chars.peek() != Some(&'\'') {
                    return Ok(value);
                }
                chars.next();
                value.push('\'');
            }
            Some(other) => value.push(other),
        }
    }
}
/// Consumes the longest run of identifier characters at the front of `chars`.
///
/// Stops, without consuming, at the first character rejected by
/// `is_identifier_part` or at the end of input. Returns the collected text,
/// which is empty when the first character is already rejected.
fn parse_identifier<I>(chars: &mut std::iter::Peekable<I>) -> String
where
    I: Iterator<Item = char>,
{
    let mut ident = String::new();
    while let Some(&candidate) = chars.peek() {
        if !is_identifier_part(candidate) {
            break;
        }
        ident.push(candidate);
        chars.next();
    }
    ident
}
/// Reports whether `ch` may begin an identifier: an ASCII letter or `_`.
fn is_identifier_start(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_ascii_alphabetic(),
    }
}
/// Reports whether `ch` may continue an identifier: an ASCII letter, an
/// ASCII digit, or `_`.
fn is_identifier_part(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
/// Renders `token` as text for use in error messages.
///
/// Keywords render in upper case, symbols as themselves and identifiers
/// verbatim. String literals are wrapped in single quotes with every embedded
/// `'` doubled, the same escape the tokenizer accepts, so a value such as
/// `it's` is shown as `'it''s'` rather than the ambiguous `'it's'`.
fn render_token(token: &Token) -> String {
    let fixed = match token {
        // Tokens with a payload build their text and return directly.
        Token::Identifier(name) => return name.clone(),
        Token::String(value) => return format!("'{}'", value.replace('\'', "''")),
        Token::Select => "SELECT",
        Token::From => "FROM",
        Token::Where => "WHERE",
        Token::As => "AS",
        Token::Null => "NULL",
        Token::Star => "*",
        Token::Comma => ",",
        Token::Eq => "=",
    };
    fixed.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A filtered query yields the table name, one projected item and a predicate.
    #[test]
    fn parses_select_with_filter() {
        let parsed = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap();

        assert_eq!(parsed.projection.len(), 1);
        assert_eq!(parsed.from, "Parent");
        assert!(parsed.selection.is_some());
    }

    /// Aliased identifiers, aliased string literals and a bare `NULL` all
    /// parse as projection items, in source order.
    #[test]
    fn parses_projection_aliases_and_literals() {
        let parsed =
            parse_select("SELECT c0 AS parent_name, 'seed' AS label, NULL FROM Parent").unwrap();

        let expected = vec![
            SelectItem::Expr {
                expr: Expr::Identifier("c0".to_string()),
                alias: Some("parent_name".to_string()),
            },
            SelectItem::Expr {
                expr: Expr::Literal(Literal::String("seed".to_string())),
                alias: Some("label".to_string()),
            },
            SelectItem::Expr {
                expr: Expr::Literal(Literal::Null),
                alias: None,
            },
        ];
        // Comparing whole vectors checks both the length and every element.
        assert_eq!(parsed.projection, expected);
    }
}