diff --git a/ROADMAP.md b/ROADMAP.md index 675d265..d9ccc6e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -28,7 +28,7 @@ This document tracks the current state and next steps for the repository. - [x] Minimal SQL AST and parser - [x] Logical plan scaffolding - [x] Logical-plan execution for the first SQL slice -- [x] `SELECT-FROM-WHERE` support with positional or named columns +- [x] `SELECT-FROM-WHERE-ORDER BY` support with positional or named columns - [x] Basic multi-table SQL joins via qualified-column filtering - [x] Table aliases for self-joins and qualified references - [x] Basic `ORDER BY` support over output columns diff --git a/src/frontend/language.rs b/src/frontend/language.rs index 2f0a67a..6d3c9b8 100644 --- a/src/frontend/language.rs +++ b/src/frontend/language.rs @@ -22,6 +22,8 @@ pub enum Command { pub fn parse_script(input: &str) -> Result, String> { let mut commands = Vec::new(); + let mut pending = String::new(); + let mut start_line = 0usize; for (index, raw_line) in input.lines().enumerate() { let line = raw_line.trim(); @@ -29,7 +31,26 @@ pub fn parse_script(input: &str) -> Result, String> { continue; } - let command = parse_command(line).map_err(|err| format!("line {}: {}", index + 1, err))?; + if pending.is_empty() { + start_line = index + 1; + } else { + pending.push(' '); + } + pending.push_str(line); + + if !command_is_complete(&pending) { + continue; + } + + let command = + parse_command(&pending).map_err(|err| format!("line {}: {}", start_line, err))?; + commands.push(command); + pending.clear(); + } + + if !pending.is_empty() { + let command = + parse_command(&pending).map_err(|err| format!("line {}: {}", start_line, err))?; commands.push(command); } @@ -62,11 +83,15 @@ pub fn parse_command(input: &str) -> Result { if let Some(rest) = strip_keyword(trimmed, "schema") { let atom = parse_atom(trim_suffix(rest, '.')?)?; + validate_identifier(&atom.predicate, "schema table")?; let columns = atom .terms .into_iter() .map(|term| match term { - Term::Constant(name) => Ok(name), + Term::Constant(name) => { + validate_identifier(&name, "schema column")?; + Ok(name) + } Term::Null(_) | Term::Variable(_) => { Err("schema columns must be constant identifiers".to_string()) } @@ -121,6 +146,18 @@ pub fn parse_command(input: &str) -> Result { Err("unknown command; try `help`".to_string()) } +fn command_is_complete(input: &str) -> bool { + let trimmed = input.trim(); + trimmed.ends_with('.') + || trimmed.ends_with(';') + || trimmed.ends_with('?') + || trimmed.eq_ignore_ascii_case("run") + || trimmed.eq_ignore_ascii_case("show facts") + || trimmed.eq_ignore_ascii_case("show rules") + || trimmed.eq_ignore_ascii_case("reset") + || trimmed.eq_ignore_ascii_case("help") +} + fn strip_keyword<'a>(input: &'a str, keyword: &str) -> Option<&'a str> { let prefix = input.get(..keyword.len())?; if !prefix.eq_ignore_ascii_case(keyword) { @@ -258,11 +295,20 @@ fn validate_identifier(value: &str, label: &str) -> Result<(), String> { return Err(format!("{} cannot be empty", label)); } - if value.chars().all(is_identifier_char) { - Ok(()) - } else { - Err(format!("invalid {} `{}`", label, value)) + let mut chars = value.chars(); + let Some(first) = chars.next() else { + return Err(format!("{} cannot be empty", label)); + }; + + if !is_identifier_start_char(first) || !chars.all(is_identifier_char) { + return Err(format!("invalid {} `{}`", label, value)); } + + Ok(()) +} + +fn is_identifier_start_char(ch: char) -> bool { + ch.is_ascii_alphanumeric() || ch == '_' } fn is_identifier_char(ch: char) -> bool { @@ -475,6 +521,32 @@ mod tests { } } + #[test] + fn parse_schema_command_rejects_non_identifier_columns() { + let error = parse_command(r#"schema Parent("given name", child)."#).unwrap_err(); + assert_eq!(error, "invalid schema column `given name`"); + } + + #[test] + fn parse_script_supports_multiline_sql() { + let script = "fact Parent(alice, bob).\n\ + fact Parent(bob, carol).\n\ + schema Parent(parent, child).\n\ + sql SELECT parent\n\ + FROM Parent\n\ + WHERE child = 'bob';"; + + let commands = parse_script(script).unwrap(); + assert_eq!(commands.len(), 4); + match &commands[3] { + Command::Sql(select) => { + assert_eq!(select.from.len(), 1); + assert_eq!(select.from[0].name, "Parent"); + } + other => panic!("unexpected command: {:?}", other), + } + } + #[test] fn parse_query_command() { let command = parse_command("query Ancestor(?X, ?Y), Parent(?Y, ?Z)?").unwrap(); diff --git a/src/lib.rs b/src/lib.rs index a0e8f74..1f2ec94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,8 @@ //! The current codebase primarily contains a chase-based reasoning core plus //! lightweight frontends for experimenting with rule-driven query answering. //! It also contains an early relational and SQL scaffold for a narrow -//! single-table `SELECT-FROM-WHERE` slice. It is not yet a full SQL engine. +//! `SELECT-FROM-WHERE-ORDER BY` slice with basic joins, aliases, and +//! conjunctions. It is not yet a full SQL engine. pub mod catalog; pub mod chase; diff --git a/src/planner/mod.rs b/src/planner/mod.rs index 84f6f62..cccfccb 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -5,8 +5,9 @@ //! - [`logical`]: plan and expression data structures //! - [`sql`]: translation from SQL AST into the current logical-plan subset //! -//! At the moment this is intentionally small and only covers the first -//! single-table SQL slice. +//! At the moment this is intentionally small and covers the current SQL slice: +//! filtering, ordering, projection, and basic joins over predicate-backed +//! tables. pub mod logical; pub mod sql; diff --git a/src/sql/ast.rs b/src/sql/ast.rs index 8fe150a..7bbae35 100644 --- a/src/sql/ast.rs +++ b/src/sql/ast.rs @@ -1,4 +1,4 @@ -/// A parsed `SELECT-FROM-WHERE` statement in the current SQL subset. +/// A parsed `SELECT-FROM-WHERE-ORDER BY` statement in the current SQL subset. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Select { /// Output expressions requested by the query. diff --git a/src/sql/mod.rs b/src/sql/mod.rs index 3e9f3f8..7ac342e 100644 --- a/src/sql/mod.rs +++ b/src/sql/mod.rs @@ -1,7 +1,8 @@ //! Minimal SQL front-end scaffolding. //! -//! The current SQL layer supports a narrow `SELECT-FROM-WHERE` subset over one -//! predicate-backed table. It provides: +//! The current SQL layer supports a narrow `SELECT-FROM-WHERE-ORDER BY` subset +//! over predicate-backed tables, including comma-join style multi-table +//! queries, table aliases, and `AND` in filter predicates. It provides: //! //! - a small AST in [`ast`] //! - a parser in [`parser`] diff --git a/src/sql/parser.rs b/src/sql/parser.rs index df32fca..c127dfa 100644 --- a/src/sql/parser.rs +++ b/src/sql/parser.rs @@ -48,7 +48,7 @@ enum Token { Eq, } -/// Parse a `SELECT-FROM-WHERE` query in the current SQL subset. +/// Parse a `SELECT-FROM-WHERE-ORDER BY` query in the current SQL subset. pub fn parse_select(input: &str) -> Result { let tokens = tokenize(input)?; let mut parser = Parser::new(tokens); @@ -337,11 +337,11 @@ where } fn is_identifier_start(ch: char) -> bool { - ch.is_ascii_alphabetic() || ch == '_' + ch.is_ascii_alphanumeric() || ch == '_' } fn is_identifier_part(ch: char) -> bool { - ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.') + ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | ':' | '.') } fn render_token(token: &Token) -> String { @@ -507,4 +507,12 @@ mod tests { ] ); } + + #[test] + fn parses_frontend_style_identifiers() { + let select = parse_select("SELECT * FROM Employee-Records:2025").unwrap(); + + assert_eq!(select.from.len(), 1); + assert_eq!(select.from[0].name, "Employee-Records:2025"); + } }