Add basic multi-table SQL joins with qualified columns
This commit is contained in:
parent
23cbc6a4bf
commit
4535d4a918
32
README.md
32
README.md
@ -14,7 +14,7 @@ execution boundaries.
|
|||||||
- Provenance-oriented explanations for derived answers
|
- Provenance-oriented explanations for derived answers
|
||||||
- Script, REPL, and local web UI for experimentation
|
- Script, REPL, and local web UI for experimentation
|
||||||
- Relational schema, catalog, logical-plan, and execution scaffolding
|
- Relational schema, catalog, logical-plan, and execution scaffolding
|
||||||
- A minimal SQL slice for single-table `SELECT-FROM-WHERE` queries
|
- A minimal SQL slice for `SELECT-FROM-WHERE` queries over predicate-backed tables
|
||||||
|
|
||||||
### Architecture
|
### Architecture
|
||||||
|
|
||||||
@ -91,6 +91,7 @@ cargo run -- script examples/scripts/ancestor.chase
|
|||||||
```text
|
```text
|
||||||
fact Parent(alice, bob).
|
fact Parent(alice, bob).
|
||||||
rule Parent(?X, ?Y) -> Ancestor(?X, ?Y).
|
rule Parent(?X, ?Y) -> Ancestor(?X, ?Y).
|
||||||
|
schema Parent(parent, child).
|
||||||
sql SELECT * FROM Parent;
|
sql SELECT * FROM Parent;
|
||||||
run.
|
run.
|
||||||
query Ancestor(?X, ?Y)?
|
query Ancestor(?X, ?Y)?
|
||||||
@ -109,7 +110,7 @@ The repository now has a narrow SQL pipeline with:
|
|||||||
- relational schemas, rows, and values
|
- relational schemas, rows, and values
|
||||||
- SQL parsing for a small subset
|
- SQL parsing for a small subset
|
||||||
- logical planning
|
- logical planning
|
||||||
- execution for single-table queries
|
- execution for single-table queries and basic multi-table joins
|
||||||
|
|
||||||
Currently supported examples:
|
Currently supported examples:
|
||||||
|
|
||||||
@ -118,6 +119,9 @@ SELECT * FROM Parent
|
|||||||
SELECT c0 FROM Parent
|
SELECT c0 FROM Parent
|
||||||
SELECT c0 FROM Parent WHERE c1 = 'bob'
|
SELECT c0 FROM Parent WHERE c1 = 'bob'
|
||||||
SELECT c0 AS parent_name, 'seed' AS label FROM Parent
|
SELECT c0 AS parent_name, 'seed' AS label FROM Parent
|
||||||
|
SELECT Parent.parent, Ancestor.child
|
||||||
|
FROM Parent, Ancestor
|
||||||
|
WHERE Parent.child = Ancestor.parent
|
||||||
```
|
```
|
||||||
|
|
||||||
In the REPL or script runner, use the `sql` command and end the statement with
|
In the REPL or script runner, use the `sql` command and end the statement with
|
||||||
@ -127,11 +131,29 @@ In the REPL or script runner, use the `sql` command and end the statement with
|
|||||||
sql SELECT c0 FROM Parent WHERE c1 = 'bob';
|
sql SELECT c0 FROM Parent WHERE c1 = 'bob';
|
||||||
```
|
```
|
||||||
|
|
||||||
|
You can also register stable column names for a predicate-backed table in the
|
||||||
|
frontend before running SQL:
|
||||||
|
|
||||||
|
```text
|
||||||
|
schema Parent(parent, child).
|
||||||
|
sql SELECT parent FROM Parent WHERE child = 'bob';
|
||||||
|
```
|
||||||
|
|
||||||
|
For multi-table queries, qualify column names with the table name:
|
||||||
|
|
||||||
|
```text
|
||||||
|
schema Parent(parent, child).
|
||||||
|
schema Ancestor(parent, child).
|
||||||
|
sql SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent;
|
||||||
|
```
|
||||||
|
|
||||||
Current limits:
|
Current limits:
|
||||||
|
|
||||||
- single-table only
|
- default column names are positional such as `c0`, `c1`
|
||||||
- positional column names such as `c0`, `c1`
|
- stable names require explicit catalog registration or `schema ...` in the frontend
|
||||||
- no joins
|
- joins currently use comma-separated tables plus `WHERE` filtering
|
||||||
|
- multi-table queries require qualified column names such as `Parent.child`
|
||||||
|
- no table aliases yet
|
||||||
- no aggregates
|
- no aggregates
|
||||||
- projection aliases only via `AS`
|
- projection aliases only via `AS`
|
||||||
|
|
||||||
|
|||||||
11
ROADMAP.md
11
ROADMAP.md
@ -28,7 +28,8 @@ This document tracks the current state and next steps for the repository.
|
|||||||
- [x] Minimal SQL AST and parser
|
- [x] Minimal SQL AST and parser
|
||||||
- [x] Logical plan scaffolding
|
- [x] Logical plan scaffolding
|
||||||
- [x] Logical-plan execution for the first SQL slice
|
- [x] Logical-plan execution for the first SQL slice
|
||||||
- [x] Single-table `SELECT-FROM-WHERE` support with positional columns (`c0`, `c1`, ...)
|
- [x] `SELECT-FROM-WHERE` support with positional or named columns
|
||||||
|
- [x] Basic multi-table SQL joins via qualified-column filtering
|
||||||
|
|
||||||
### Near-Term Cleanup
|
### Near-Term Cleanup
|
||||||
|
|
||||||
@ -36,7 +37,7 @@ This document tracks the current state and next steps for the repository.
|
|||||||
- [ ] Remove remaining stale terminology in comments and help text
|
- [ ] Remove remaining stale terminology in comments and help text
|
||||||
- [ ] Expand examples for the current rule-engine workflow
|
- [ ] Expand examples for the current rule-engine workflow
|
||||||
- [ ] Add rustdoc coverage for the main public types
|
- [ ] Add rustdoc coverage for the main public types
|
||||||
- [ ] Document the current SQL subset and its limits
|
- [x] Document the current SQL subset and its limits
|
||||||
|
|
||||||
### Query-Engine Structure
|
### Query-Engine Structure
|
||||||
|
|
||||||
@ -53,9 +54,9 @@ This document tracks the current state and next steps for the repository.
|
|||||||
- [x] Add logical plan node types
|
- [x] Add logical plan node types
|
||||||
- [x] Add name resolution and schema validation hooks
|
- [x] Add name resolution and schema validation hooks
|
||||||
- [x] Add expression typing and nullability tracking
|
- [x] Add expression typing and nullability tracking
|
||||||
- [ ] Add aliases and richer projection expressions
|
- [x] Add aliases and richer projection expressions
|
||||||
- [ ] Add joins across multiple predicate-backed tables
|
- [x] Add joins across multiple predicate-backed tables
|
||||||
- [ ] Add a catalog path for stable column naming beyond `c0`, `c1`, ...
|
- [x] Add a catalog path for stable column naming beyond `c0`, `c1`, ...
|
||||||
|
|
||||||
### Execution and Optimization
|
### Execution and Optimization
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,14 @@ pub enum CatalogError {
|
|||||||
expected: usize,
|
expected: usize,
|
||||||
found: usize,
|
found: usize,
|
||||||
},
|
},
|
||||||
|
/// An explicit column-name list does not match the table arity.
|
||||||
|
ColumnCountMismatch {
|
||||||
|
table: String,
|
||||||
|
expected: usize,
|
||||||
|
found: usize,
|
||||||
|
},
|
||||||
|
/// An explicit column-name list contains duplicates.
|
||||||
|
DuplicateColumnName { table: String, column: String },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for CatalogError {
|
impl fmt::Display for CatalogError {
|
||||||
@ -36,6 +44,20 @@ impl fmt::Display for CatalogError {
|
|||||||
"table `{}` has inconsistent arity: expected {}, found {}",
|
"table `{}` has inconsistent arity: expected {}, found {}",
|
||||||
table, expected, found
|
table, expected, found
|
||||||
),
|
),
|
||||||
|
Self::ColumnCountMismatch {
|
||||||
|
table,
|
||||||
|
expected,
|
||||||
|
found,
|
||||||
|
} => write!(
|
||||||
|
f,
|
||||||
|
"table `{}` expects {} column name(s), found {}",
|
||||||
|
table, expected, found
|
||||||
|
),
|
||||||
|
Self::DuplicateColumnName { table, column } => write!(
|
||||||
|
f,
|
||||||
|
"table `{}` has duplicate column name `{}`",
|
||||||
|
table, column
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -66,6 +88,46 @@ impl PredicateCatalog {
|
|||||||
.ok_or_else(|| CatalogError::UnknownTable(table.to_string()))
|
.ok_or_else(|| CatalogError::UnknownTable(table.to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Replace the positional column names for a registered table.
|
||||||
|
///
|
||||||
|
/// This preserves the inferred data types and nullability metadata while
|
||||||
|
/// assigning stable names such as `parent` or `child`.
|
||||||
|
pub fn rename_columns<I, S>(&mut self, table: &str, columns: I) -> Result<(), CatalogError>
|
||||||
|
where
|
||||||
|
I: IntoIterator<Item = S>,
|
||||||
|
S: Into<String>,
|
||||||
|
{
|
||||||
|
let schema = self.schema_for(table)?.clone();
|
||||||
|
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
if columns.len() != schema.len() {
|
||||||
|
return Err(CatalogError::ColumnCountMismatch {
|
||||||
|
table: table.to_string(),
|
||||||
|
expected: schema.len(),
|
||||||
|
found: columns.len(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut unique = std::collections::HashSet::new();
|
||||||
|
for column in &columns {
|
||||||
|
if !unique.insert(column.clone()) {
|
||||||
|
return Err(CatalogError::DuplicateColumnName {
|
||||||
|
table: table.to_string(),
|
||||||
|
column: column.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let fields = schema
|
||||||
|
.fields()
|
||||||
|
.iter()
|
||||||
|
.zip(columns)
|
||||||
|
.map(|(field, name)| Field::new(name, field.data_type().clone(), field.nullable()))
|
||||||
|
.collect();
|
||||||
|
self.schemas.insert(table.to_string(), Schema::new(fields));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Infer table schemas from the predicates present in an instance.
|
/// Infer table schemas from the predicates present in an instance.
|
||||||
///
|
///
|
||||||
/// Each predicate becomes one table, with positional column names `c0`,
|
/// Each predicate becomes one table, with positional column names `c0`,
|
||||||
@ -144,4 +206,51 @@ mod tests {
|
|||||||
assert_eq!(schema.fields()[1].name(), "c1");
|
assert_eq!(schema.fields()[1].name(), "c1");
|
||||||
assert!(schema.fields()[1].nullable());
|
assert!(schema.fields()[1].nullable());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rename_columns_preserves_shape_and_metadata() {
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
),
|
||||||
|
Atom::new("Parent", vec![Term::constant("bob"), Term::null(0)]),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
|
||||||
|
catalog
|
||||||
|
.rename_columns("Parent", ["parent", "child"])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let schema = catalog.schema_for("Parent").unwrap();
|
||||||
|
assert_eq!(schema.fields()[0].name(), "parent");
|
||||||
|
assert_eq!(schema.fields()[1].name(), "child");
|
||||||
|
assert!(!schema.fields()[0].nullable());
|
||||||
|
assert!(schema.fields()[1].nullable());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rename_columns_rejects_duplicates() {
|
||||||
|
let instance: Instance = vec![Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
)]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
|
||||||
|
let error = catalog
|
||||||
|
.rename_columns("Parent", ["name", "name"])
|
||||||
|
.unwrap_err();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
error,
|
||||||
|
CatalogError::DuplicateColumnName {
|
||||||
|
table: "Parent".to_string(),
|
||||||
|
column: "name".to_string(),
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -44,6 +44,25 @@ pub fn execute(plan: &LogicalPlan, instance: &Instance) -> Result<ResultSet, Exe
|
|||||||
}
|
}
|
||||||
Ok(ResultSet::new(schema.clone(), rows))
|
Ok(ResultSet::new(schema.clone(), rows))
|
||||||
}
|
}
|
||||||
|
LogicalPlan::CrossJoin {
|
||||||
|
left,
|
||||||
|
right,
|
||||||
|
schema,
|
||||||
|
} => {
|
||||||
|
let left_result = execute(left, instance)?;
|
||||||
|
let right_result = execute(right, instance)?;
|
||||||
|
let mut rows = Vec::new();
|
||||||
|
|
||||||
|
for left_row in left_result.rows() {
|
||||||
|
for right_row in right_result.rows() {
|
||||||
|
let mut values = left_row.values().to_vec();
|
||||||
|
values.extend_from_slice(right_row.values());
|
||||||
|
rows.push(Row::new(values));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ResultSet::new(schema.clone(), rows))
|
||||||
|
}
|
||||||
LogicalPlan::Filter { input, predicate } => {
|
LogicalPlan::Filter { input, predicate } => {
|
||||||
let result = execute(input, instance)?;
|
let result = execute(input, instance)?;
|
||||||
let filtered_rows = result
|
let filtered_rows = result
|
||||||
|
|||||||
@ -9,6 +9,7 @@ use crate::sql::parser::parse_select;
|
|||||||
pub enum Command {
|
pub enum Command {
|
||||||
Fact(Atom),
|
Fact(Atom),
|
||||||
Rule(Rule),
|
Rule(Rule),
|
||||||
|
Schema { table: String, columns: Vec<String> },
|
||||||
Sql(Select),
|
Sql(Select),
|
||||||
Run,
|
Run,
|
||||||
Query(Vec<Atom>),
|
Query(Vec<Atom>),
|
||||||
@ -59,6 +60,24 @@ pub fn parse_command(input: &str) -> Result<Command, String> {
|
|||||||
return Ok(Command::Sql(select));
|
return Ok(Command::Sql(select));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(rest) = strip_keyword(trimmed, "schema") {
|
||||||
|
let atom = parse_atom(trim_suffix(rest, '.')?)?;
|
||||||
|
let columns = atom
|
||||||
|
.terms
|
||||||
|
.into_iter()
|
||||||
|
.map(|term| match term {
|
||||||
|
Term::Constant(name) => Ok(name),
|
||||||
|
Term::Null(_) | Term::Variable(_) => {
|
||||||
|
Err("schema columns must be constant identifiers".to_string())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
|
return Ok(Command::Schema {
|
||||||
|
table: atom.predicate,
|
||||||
|
columns,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(rest) = strip_keyword(trimmed, "fact") {
|
if let Some(rest) = strip_keyword(trimmed, "fact") {
|
||||||
let atom = parse_atom(trim_suffix(rest, '.')?)?;
|
let atom = parse_atom(trim_suffix(rest, '.')?)?;
|
||||||
if !atom.is_ground() {
|
if !atom.is_ground() {
|
||||||
@ -380,13 +399,42 @@ mod tests {
|
|||||||
let command = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap();
|
let command = parse_command("sql SELECT c0 FROM Parent WHERE c1 = 'bob';").unwrap();
|
||||||
match command {
|
match command {
|
||||||
Command::Sql(select) => {
|
Command::Sql(select) => {
|
||||||
assert_eq!(select.from, "Parent");
|
assert_eq!(select.from, vec!["Parent".to_string()]);
|
||||||
assert!(select.selection.is_some());
|
assert!(select.selection.is_some());
|
||||||
}
|
}
|
||||||
other => panic!("unexpected command: {:?}", other),
|
other => panic!("unexpected command: {:?}", other),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_sql_join_command() {
|
||||||
|
let command = parse_command(
|
||||||
|
"sql SELECT Parent.parent FROM Parent, Ancestor WHERE Parent.child = Ancestor.parent;",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
match command {
|
||||||
|
Command::Sql(select) => {
|
||||||
|
assert_eq!(
|
||||||
|
select.from,
|
||||||
|
vec!["Parent".to_string(), "Ancestor".to_string()]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
other => panic!("unexpected command: {:?}", other),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_schema_command() {
|
||||||
|
let command = parse_command("schema Parent(parent, child).").unwrap();
|
||||||
|
match command {
|
||||||
|
Command::Schema { table, columns } => {
|
||||||
|
assert_eq!(table, "Parent");
|
||||||
|
assert_eq!(columns, vec!["parent".to_string(), "child".to_string()]);
|
||||||
|
}
|
||||||
|
other => panic!("unexpected command: {:?}", other),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_query_command() {
|
fn parse_query_command() {
|
||||||
let command = parse_command("query Ancestor(?X, ?Y), Parent(?Y, ?Z)?").unwrap();
|
let command = parse_command("query Ancestor(?X, ?Y), Parent(?Y, ?Z)?").unwrap();
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
//! Session state and command execution shared by the REPL and GUI.
|
//! Session state and command execution shared by the REPL and GUI.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use crate::catalog::PredicateCatalog;
|
use crate::catalog::PredicateCatalog;
|
||||||
@ -17,6 +18,7 @@ use super::provenance::explain_atom;
|
|||||||
pub struct Session {
|
pub struct Session {
|
||||||
base_instance: Instance,
|
base_instance: Instance,
|
||||||
rules: Vec<Rule>,
|
rules: Vec<Rule>,
|
||||||
|
column_names: HashMap<String, Vec<String>>,
|
||||||
materialized: Option<MaterializedState>,
|
materialized: Option<MaterializedState>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -63,6 +65,14 @@ impl Session {
|
|||||||
self.rules.push(rule.clone());
|
self.rules.push(rule.clone());
|
||||||
Ok(format!("Added rule #{}: {}", self.rules.len(), rule))
|
Ok(format!("Added rule #{}: {}", self.rules.len(), rule))
|
||||||
}
|
}
|
||||||
|
Command::Schema { table, columns } => {
|
||||||
|
self.column_names.insert(table.clone(), columns.clone());
|
||||||
|
Ok(format!(
|
||||||
|
"Registered schema for {}: {}",
|
||||||
|
table,
|
||||||
|
columns.join(", ")
|
||||||
|
))
|
||||||
|
}
|
||||||
Command::Sql(select) => self.run_sql(&select),
|
Command::Sql(select) => self.run_sql(&select),
|
||||||
Command::Run => Ok(self.run_chase()),
|
Command::Run => Ok(self.run_chase()),
|
||||||
Command::Query(query) => Ok(self.run_query(&query)),
|
Command::Query(query) => Ok(self.run_query(&query)),
|
||||||
@ -130,7 +140,13 @@ impl Session {
|
|||||||
|
|
||||||
fn run_sql(&self, select: &crate::sql::ast::Select) -> Result<String, String> {
|
fn run_sql(&self, select: &crate::sql::ast::Select) -> Result<String, String> {
|
||||||
let instance = self.active_instance();
|
let instance = self.active_instance();
|
||||||
let catalog = PredicateCatalog::from_instance(instance).map_err(|err| err.to_string())?;
|
let mut catalog =
|
||||||
|
PredicateCatalog::from_instance(instance).map_err(|err| err.to_string())?;
|
||||||
|
for (table, columns) in &self.column_names {
|
||||||
|
catalog
|
||||||
|
.rename_columns(table, columns.clone())
|
||||||
|
.map_err(|err| err.to_string())?;
|
||||||
|
}
|
||||||
let plan = plan_select(select, &catalog).map_err(|err| err.to_string())?;
|
let plan = plan_select(select, &catalog).map_err(|err| err.to_string())?;
|
||||||
let result = execute(&plan, instance).map_err(|err| err.to_string())?;
|
let result = execute(&plan, instance).map_err(|err| err.to_string())?;
|
||||||
Ok(render_result_set(&result))
|
Ok(render_result_set(&result))
|
||||||
@ -211,6 +227,7 @@ fn help_text() -> &'static str {
|
|||||||
"Commands:
|
"Commands:
|
||||||
fact Parent(alice, bob).
|
fact Parent(alice, bob).
|
||||||
rule Parent(?X, ?Y) -> Ancestor(?X, ?Y).
|
rule Parent(?X, ?Y) -> Ancestor(?X, ?Y).
|
||||||
|
schema Parent(parent, child).
|
||||||
sql SELECT * FROM Parent;
|
sql SELECT * FROM Parent;
|
||||||
run.
|
run.
|
||||||
query Ancestor(?X, ?Y)?
|
query Ancestor(?X, ?Y)?
|
||||||
@ -366,4 +383,44 @@ mod tests {
|
|||||||
assert!(output.contains("alice | seed"));
|
assert!(output.contains("alice | seed"));
|
||||||
assert!(output.contains("bob | seed"));
|
assert!(output.contains("bob | seed"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn session_runs_sql_query_with_named_columns() {
|
||||||
|
let mut session = Session::new();
|
||||||
|
let output = session
|
||||||
|
.execute_script(
|
||||||
|
"fact Parent(alice, bob).\n\
|
||||||
|
fact Parent(bob, carol).\n\
|
||||||
|
schema Parent(parent, child).\n\
|
||||||
|
sql SELECT parent FROM Parent WHERE child = 'bob';",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert!(output.contains("Registered schema for Parent: parent, child"));
|
||||||
|
assert!(output.contains("1 row(s)"));
|
||||||
|
assert!(output.contains("parent"));
|
||||||
|
assert!(output.contains("alice"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn session_runs_sql_join_query() {
|
||||||
|
let mut session = Session::new();
|
||||||
|
let output = session
|
||||||
|
.execute_script(
|
||||||
|
"fact Parent(alice, bob).\n\
|
||||||
|
fact Parent(bob, carol).\n\
|
||||||
|
fact Ancestor(bob, carol).\n\
|
||||||
|
fact Ancestor(carol, dave).\n\
|
||||||
|
schema Parent(parent, child).\n\
|
||||||
|
schema Ancestor(parent, child).\n\
|
||||||
|
sql SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \
|
||||||
|
WHERE Parent.child = Ancestor.parent;",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert!(output.contains("2 row(s)"));
|
||||||
|
assert!(output.contains("Parent.parent | Ancestor.child"));
|
||||||
|
assert!(output.contains("alice | carol"));
|
||||||
|
assert!(output.contains("bob | dave"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,6 +25,12 @@ pub struct NamedExpr {
|
|||||||
pub enum LogicalPlan {
|
pub enum LogicalPlan {
|
||||||
/// Read all facts for one predicate-backed table.
|
/// Read all facts for one predicate-backed table.
|
||||||
Scan { table: String, schema: Schema },
|
Scan { table: String, schema: Schema },
|
||||||
|
/// Form the Cartesian product of two inputs.
|
||||||
|
CrossJoin {
|
||||||
|
left: Box<LogicalPlan>,
|
||||||
|
right: Box<LogicalPlan>,
|
||||||
|
schema: Schema,
|
||||||
|
},
|
||||||
/// Filter rows by a predicate.
|
/// Filter rows by a predicate.
|
||||||
Filter {
|
Filter {
|
||||||
input: Box<LogicalPlan>,
|
input: Box<LogicalPlan>,
|
||||||
@ -43,6 +49,7 @@ impl LogicalPlan {
|
|||||||
pub fn output_schema(&self) -> &Schema {
|
pub fn output_schema(&self) -> &Schema {
|
||||||
match self {
|
match self {
|
||||||
Self::Scan { schema, .. } => schema,
|
Self::Scan { schema, .. } => schema,
|
||||||
|
Self::CrossJoin { schema, .. } => schema,
|
||||||
Self::Filter { input, .. } => input.output_schema(),
|
Self::Filter { input, .. } => input.output_schema(),
|
||||||
Self::Project { schema, .. } => schema,
|
Self::Project { schema, .. } => schema,
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
@ -13,6 +14,8 @@ pub enum PlannerError {
|
|||||||
Catalog(CatalogError),
|
Catalog(CatalogError),
|
||||||
/// A referenced column does not exist in the input schema.
|
/// A referenced column does not exist in the input schema.
|
||||||
UnknownColumn(String),
|
UnknownColumn(String),
|
||||||
|
/// The same table name appears more than once without alias support.
|
||||||
|
DuplicateTable(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for PlannerError {
|
impl fmt::Display for PlannerError {
|
||||||
@ -20,6 +23,13 @@ impl fmt::Display for PlannerError {
|
|||||||
match self {
|
match self {
|
||||||
Self::Catalog(err) => write!(f, "catalog error: {}", err),
|
Self::Catalog(err) => write!(f, "catalog error: {}", err),
|
||||||
Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column),
|
Self::UnknownColumn(column) => write!(f, "unknown column `{}`", column),
|
||||||
|
Self::DuplicateTable(table) => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"table `{}` appears more than once; aliases are not supported",
|
||||||
|
table
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -28,7 +38,7 @@ impl Error for PlannerError {
|
|||||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||||
match self {
|
match self {
|
||||||
Self::Catalog(err) => Some(err),
|
Self::Catalog(err) => Some(err),
|
||||||
Self::UnknownColumn(_) => None,
|
Self::UnknownColumn(_) | Self::DuplicateTable(_) => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -44,14 +54,10 @@ pub fn plan_select(
|
|||||||
select: &Select,
|
select: &Select,
|
||||||
catalog: &PredicateCatalog,
|
catalog: &PredicateCatalog,
|
||||||
) -> Result<LogicalPlan, PlannerError> {
|
) -> Result<LogicalPlan, PlannerError> {
|
||||||
let scan_schema = catalog.schema_for(&select.from)?.clone();
|
let (mut plan, input_schema) = plan_from_tables(&select.from, catalog)?;
|
||||||
let mut plan = LogicalPlan::Scan {
|
|
||||||
table: select.from.clone(),
|
|
||||||
schema: scan_schema.clone(),
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(selection) = &select.selection {
|
if let Some(selection) = &select.selection {
|
||||||
let predicate = plan_expr(selection, &scan_schema)?;
|
let predicate = plan_expr(selection, &input_schema)?;
|
||||||
plan = LogicalPlan::Filter {
|
plan = LogicalPlan::Filter {
|
||||||
input: Box::new(plan),
|
input: Box::new(plan),
|
||||||
predicate,
|
predicate,
|
||||||
@ -67,11 +73,11 @@ pub fn plan_select(
|
|||||||
for (index, item) in select.projection.iter().enumerate() {
|
for (index, item) in select.projection.iter().enumerate() {
|
||||||
match item {
|
match item {
|
||||||
SelectItem::Expr { expr, alias } => {
|
SelectItem::Expr { expr, alias } => {
|
||||||
let planned_expr = plan_expr(expr, &scan_schema)?;
|
let planned_expr = plan_expr(expr, &input_schema)?;
|
||||||
let output_name = alias
|
let output_name = alias
|
||||||
.clone()
|
.clone()
|
||||||
.unwrap_or_else(|| default_projection_name(expr, index + 1));
|
.unwrap_or_else(|| default_projection_name(expr, index + 1));
|
||||||
let (data_type, nullable) = projection_metadata(expr, &scan_schema)?;
|
let (data_type, nullable) = projection_metadata(expr, &input_schema)?;
|
||||||
expressions.push(NamedExpr {
|
expressions.push(NamedExpr {
|
||||||
name: output_name.clone(),
|
name: output_name.clone(),
|
||||||
expr: planned_expr,
|
expr: planned_expr,
|
||||||
@ -93,6 +99,46 @@ fn is_wildcard_projection(items: &[SelectItem]) -> bool {
|
|||||||
matches!(items, [SelectItem::Wildcard])
|
matches!(items, [SelectItem::Wildcard])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn plan_from_tables(
|
||||||
|
tables: &[String],
|
||||||
|
catalog: &PredicateCatalog,
|
||||||
|
) -> Result<(LogicalPlan, Schema), PlannerError> {
|
||||||
|
let mut seen = HashSet::new();
|
||||||
|
let mut table_iter = tables.iter();
|
||||||
|
let first = table_iter.next().ok_or_else(|| {
|
||||||
|
PlannerError::Catalog(CatalogError::UnknownTable("<missing>".to_string()))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
seen.insert(first.clone());
|
||||||
|
let first_schema = input_schema_for_table(first, catalog, tables.len() > 1)?;
|
||||||
|
let mut plan = LogicalPlan::Scan {
|
||||||
|
table: first.clone(),
|
||||||
|
schema: first_schema.clone(),
|
||||||
|
};
|
||||||
|
let mut combined_schema = first_schema;
|
||||||
|
|
||||||
|
for table in table_iter {
|
||||||
|
if !seen.insert(table.clone()) {
|
||||||
|
return Err(PlannerError::DuplicateTable(table.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let right_schema = input_schema_for_table(table, catalog, tables.len() > 1)?;
|
||||||
|
let join_schema = combine_schemas(&combined_schema, &right_schema);
|
||||||
|
let right_plan = LogicalPlan::Scan {
|
||||||
|
table: table.clone(),
|
||||||
|
schema: right_schema.clone(),
|
||||||
|
};
|
||||||
|
plan = LogicalPlan::CrossJoin {
|
||||||
|
left: Box::new(plan),
|
||||||
|
right: Box::new(right_plan),
|
||||||
|
schema: join_schema.clone(),
|
||||||
|
};
|
||||||
|
combined_schema = join_schema;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((plan, combined_schema))
|
||||||
|
}
|
||||||
|
|
||||||
fn plan_expr(expr: &Expr, schema: &Schema) -> Result<LogicalExpr, PlannerError> {
|
fn plan_expr(expr: &Expr, schema: &Schema) -> Result<LogicalExpr, PlannerError> {
|
||||||
match expr {
|
match expr {
|
||||||
Expr::Identifier(name) => {
|
Expr::Identifier(name) => {
|
||||||
@ -140,6 +186,36 @@ fn default_projection_name(expr: &Expr, ordinal: usize) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn input_schema_for_table(
|
||||||
|
table: &str,
|
||||||
|
catalog: &PredicateCatalog,
|
||||||
|
qualify_columns: bool,
|
||||||
|
) -> Result<Schema, PlannerError> {
|
||||||
|
let schema = catalog.schema_for(table)?.clone();
|
||||||
|
if !qualify_columns {
|
||||||
|
return Ok(schema);
|
||||||
|
}
|
||||||
|
|
||||||
|
let fields = schema
|
||||||
|
.fields()
|
||||||
|
.iter()
|
||||||
|
.map(|field| {
|
||||||
|
Field::new(
|
||||||
|
format!("{}.{}", table, field.name()),
|
||||||
|
field.data_type().clone(),
|
||||||
|
field.nullable(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Ok(Schema::new(fields))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn combine_schemas(left: &Schema, right: &Schema) -> Schema {
|
||||||
|
let mut fields = left.fields().to_vec();
|
||||||
|
fields.extend_from_slice(right.fields());
|
||||||
|
Schema::new(fields)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -182,4 +258,39 @@ mod tests {
|
|||||||
assert_eq!(schema.fields()[2].name(), "expr3");
|
assert_eq!(schema.fields()[2].name(), "expr3");
|
||||||
assert_eq!(schema.fields()[1].data_type(), &DataType::Text);
|
assert_eq!(schema.fields()[1].data_type(), &DataType::Text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn plans_multi_table_select_with_qualified_columns() {
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Ancestor",
|
||||||
|
vec![Term::constant("bob"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
|
||||||
|
catalog
|
||||||
|
.rename_columns("Parent", ["parent", "child"])
|
||||||
|
.unwrap();
|
||||||
|
catalog
|
||||||
|
.rename_columns("Ancestor", ["parent", "child"])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let select = parse_select(
|
||||||
|
"SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \
|
||||||
|
WHERE Parent.child = Ancestor.parent",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let plan = plan_select(&select, &catalog).unwrap();
|
||||||
|
let schema = plan.output_schema();
|
||||||
|
assert_eq!(schema.len(), 2);
|
||||||
|
assert_eq!(schema.fields()[0].name(), "Parent.parent");
|
||||||
|
assert_eq!(schema.fields()[1].name(), "Ancestor.child");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,8 +3,8 @@
|
|||||||
pub struct Select {
|
pub struct Select {
|
||||||
/// Output expressions requested by the query.
|
/// Output expressions requested by the query.
|
||||||
pub projection: Vec<SelectItem>,
|
pub projection: Vec<SelectItem>,
|
||||||
/// Source table name.
|
/// Source table names.
|
||||||
pub from: String,
|
pub from: Vec<String>,
|
||||||
/// Optional filter predicate.
|
/// Optional filter predicate.
|
||||||
pub selection: Option<Expr>,
|
pub selection: Option<Expr>,
|
||||||
}
|
}
|
||||||
|
|||||||
@ -62,7 +62,7 @@ impl Parser {
|
|||||||
self.expect_keyword(Token::Select, "SELECT")?;
|
self.expect_keyword(Token::Select, "SELECT")?;
|
||||||
let projection = self.parse_projection()?;
|
let projection = self.parse_projection()?;
|
||||||
self.expect_keyword(Token::From, "FROM")?;
|
self.expect_keyword(Token::From, "FROM")?;
|
||||||
let from = self.expect_identifier()?;
|
let from = self.parse_from_list()?;
|
||||||
let selection = if self.peek() == Some(&Token::Where) {
|
let selection = if self.peek() == Some(&Token::Where) {
|
||||||
self.index += 1;
|
self.index += 1;
|
||||||
Some(self.parse_expr()?)
|
Some(self.parse_expr()?)
|
||||||
@ -113,6 +113,21 @@ impl Parser {
|
|||||||
Ok(items)
|
Ok(items)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_from_list(&mut self) -> Result<Vec<String>, ParseError> {
|
||||||
|
let mut tables = Vec::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
tables.push(self.expect_identifier()?);
|
||||||
|
if self.peek() == Some(&Token::Comma) {
|
||||||
|
self.index += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(tables)
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_expr(&mut self) -> Result<Expr, ParseError> {
|
fn parse_expr(&mut self) -> Result<Expr, ParseError> {
|
||||||
let left = self.parse_operand()?;
|
let left = self.parse_operand()?;
|
||||||
match self.next().ok_or(ParseError::UnexpectedEnd)? {
|
match self.next().ok_or(ParseError::UnexpectedEnd)? {
|
||||||
@ -255,7 +270,7 @@ fn is_identifier_start(ch: char) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_part(ch: char) -> bool {
|
fn is_identifier_part(ch: char) -> bool {
|
||||||
ch.is_ascii_alphanumeric() || ch == '_'
|
ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.')
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_token(token: &Token) -> String {
|
fn render_token(token: &Token) -> String {
|
||||||
@ -281,7 +296,7 @@ mod tests {
|
|||||||
fn parses_select_with_filter() {
|
fn parses_select_with_filter() {
|
||||||
let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap();
|
let select = parse_select("SELECT c0 FROM Parent WHERE c1 = 'bob'").unwrap();
|
||||||
|
|
||||||
assert_eq!(select.from, "Parent");
|
assert_eq!(select.from, vec!["Parent".to_string()]);
|
||||||
assert_eq!(select.projection.len(), 1);
|
assert_eq!(select.projection.len(), 1);
|
||||||
assert!(select.selection.is_some());
|
assert!(select.selection.is_some());
|
||||||
}
|
}
|
||||||
@ -314,4 +329,25 @@ mod tests {
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parses_multi_table_select_with_qualified_columns() {
|
||||||
|
let select = parse_select(
|
||||||
|
"SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \
|
||||||
|
WHERE Parent.child = Ancestor.parent",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
select.from,
|
||||||
|
vec!["Parent".to_string(), "Ancestor".to_string()]
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
select.projection[0],
|
||||||
|
SelectItem::Expr {
|
||||||
|
expr: Expr::Identifier("Parent.parent".to_string()),
|
||||||
|
alias: None,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -83,3 +83,76 @@ fn select_alias_and_literal_projection_shape_output() {
|
|||||||
assert_eq!(format!("{}", result.rows()[0].values()[1]), "seed");
|
assert_eq!(format!("{}", result.rows()[0].values()[1]), "seed");
|
||||||
assert_eq!(format!("{}", result.rows()[0].values()[2]), "NULL");
|
assert_eq!(format!("{}", result.rows()[0].values()[2]), "NULL");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn select_uses_explicit_catalog_column_names() {
|
||||||
|
let instance = parent_instance();
|
||||||
|
let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
|
||||||
|
catalog
|
||||||
|
.rename_columns("Parent", ["parent", "child"])
|
||||||
|
.unwrap();
|
||||||
|
let select = parse_select("SELECT parent FROM Parent WHERE child = 'bob'").unwrap();
|
||||||
|
|
||||||
|
let plan = plan_select(&select, &catalog).unwrap();
|
||||||
|
let result = execute(&plan, &instance).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(result.schema().fields()[0].name(), "parent");
|
||||||
|
assert_eq!(result.rows().len(), 1);
|
||||||
|
assert_eq!(format!("{}", result.rows()[0].values()[0]), "alice");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn select_join_filters_cross_product_by_qualified_columns() {
|
||||||
|
let instance: Instance = vec![
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("alice"), Term::constant("bob")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Parent",
|
||||||
|
vec![Term::constant("bob"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Ancestor",
|
||||||
|
vec![Term::constant("bob"), Term::constant("carol")],
|
||||||
|
),
|
||||||
|
Atom::new(
|
||||||
|
"Ancestor",
|
||||||
|
vec![Term::constant("carol"), Term::constant("dave")],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut catalog = PredicateCatalog::from_instance(&instance).unwrap();
|
||||||
|
catalog
|
||||||
|
.rename_columns("Parent", ["parent", "child"])
|
||||||
|
.unwrap();
|
||||||
|
catalog
|
||||||
|
.rename_columns("Ancestor", ["parent", "child"])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let select = parse_select(
|
||||||
|
"SELECT Parent.parent, Ancestor.child FROM Parent, Ancestor \
|
||||||
|
WHERE Parent.child = Ancestor.parent",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let plan = plan_select(&select, &catalog).unwrap();
|
||||||
|
let result = execute(&plan, &instance).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(result.schema().fields()[0].name(), "Parent.parent");
|
||||||
|
assert_eq!(result.schema().fields()[1].name(), "Ancestor.child");
|
||||||
|
assert_eq!(result.rows().len(), 2);
|
||||||
|
|
||||||
|
let mut rows = result
|
||||||
|
.rows()
|
||||||
|
.iter()
|
||||||
|
.map(|row| format!("{} -> {}", row.values()[0], row.values()[1]))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
rows.sort();
|
||||||
|
assert_eq!(
|
||||||
|
rows,
|
||||||
|
vec!["alice -> carol".to_string(), "bob -> dave".to_string()]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user