diff --git a/Cargo.lock b/Cargo.lock index 7b88ca3..83e6b37 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,18 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -47,12 +59,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.11.1" @@ -149,15 +155,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crc32fast" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" -dependencies = [ - "cfg-if", -] - [[package]] name = "critical-section" version = "1.2.0" @@ -209,7 +206,7 @@ dependencies = [ "hashbrown 0.14.5", "lock_api", "once_cell", - "parking_lot_core 0.9.12", + "parking_lot_core", ] [[package]] @@ -290,6 +287,18 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.4.1" @@ -334,25 +343,6 @@ dependencies = [ "percent-encoding", ] 
-[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "geolog-lang" version = "0.1.0" @@ -385,6 +375,7 @@ version = "0.1.0" dependencies = [ "geomerge", "serde_json", + "storage", ] [[package]] @@ -420,6 +411,9 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] [[package]] name = "hashbrown" @@ -442,6 +436,15 @@ version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "heapless" version = "0.7.17" @@ -468,7 +471,7 @@ version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb" dependencies = [ - "bitflags 2.11.1", + "bitflags", "byteorder", "heed-traits", "heed-types", @@ -645,15 +648,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", -] - [[package]] name = "interval-heap" version = "0.0.5" @@ -693,6 +687,17 @@ 
version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -791,7 +796,7 @@ version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" dependencies = [ - "bitflags 2.11.1", + "bitflags", "cfg-if", "cfg_aliases", "libc", @@ -822,31 +827,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", -] - [[package]] name = "parking_lot_core" version = "0.9.12" @@ -855,7 +835,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.18", + "redox_syscall", "smallvec", "windows-link", ] @@ -932,6 +912,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + [[package]] name = "postcard" version = "1.1.3" @@ -977,19 +963,7 @@ dependencies = [ name = "query-ops" version = "0.1.0" dependencies = [ - "query-storage", -] - -[[package]] -name = "query-storage" -version = "0.1.0" -dependencies = [ - "fjall", - "geomerge", - "heed", - "redb", - "sled", - "tempfile", + "storage", ] [[package]] @@ -1051,22 +1025,13 @@ dependencies = [ "libc", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.11.1", + "bitflags", ] [[package]] @@ -1086,6 +1051,20 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "rusqlite" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustc-hash" version = "2.1.2" @@ -1107,7 +1086,7 @@ version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.11.1", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -1120,7 +1099,7 @@ version = "18.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a990b25f351b25139ddc7f21ee3f6f56f86d6846b74ac8fad3a719a287cd4a0" 
dependencies = [ - "bitflags 2.11.1", + "bitflags", "cfg-if", "clipboard-win", "home", @@ -1223,22 +1202,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" -[[package]] -name = "sled" -version = "0.34.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" -dependencies = [ - "crc32fast", - "crossbeam-epoch", - "crossbeam-utils", - "fs2", - "fxhash", - "libc", - "log", - "parking_lot", -] - [[package]] name = "smallvec" version = "1.15.1" @@ -1266,6 +1229,19 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ae9eec00137a8eed469fb4148acd9fc6ac8c3f9b110f52cd34698c8b5bfa0e" +[[package]] +name = "storage" +version = "0.1.0" +dependencies = [ + "fjall", + "geomerge", + "heed", + "redb", + "rusqlite", + "smallvec", + "tempfile", +] + [[package]] name = "syn" version = "2.0.117" @@ -1487,6 +1463,18 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wasip2" version = "1.0.3+wasi-0.2.9" @@ -1533,7 +1521,7 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags 2.11.1", + "bitflags", "hashbrown 0.15.5", "indexmap", "semver", @@ -1640,7 +1628,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags 2.11.1", + "bitflags", "indexmap", "log", "serde", @@ -1705,6 +1693,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.8" diff --git a/crates/geomerge-demo/Cargo.toml b/crates/geomerge-demo/Cargo.toml index 2e9a343..db64078 100644 --- a/crates/geomerge-demo/Cargo.toml +++ b/crates/geomerge-demo/Cargo.toml @@ -10,4 +10,5 @@ workspace = true [dependencies] geomerge = { path = "../../external/geomerge/crates/geomerge" } +storage = { path = "../storage", features = ["geomerge"] } serde_json = "1" diff --git a/crates/geomerge-demo/docs/diagrams/workflow.dot b/crates/geomerge-demo/docs/diagrams/workflow.dot index ee724d2..0848f3d 100644 --- a/crates/geomerge-demo/docs/diagrams/workflow.dot +++ b/crates/geomerge-demo/docs/diagrams/workflow.dot @@ -58,21 +58,26 @@ flat_theory [label = < - - - - - + + + + + +
add_paths_data (transact)
• append Graphs rows
• append G0, G1 rows
• append G.V vertices
• append G.E edge
add_paths_data (tx.insert ×7)
• insert Graphs rows
• insert G0, G1 rows
• insert G.V vertices
• insert G.E edge
• pending RowIds reused as FKs
>, fillcolor = "#E8F5E9", color = "#4CAF50", shape = box] -validate [label = "Law Validation\n(append_row_validated)", fillcolor = "#E8F5E9", color = "#4CAF50"] -assert_edge [label = "assert_edge_was_stored\n(row count, cells)", fillcolor = "#E8F5E9", color = "#4CAF50"] +commit [label = < + + + +
tx.commit()
• law validation
• CommittedTx resolves pending RowIds
>, fillcolor = "#E8F5E9", color = "#4CAF50", shape = box] +assert_edge [label = "assert_edge_was_stored\n(storage.scan(G.E))", fillcolor = "#E8F5E9", color = "#4CAF50"] } subgraph cluster_persist { @@ -114,9 +119,8 @@ build_store -> transact [color = "#4CAF50"] fixture_rows -> transact [style = "dashed", color = "#2196F3"] // Transaction internals -transact -> validate [color = "#4CAF50"] -validate -> transact [style = "dashed", label = "row ids", color = "#4CAF50"] -transact -> assert_edge [color = "#4CAF50"] +transact -> commit [color = "#4CAF50"] +commit -> assert_edge [color = "#4CAF50"] // Into persistence assert_edge -> dump_before [color = "#FF9800"] diff --git a/crates/geomerge-demo/docs/diagrams/workflow.svg b/crates/geomerge-demo/docs/diagrams/workflow.svg index 23f3e81..aded812 100644 --- a/crates/geomerge-demo/docs/diagrams/workflow.svg +++ b/crates/geomerge-demo/docs/diagrams/workflow.svg @@ -1,247 +1,385 @@ + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> - - -GeomergeDemoWorkflow - - -cluster_inputs - -Inputs - - -cluster_demo - -geomerge-demo (run_demo) - - -cluster_loading - -Theory Loading - - -cluster_store - -Store and Transaction - - -cluster_persist - -Persistence Round Trip - - -cluster_report - -Report - - - -paths_schema - -paths.json -(compiled schema) - - - -load_theory - -load_paths_theory -(serde_json) - - - -paths_schema->load_theory - - -include_str! 
- - - -fixture_rows - -Fixture Rows -(graphs, vertices, edge) - - - -transact - -add_paths_data (transact) -• append Graphs rows -• append G0, G1 rows -• append G.V vertices -• append G.E edge - - - -fixture_rows->transact - - - - - -flat_theory - -FlatTheory -• 10 tables -• 12 laws - - - -load_theory->flat_theory - - - - - -build_store - -Store::try_from_theory - - - -flat_theory->build_store - - - - - -demo_report - -DemoReport -• table_count, law_count -• graph, vertex, edge counts -• edge endpoints -• persisted_bytes - - - -flat_theory->demo_report - - -counts - - - -build_store->transact - - - - - -validate - -Law Validation -(append_row_validated) - - - -transact->validate - - - - - -assert_edge - -assert_edge_was_stored -(row count, cells) - - - -transact->assert_edge - - - - - -validate->transact - - -row ids - - - -dump_before - -store.dump() -(before persist) - - - -assert_edge->dump_before - - - - - -encode - -pst::encode_store --> bytes - - - -dump_before->encode - - - - - -compare - -dump equality check - - - -dump_before->compare - - -expected - - - -decode - -pst::decode_store --> restored Store - - - -encode->decode - - -bytes - - - -decode->compare - - - - - -compare->demo_report - - - - - -stdout - -stdout -(println! lines) - - - -demo_report->stdout - - - - + + + GeomergeDemoWorkflow + + + cluster_inputs + + Inputs + + + + cluster_demo + + geomerge-demo (run_demo) + + + + cluster_loading + + Theory Loading + + + + cluster_store + + Storage and Transaction + + + + cluster_persist + + Persistence Round Trip + + + + cluster_report + + Report + + + + + paths_schema + + + paths.json + + + (compiled schema) + + + + + load_theory + + + load_paths_theory + + + (serde_json) + + + + + paths_schema->load_theory + + + include_str! 
+ + + + + fixture_rows + + + Fixture Rows + + + (graphs, vertices, edge) + + + + + transact + + add_paths_data (tx.insert ×7) + + + • insert Graphs rows + + + • insert G0, G1 rows + + + • insert G.V vertices + + • + insert G.E edge + + • + pending RowIds reused as FKs + + + + + fixture_rows->transact + + + + + + flat_theory + + FlatTheory + + • + 10 tables + + • + 12 laws + + + + + load_theory->flat_theory + + + + + + build_store + + + GeomergeStorage::from_theory + + + (Store::try_from_theory) + + + + + flat_theory->build_store + + + + + + demo_report + + DemoReport + + • table_count, law_count + + • graph, vertex, edge counts + + • edge endpoints + + • persisted_bytes + + + + + flat_theory->demo_report + + + counts + + + + + build_store->transact + + + + + + commit + + tx.commit() + + • law validation + + • CommittedTx resolves pending RowIds + + + + + transact->commit + + + + + + assert_edge + + assert_edge_was_stored + + (storage.scan(G.E)) + + + + + commit->assert_edge + + + + + + dump_before + + store.dump() + + (before persist) + + + + + assert_edge->dump_before + + + + + + encode + + pst::encode_store + + -> bytes + + + + + dump_before->encode + + + + + + compare + + dump equality check + + + + + dump_before->compare + + + expected + + + + + decode + + pst::decode_store + + -> restored Store + + + + + encode->decode + + + bytes + + + + + decode->compare + + + + + + compare->demo_report + + + + + + stdout + + stdout + + (println! lines) + + + + + demo_report->stdout + + + + diff --git a/crates/geomerge-demo/src/main.rs b/crates/geomerge-demo/src/main.rs index f2cb071..4421b6b 100644 --- a/crates/geomerge-demo/src/main.rs +++ b/crates/geomerge-demo/src/main.rs @@ -1,25 +1,26 @@ +//! Geomerge storage demo, routed through `storage`. +//! +//! With the v3 trait, [`Storage::transaction`] returns a transaction handle +//! that supports batched inserts with in-flight foreign-key references via +//! pending [`RowId`](storage::id::RowId)s. 
The demo's chain of related +//! inserts now goes entirely through the trait. Theory loading, persistence +//! (`encode_store`/`decode_store`), and `Store::dump` remain direct +//! geomerge calls, since the trait does not model them. + use std::error::Error; -use geomerge::{ - ir::{FlatTheory, Path}, - persist::pst, - store::{Store, StoreIntError}, - table::{CellValue, RowId, ValidationError}, -}; +use geomerge::commit::pst; +use geomerge::ir::FlatTheory; +use storage::adapters::geomerge::GeomergeStorage; +use storage::id::RowId; +use storage::value::Value; +use storage::{Storage, StorageError}; const PATHS_SCHEMA_JSON: &str = include_str!("../../../external/geomerge/crates/geomerge/tests/data/paths.json"); type DemoResult = Result>; -#[derive(Debug, Clone, Copy)] -struct DemoRows { - graph: RowId, - source_vertex: RowId, - target_vertex: RowId, - edge: RowId, -} - #[derive(Debug)] struct DemoReport { table_count: usize, @@ -42,7 +43,7 @@ fn main() -> DemoResult<()> { println!("vertices stored: {}", report.vertex_count); println!("edges stored: {}", report.edge_count); println!( - "edge endpoints: #{} -> #{}", + "edge endpoints: {} -> {}", report.edge_source, report.edge_target ); println!("persisted store bytes: {}", report.persisted_bytes); @@ -55,26 +56,28 @@ fn run_demo() -> DemoResult { let table_count = theory.tables.len(); let law_count = theory.laws.len(); - let mut store = Store::try_from_theory(theory)?; - let rows = add_paths_data(&mut store)?; - assert_edge_was_stored(&store, rows)?; + let mut storage = GeomergeStorage::from_theory(theory)?; + add_paths_data(&mut storage)?; + let (edge_source, edge_target) = assert_edge_was_stored(&storage)?; - let before_persist = store.dump(); - let encoded = pst::encode_store(&store)?; - let restored = pst::decode_store(&encoded)?; + let before_persist = storage.store().dump(); + let encoded = pst::encode_store(storage.store())?; + let restored_store = pst::decode_store(&encoded)?; - if before_persist != 
restored.dump() { + if before_persist != restored_store.dump() { return Err("persisted store did not round-trip".into()); } + let restored = GeomergeStorage::from_store(restored_store); + Ok(DemoReport { table_count, law_count, graph_count: row_count(&restored, "Graphs")?, vertex_count: row_count(&restored, "G.V")?, edge_count: row_count(&restored, "G.E")?, - edge_source: rows.source_vertex, - edge_target: rows.target_vertex, + edge_source, + edge_target, persisted_bytes: encoded.len(), }) } @@ -83,90 +86,50 @@ fn load_paths_theory() -> DemoResult { Ok(serde_json::from_str(PATHS_SCHEMA_JSON)?) } -fn add_paths_data(store: &mut Store) -> Result> { - store.transact(|store| { - let g0_graph = append_row(store, "Graphs", vec![])?; - let designated_graph = append_row(store, "Graphs", vec![])?; +fn add_paths_data(storage: &mut GeomergeStorage) -> Result<(), StorageError> { + let mut tx = storage.transaction()?; - append_row(store, "G0", vec![CellValue::Id(designated_graph)])?; - append_row(store, "G1", vec![CellValue::Id(designated_graph)])?; - - let source_vertex = append_row(store, "G.V", vec![CellValue::Id(g0_graph)])?; - let target_vertex = append_row(store, "G.V", vec![CellValue::Id(g0_graph)])?; - let edge = append_row( - store, - "G.E", - vec![ - CellValue::Id(g0_graph), - CellValue::Id(source_vertex), - CellValue::Id(target_vertex), - ], - )?; - - Ok(DemoRows { - graph: g0_graph, - source_vertex, - target_vertex, - edge, - }) - }) -} - -fn append_row( - store: &mut Store, - table: &str, - values: Vec, -) -> Result> { - let path = Path::from(table); - let table = store - .table_at_mut(&path) - .ok_or_else(|| ValidationError::UnknownTable { path: path.clone() })?; - Ok(table.append_row_validated(values)?) 
-} - -fn assert_edge_was_stored(store: &Store, rows: DemoRows) -> DemoResult<()> { - let edge_table = store - .table_at(&Path::from("G.E")) - .ok_or("missing G.E table after insert")?; - - if edge_table.row_count() != 1 { - return Err(format!("expected one edge row, got {}", edge_table.row_count()).into()); - } - - if edge_table.row_id_at(0) != Some(rows.edge) { - return Err("stored edge row id did not match transaction output".into()); - } - - let expected = [ - CellValue::Id(rows.graph), - CellValue::Id(rows.source_vertex), - CellValue::Id(rows.target_vertex), - ]; - for (column, expected_value) in expected.iter().enumerate() { - if edge_table.cell_at(0, column) != Some(expected_value) { - return Err(format!("unexpected G.E column {column}").into()); - } - } + let g0_graph = tx.insert("Graphs", vec![])?; + let designated = tx.insert("Graphs", vec![])?; + tx.insert("G0", vec![Value::Id(designated.clone())])?; + tx.insert("G1", vec![Value::Id(designated)])?; + let source_vertex = tx.insert("G.V", vec![Value::Id(g0_graph.clone())])?; + let target_vertex = tx.insert("G.V", vec![Value::Id(g0_graph.clone())])?; + tx.insert( + "G.E", + vec![ + Value::Id(g0_graph), + Value::Id(source_vertex), + Value::Id(target_vertex), + ], + )?; + tx.commit()?; Ok(()) } -fn row_count(store: &Store, table: &str) -> DemoResult { - let path = Path::from(table); - Ok(store - .table_at(&path) - .ok_or_else(|| format!("missing table {table}"))? 
- .row_count()) +fn assert_edge_was_stored(storage: &GeomergeStorage) -> DemoResult<(RowId, RowId)> { + let edges = storage.scan("G.E")?; + if edges.len() != 1 { + return Err(format!("expected one edge row, got {}", edges.len()).into()); + } + let (_edge_id, cells) = &edges[0]; + let (source, target) = match cells.as_slice() { + [_graph, Value::Id(source), Value::Id(target)] => (source.clone(), target.clone()), + other => return Err(format!("unexpected G.E cells: {other:?}").into()), + }; + Ok((source, target)) +} + +fn row_count(storage: &GeomergeStorage, table: &str) -> Result { + Ok(storage.scan(table)?.len()) } #[cfg(test)] #[allow(clippy::expect_used, clippy::unwrap_used)] mod tests { - use super::{add_paths_data, load_paths_theory, row_count, run_demo}; - use geomerge::{ - ir::Path, - store::{Store, StoreIntError}, - table::CellValue, + use super::{ + add_paths_data, load_paths_theory, row_count, run_demo, GeomergeStorage, Storage, Value, }; #[test] @@ -184,23 +147,48 @@ mod tests { #[test] fn invalid_edge_is_rejected_without_mutating_store() { let theory = load_paths_theory().expect("fixture"); - let mut store = Store::try_from_theory(theory).expect("store"); - add_paths_data(&mut store).expect("initial valid data"); + let mut storage = GeomergeStorage::from_theory(theory).expect("storage"); + add_paths_data(&mut storage).expect("initial valid data"); - let before_edges = row_count(&store, "G.E").expect("edge count"); - let error = store - .transact(|store| { - let edge_table = store.table_at_mut(&Path::from("G.E")).expect("G.E table"); - edge_table.append_row_validated(vec![ - CellValue::Id(0), - CellValue::Id(0), - CellValue::Id(u64::MAX), - ])?; - Ok(()) - }) - .expect_err("missing target vertex should violate a law"); + let before_edges = row_count(&storage, "G.E").expect("edge count"); - assert!(matches!(*error, StoreIntError::Law(_))); - assert_eq!(row_count(&store, "G.E").expect("edge count"), before_edges); + // Try to insert an edge with an existing 
source RowId but a bogus + // target (we reuse the source's bytes mutated so the RowId is valid + // shape but doesn't reference any actual G.V row). Geomerge should + // reject the commit via a foreign-key law violation, leaving the + // store unchanged. + let edges = storage.scan("G.E").expect("scan G.E"); + let (_, edge_cells) = &edges[0]; + let (graph_id, source_id) = match edge_cells.as_slice() { + [Value::Id(g), Value::Id(s), Value::Id(_)] => (g.clone(), s.clone()), + other => panic!("unexpected G.E cells: {other:?}"), + }; + + let bogus = { + let mut bytes = source_id.as_bytes().to_vec(); + for b in &mut bytes[..32] { + *b ^= 0xFF; + } + storage::id::RowId::new(bytes) + }; + + let result = { + let mut tx = storage.transaction().expect("begin tx"); + tx.insert( + "G.E", + vec![Value::Id(graph_id), Value::Id(source_id), Value::Id(bogus)], + ) + .expect("add succeeds at txn level"); + tx.commit() + }; + assert!( + matches!(result, Err(storage::StorageError::Validation(_))), + "commit should fail with Validation, got {result:?}" + ); + + assert_eq!( + row_count(&storage, "G.E").expect("edge count after failed insert"), + before_edges + ); } } diff --git a/crates/query-ops/Cargo.toml b/crates/query-ops/Cargo.toml index 6d201ae..9eb9376 100644 --- a/crates/query-ops/Cargo.toml +++ b/crates/query-ops/Cargo.toml @@ -9,4 +9,4 @@ rust-version.workspace = true workspace = true [dependencies] -query-storage = { path = "../query-storage" } +storage = { path = "../storage" } diff --git a/crates/query-ops/README.md b/crates/query-ops/README.md index 583cc52..bde49be 100644 --- a/crates/query-ops/README.md +++ b/crates/query-ops/README.md @@ -10,11 +10,12 @@ The operators are: **atom scan**, **semijoin**, and **natural join**. | `scan_atom(&Table, &AtomPattern) -> Relation` | function | Scans the table under the pattern and returns a binding relation with one column per distinct variable in first-occurrence order. 
Literal positions and repeated variables filter rows during the scan. | | `semijoin(&Relation, &Relation) -> Relation` | function | Returns the rows of `left` whose values on the columns shared with `right` also appear in `right`. The output column list is the same as `left.columns`. | | `natural_join(&Relation, &Relation) -> Relation` | function | Returns every pair of `left` and `right` rows that agree on shared columns. Each output row holds the columns of `left` followed by the non-shared columns of `right`. | -| `Table` | struct | Holds positional input rows of fixed arity and carries no column names. Construct it with `Table::new(arity)` or `Table::from_rows(arity, rows)`. | | `AtomPattern` | struct | Specifies, for each table column, either a variable to bind or a literal value to match. The pattern is a `Vec` whose length must equal the table's arity. | | `Term` | enum | Represents one position of an `AtomPattern`. A term is either `Var(String)` to bind the cell to a named variable, or `Lit(Value)` to require the cell to equal a given value. | | `Relation` | struct | Holds rows over named columns and is the type produced by every operator. Construct it with `Relation::new(columns)` or `Relation::from_rows(columns, rows)`. Column names within a single relation must be unique. | -| `Value` | enum | Represents a single cell value stored in a `Table` or `Relation`. A value is either `Int(i64)` or `Str(String)`. | + +The foundational types `Table` (positional input rows of fixed arity) and `Value` (`Int(i64)`, `Str(String)`, or `Id(RowId)`) live in the [ +`storage`](../storage) crate; query-ops imports them. 
Data types and their relationships: @@ -43,8 +44,8 @@ The code below implements the rule (also available [here](tests/hand_plan.rs)): ```rust use query_ops::atom::{AtomPattern, Term, scan_atom}; use query_ops::join::{natural_join, semijoin}; -use query_ops::table::Table; -use query_ops::value::Value; +use storage::table::Table; +use storage::value::Value; fn s(x: &str) -> Value { Value::Str(x.to_string()) diff --git a/crates/query-ops/docs/diagrams/types.dot b/crates/query-ops/docs/diagrams/types.dot index d11a233..6b983a8 100644 --- a/crates/query-ops/docs/diagrams/types.dot +++ b/crates/query-ops/docs/diagrams/types.dot @@ -50,6 +50,7 @@ value_node [label = < +
Value (enum)
Int(i64)
Str(String)
Id(RowId)
>, fillcolor = "#FFF3E0", color = "#FF9800"] // composition edges: arrow X -> Y reads "X contains Y" diff --git a/crates/query-ops/docs/diagrams/types.svg b/crates/query-ops/docs/diagrams/types.svg index f34a94d..2a08f7d 100644 --- a/crates/query-ops/docs/diagrams/types.svg +++ b/crates/query-ops/docs/diagrams/types.svg @@ -1,85 +1,147 @@ + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> - - -QueryOpsTypes - - - -table_node - -Table -  (struct) -arity: usize -rows: Vec<Vec<Value>> - - - -value_node - -Value -  (enum) -Int(i64) -Str(String) - - - -table_node->value_node - - -Vec<Vec<Value>> - - - -relation_node - -Relation -  (struct) -columns: Vec<String> -rows: Vec<Vec<Value>> - - - -relation_node->value_node - - -Vec<Vec<Value>> - - - -atom_pattern_node - -AtomPattern -  (struct) -columns: Vec<Term> - - - -term_node - -Term -  (enum) -Var(String) -Lit(Value) - - - -atom_pattern_node->term_node - - -Vec<Term> - - - -term_node->value_node - - -Lit(Value) - - + + + QueryOpsTypes + + + + table_node + + Table + + +  (struct) + + + arity: usize + + rows: + Vec<Vec<Value>> + + + + + value_node + + Value + + +  (enum) + + + Int(i64) + + + Str(String) + + + Id(RowId) + + + + + table_node->value_node + + + Vec<Vec<Value>> + + + + + relation_node + + Relation + + +  (struct) + + + columns: Vec<String> + + + rows: Vec<Vec<Value>> + + + + + relation_node->value_node + + + Vec<Vec<Value>> + + + + + atom_pattern_node + + AtomPattern + + +  (struct) + + + columns: Vec<Term> + + + + + term_node + + Term + + +  (enum) + + + Var(String) + + + Lit(Value) + + + + + atom_pattern_node->term_node + + + Vec<Term> + + + + + term_node->value_node + + + Lit(Value) + + + diff --git a/crates/query-ops/docs/diagrams/workflow.svg b/crates/query-ops/docs/diagrams/workflow.svg index f02b646..56c7759 100644 --- a/crates/query-ops/docs/diagrams/workflow.svg +++ b/crates/query-ops/docs/diagrams/workflow.svg @@ -1,159 +1,257 @@ + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> - 
-QueryOpsHandPlan - - -cluster_inputs - -Inputs (positional tables) - - -cluster_atoms - -Atom Scans  (scan_atom: Table × AtomPattern → Relation) - - -cluster_joins - -Joins  (shared cols = matching column names) - - -cluster_output - -Output (binding relation) - - - -author_table - -Table: author -• arity 2 -• rows: (name, book) - - - -author_rel - -author_rel -pattern: [Var name, Var book] -cols: [name, book] - - - -author_table->author_rel - - - - - -bestseller_table - -Table: bestseller -• arity 1 -• rows: (book) - - - -bestseller_rel - -bestseller_rel -pattern: [Var book] -cols: [book] - - - -bestseller_table->bestseller_rel - - - - - -price_table - -Table: price -• arity 2 -• rows: (book, dollars) - - - -price_rel - -price_rel -pattern: [Var book, Var dollars] -cols: [book, dollars] - - - -price_table->price_rel - - - - - -semijoin_step - -semijoin -authors of bestsellers -shared: book -cols: [name, book] - - - -author_rel->semijoin_step - - -left - - - -bestseller_rel->semijoin_step - - -right - - - -natural_join_step - -natural_join -attach each book's price -shared: book -cols: [name, book, dollars] - - - -price_rel->natural_join_step - - -right - - - -semijoin_step->natural_join_step - - -left - - - -result - -Q result -authors of bestsellers with each book's price -cols: [name, book, dollars] - - - -natural_join_step->result - - - - + viewBox="0.00 0.00 1481.75 471.00" xmlns="http://www.w3.org/2000/svg"> + + QueryOpsHandPlan + + + cluster_inputs + + Inputs (positional tables) + + + + cluster_atoms + + Atom Scans  (scan_atom: Table × AtomPattern → Relation) + + + + cluster_joins + + Joins  (shared cols = matching column names) + + + + cluster_output + + Output (binding relation) + + + + + author_table + + Table: author + + • + arity 2 + + • + rows: (name, book) + + + + + author_rel + + author_rel + + + pattern: [Var name, Var book] + + + cols: [name, book] + + + + + author_table->author_rel + + + + + + bestseller_table + + Table: bestseller + + • + arity 
1 + + • + rows: (book) + + + + + bestseller_rel + + bestseller_rel + + + pattern: [Var book] + + + cols: [book] + + + + + bestseller_table->bestseller_rel + + + + + + price_table + + Table: price + + • + arity 2 + + • + rows: (book, dollars) + + + + + price_rel + + price_rel + + + pattern: [Var book, Var dollars] + + + cols: [book, dollars] + + + + + price_table->price_rel + + + + + + semijoin_step + + semijoin + + + authors of bestsellers + + + shared: book + + + cols: [name, book] + + + + + author_rel->semijoin_step + + + left + + + + + bestseller_rel->semijoin_step + + + right + + + + + natural_join_step + + natural_join + + + attach each book's price + + + shared: book + + + cols: [name, book, dollars] + + + + + price_rel->natural_join_step + + + right + + + + + semijoin_step->natural_join_step + + + left + + + + + result + + Q result + + + authors of bestsellers with each book's price + + + cols: [name, book, dollars] + + + + + natural_join_step->result + + + + diff --git a/crates/query-ops/src/atom.rs b/crates/query-ops/src/atom.rs index e202f90..dcd7f35 100644 --- a/crates/query-ops/src/atom.rs +++ b/crates/query-ops/src/atom.rs @@ -9,8 +9,8 @@ use std::collections::HashMap; -use query_storage::table::Table; -use query_storage::value::Value; +use storage::table::Table; +use storage::value::Value; use crate::relation::Relation; diff --git a/crates/query-ops/src/join.rs b/crates/query-ops/src/join.rs index 425c6f9..8764327 100644 --- a/crates/query-ops/src/join.rs +++ b/crates/query-ops/src/join.rs @@ -11,7 +11,7 @@ use std::collections::{HashMap, HashSet}; -use query_storage::value::Value; +use storage::value::Value; use crate::relation::Relation; diff --git a/crates/query-ops/src/lib.rs b/crates/query-ops/src/lib.rs index 995f5b7..f06b4b1 100644 --- a/crates/query-ops/src/lib.rs +++ b/crates/query-ops/src/lib.rs @@ -2,7 +2,7 @@ //! //! Three operators are in scope: //! -//! - [`atom::scan_atom`] scans a [`Table`](query_storage::table::Table) under +//! 
- [`atom::scan_atom`] scans a [`Table`](storage::table::Table) under //! an [`atom::AtomPattern`], filtering for repeated-variable equality and //! literal equality, and outputs a binding [`relation::Relation`]. //! - [`join::semijoin`] keeps rows of one relation whose shared-column values @@ -14,8 +14,8 @@ //! is just an expression like //! `natural_join(&semijoin(&a, &b), &scan_atom(&t, &p))`. //! -//! Foundational types [`Value`](query_storage::value::Value) and -//! [`Table`](query_storage::table::Table) live in `query-storage`, the +//! Foundational types [`Value`](storage::value::Value) and +//! [`Table`](storage::table::Table) live in `storage`, the //! storage-layer crate this crate is built on; storage backends produce //! `Table`s that operators here consume. diff --git a/crates/query-ops/src/relation.rs b/crates/query-ops/src/relation.rs index 1c32565..4704ab5 100644 --- a/crates/query-ops/src/relation.rs +++ b/crates/query-ops/src/relation.rs @@ -10,7 +10,7 @@ use std::collections::HashSet; -use query_storage::value::Value; +use storage::value::Value; #[derive(Debug, Clone)] pub struct Relation { diff --git a/crates/query-ops/tests/hand_plan.rs b/crates/query-ops/tests/hand_plan.rs index 082e189..3e7a2e2 100644 --- a/crates/query-ops/tests/hand_plan.rs +++ b/crates/query-ops/tests/hand_plan.rs @@ -13,10 +13,10 @@ //! bestsellers via a semijoin against `bestseller`, then attaches each book's //! price via a natural join against `price`. 
-use query_ops::atom::{AtomPattern, Term, scan_atom}; +use query_ops::atom::{scan_atom, AtomPattern, Term}; use query_ops::join::{natural_join, semijoin}; -use query_storage::table::Table; -use query_storage::value::Value; +use storage::table::Table; +use storage::value::Value; fn s(x: &str) -> Value { Value::Str(x.to_string()) diff --git a/crates/query-ops/tests/storage_bridge.rs b/crates/query-ops/tests/storage_bridge.rs index 90ce75b..8493e3c 100644 --- a/crates/query-ops/tests/storage_bridge.rs +++ b/crates/query-ops/tests/storage_bridge.rs @@ -4,10 +4,10 @@ //! Demonstrates that `query-ops` operators can consume from a storage backend //! through the [`scan_as_table`] bridge, with no changes to `query-ops` itself. -use query_ops::atom::{AtomPattern, Term, scan_atom}; -use query_storage::table::Table; -use query_storage::value::Value; -use query_storage::{MemoryStorage, Storage, StorageError, scan_as_table}; +use query_ops::atom::{scan_atom, AtomPattern, Term}; +use storage::table::Table; +use storage::value::Value; +use storage::{scan_as_table, MemoryStorage, Storage, StorageError}; fn i(x: i64) -> Value { Value::Int(x) diff --git a/crates/query-storage/src/fjall.rs b/crates/query-storage/src/fjall.rs deleted file mode 100644 index f46c0ea..0000000 --- a/crates/query-storage/src/fjall.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! fjall adapter. -//! -//! Each relation gets a fjall [`PartitionHandle`](fjall::PartitionHandle) of -//! the same name. A reserved partition named `__meta` carries per-relation -//! metadata (arity and next synthetic row ID). - -use fjall::{Keyspace, PartitionCreateOptions, PartitionHandle}; - -use crate::value::Value; - -use crate::codec::{decode_meta, decode_row, encode_meta, encode_row, row_key}; -use crate::{Storage, StorageError}; - -const META_PARTITION: &str = "__meta"; - -fn backend(err: E) -> StorageError { - StorageError::Backend(Box::new(err)) -} - -/// fjall-backed [`Storage`] implementation. 
-pub struct FjallStorage { - keyspace: Keyspace, - meta: PartitionHandle, -} - -impl FjallStorage { - /// Open or create a fjall keyspace at `path`. - /// - /// # Errors - /// Returns [`StorageError::Backend`] if fjall fails to open the path. - pub fn open(path: impl AsRef) -> Result { - let keyspace = fjall::Config::new(path).open().map_err(backend)?; - let meta = keyspace - .open_partition(META_PARTITION, PartitionCreateOptions::default()) - .map_err(backend)?; - Ok(Self { keyspace, meta }) - } - - fn relation_partition(&self, name: &str) -> Result { - self.keyspace - .open_partition(name, PartitionCreateOptions::default()) - .map_err(backend) - } - - fn load_meta(&self, name: &str) -> Result<(u32, u64), StorageError> { - let raw = self - .meta - .get(name.as_bytes()) - .map_err(backend)? - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - Ok(decode_meta(raw.as_ref())?) - } - - fn store_meta(&self, name: &str, arity: u32, next_id: u64) -> Result<(), StorageError> { - self.meta - .insert(name.as_bytes(), encode_meta(arity, next_id)) - .map_err(backend)?; - Ok(()) - } -} - -impl Storage for FjallStorage { - fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { - if name == META_PARTITION { - return Err(StorageError::Validation(format!( - "relation name '{name}' is reserved" - ))); - } - if self.meta.contains_key(name.as_bytes()).map_err(backend)? 
{ - return Err(StorageError::RelationExists(name.to_string())); - } - let arity_u32 = u32::try_from(arity) - .map_err(|_| StorageError::Validation(format!("arity {arity} exceeds u32 range")))?; - self.store_meta(name, arity_u32, 0)?; - let _ = self.relation_partition(name)?; - Ok(()) - } - - fn arity(&self, name: &str) -> Result { - let (arity, _) = self.load_meta(name)?; - Ok(arity as usize) - } - - fn scan(&self, name: &str) -> Result>, StorageError> { - let _ = self.load_meta(name)?; - let partition = self.relation_partition(name)?; - let mut rows = Vec::new(); - for entry in partition.iter() { - let (_, value) = entry.map_err(backend)?; - rows.push(decode_row(value.as_ref())?); - } - Ok(rows) - } - - fn insert(&mut self, name: &str, row: Vec) -> Result<(), StorageError> { - let (arity, next_id) = self.load_meta(name)?; - if row.len() != arity as usize { - return Err(StorageError::ArityMismatch { - expected: arity as usize, - got: row.len(), - }); - } - let partition = self.relation_partition(name)?; - partition - .insert(row_key(next_id), encode_row(&row)) - .map_err(backend)?; - self.store_meta(name, arity, next_id + 1)?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn i(x: i64) -> Value { - Value::Int(x) - } - - fn open_temp() -> Result { - let dir = tempfile::tempdir().map_err(backend)?; - let storage = FjallStorage::open(dir.path())?; - std::mem::forget(dir); - Ok(storage) - } - - #[test] - fn create_insert_scan_roundtrip() -> Result<(), StorageError> { - let mut storage = open_temp()?; - storage.create_relation("edge", 2)?; - storage.insert("edge", vec![i(1), i(2)])?; - storage.insert("edge", vec![i(2), i(3)])?; - let rows = storage.scan("edge")?; - assert_eq!(rows, vec![vec![i(1), i(2)], vec![i(2), i(3)]]); - assert_eq!(storage.arity("edge")?, 2); - Ok(()) - } - - #[test] - fn duplicate_create_returns_err() -> Result<(), StorageError> { - let mut storage = open_temp()?; - storage.create_relation("edge", 2)?; - assert!(matches!( - 
storage.create_relation("edge", 2), - Err(StorageError::RelationExists(_)) - )); - Ok(()) - } -} diff --git a/crates/query-storage/src/geomerge.rs b/crates/query-storage/src/geomerge.rs deleted file mode 100644 index d3e1e32..0000000 --- a/crates/query-storage/src/geomerge.rs +++ /dev/null @@ -1,252 +0,0 @@ -//! Geomerge adapter. -//! -//! Unlike the other backends, geomerge schemas are **immutable after store -//! construction**: there is no public API to register a new table on a live -//! `Store`. The adapter therefore expects all relations to be declared up -//! front via a `FlatTheory` passed to [`GeomergeStorage::from_theory`], and -//! [`Storage::create_relation`] becomes a verifier that the relation exists -//! in the loaded theory and that its arity matches. -//! -//! Additional v1 mismatches with the trait: -//! -//! - Column types are typed (`PrimInt` / `PrimString`) in geomerge but the -//! trait's `create_relation` only carries `arity`. The adapter cannot -//! declare a relation at runtime, so this issue surfaces only at insert -//! time when geomerge rejects a row with `StorageError::Validation`. -//! - Cells of type `CellValue::Id` cannot be represented in our `Value` enum. -//! Scanning a table that contains such cells returns `StorageError::Validation`. -//! - Every `insert` opens a fresh `Transaction` and commits. Law violations -//! surface at commit time, not at the `add` call. - -use std::collections::HashSet; - -use geomerge::ir::{self, Path}; -use geomerge::store::Store; -use geomerge::table::CellValue; -use geomerge::txn::ops::TxnCellValue; - -use crate::value::Value; - -use crate::{Storage, StorageError}; - -fn backend(err: E) -> StorageError { - StorageError::Backend(Box::new(err)) -} - -fn validation(msg: impl Into) -> StorageError { - StorageError::Validation(msg.into()) -} - -/// Geomerge-backed [`Storage`] implementation. 
-/// -/// Construct via [`GeomergeStorage::new`] (empty store, no relations) or -/// [`GeomergeStorage::from_theory`] (preloaded with a `FlatTheory`). -pub struct GeomergeStorage { - store: Store, - declared: HashSet, -} - -impl Default for GeomergeStorage { - fn default() -> Self { - Self::new() - } -} - -impl GeomergeStorage { - /// Build an empty store. No relations are available until the store is - /// rebuilt via a theory. - #[must_use] - pub fn new() -> Self { - Self { - store: Store::new(), - declared: HashSet::new(), - } - } - - /// Build a store from a pre-defined `FlatTheory`. All `create_relation` - /// calls must reference relations declared in the theory. - /// - /// # Errors - /// Returns [`StorageError::Backend`] if geomerge rejects the theory. - pub fn from_theory(theory: ir::FlatTheory) -> Result { - let store = Store::try_from_theory(theory).map_err(|e| backend(*e))?; - Ok(Self { - store, - declared: HashSet::new(), - }) - } -} - -impl Storage for GeomergeStorage { - fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { - if self.declared.contains(name) { - return Err(StorageError::RelationExists(name.to_string())); - } - let path: Path = name.into(); - let table = self.store.table_at(&path).ok_or_else(|| { - validation(format!( - "relation '{name}' is not declared in the loaded geomerge theory; \ - geomerge does not support runtime relation creation" - )) - })?; - let declared_arity = table.schema().columns.len(); - if declared_arity != arity { - return Err(StorageError::ArityMismatch { - expected: declared_arity, - got: arity, - }); - } - self.declared.insert(name.to_string()); - Ok(()) - } - - fn arity(&self, name: &str) -> Result { - let path: Path = name.into(); - let table = self - .store - .table_at(&path) - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - Ok(table.schema().columns.len()) - } - - fn scan(&self, name: &str) -> Result>, StorageError> { - let path: Path = name.into(); - let 
table = self - .store - .table_at(&path) - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - let arity = table.schema().columns.len(); - let mut rows = Vec::with_capacity(table.row_count()); - for r in 0..table.row_count() { - let mut row = Vec::with_capacity(arity); - for c in 0..arity { - let cell = table - .cell_at(r, c) - .ok_or_else(|| validation(format!("missing cell at ({r}, {c}) in '{name}'")))?; - row.push(cell_to_value(cell)?); - } - rows.push(row); - } - Ok(rows) - } - - fn insert(&mut self, name: &str, row: Vec) -> Result<(), StorageError> { - let path: Path = name.into(); - let arity = self.arity(name)?; - if row.len() != arity { - return Err(StorageError::ArityMismatch { - expected: arity, - got: row.len(), - }); - } - let values: Vec = row.into_iter().map(value_to_txn_cell).collect(); - let mut txn = self.store.transaction(); - txn.add(&path, values) - .map_err(|e| validation(e.to_string()))?; - // Law violations surface here at commit time, not at add time. 
- txn.commit().map_err(|e| validation(e.to_string()))?; - Ok(()) - } -} - -fn cell_to_value(cell: &CellValue) -> Result { - match cell { - CellValue::Int(i) => Ok(Value::Int(*i)), - CellValue::Str(s) => Ok(Value::Str(s.clone())), - CellValue::Id(_) => Err(validation( - "geomerge CellValue::Id cannot be represented in the playground's Value enum", - )), - } -} - -fn value_to_txn_cell(value: Value) -> TxnCellValue { - match value { - Value::Int(i) => TxnCellValue::Int(i), - Value::Str(s) => TxnCellValue::Str(s), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use geomerge::ir::{ColType, FlatTheory, PrimType, Schema, TableEntry}; - - fn i(x: i64) -> Value { - Value::Int(x) - } - - fn int_schema(arity: usize) -> Schema { - Schema { - columns: (0..arity) - .map(|_| ColType::PrimType { - prim: PrimType::PrimInt, - }) - .collect(), - primary_key: None, - } - } - - fn theory_with_one_int_table(name: &str, arity: usize) -> FlatTheory { - FlatTheory { - tables: vec![TableEntry { - path: name.into(), - table: int_schema(arity), - }], - laws: Vec::new(), - } - } - - #[test] - fn empty_store_has_no_relations() { - let storage = GeomergeStorage::new(); - assert!(matches!( - storage.arity("edge"), - Err(StorageError::RelationNotFound(_)) - )); - } - - #[test] - fn create_relation_on_undeclared_returns_validation_error() { - let mut storage = GeomergeStorage::new(); - assert!(matches!( - storage.create_relation("edge", 2), - Err(StorageError::Validation(_)) - )); - } - - #[test] - fn theory_loaded_insert_scan_roundtrip() -> Result<(), StorageError> { - let theory = theory_with_one_int_table("edge", 2); - let mut storage = GeomergeStorage::from_theory(theory)?; - storage.create_relation("edge", 2)?; - storage.insert("edge", vec![i(1), i(2)])?; - storage.insert("edge", vec![i(3), i(4)])?; - let rows = storage.scan("edge")?; - assert_eq!(rows, vec![vec![i(1), i(2)], vec![i(3), i(4)]]); - assert_eq!(storage.arity("edge")?, 2); - Ok(()) - } - - #[test] - fn 
duplicate_create_returns_err() -> Result<(), StorageError> { - let theory = theory_with_one_int_table("edge", 2); - let mut storage = GeomergeStorage::from_theory(theory)?; - storage.create_relation("edge", 2)?; - assert!(matches!( - storage.create_relation("edge", 2), - Err(StorageError::RelationExists(_)) - )); - Ok(()) - } - - #[test] - fn insert_wrong_type_returns_validation_error() -> Result<(), StorageError> { - let theory = theory_with_one_int_table("edge", 2); - let mut storage = GeomergeStorage::from_theory(theory)?; - storage.create_relation("edge", 2)?; - // Insert a Str into an Int column: geomerge rejects it. - let result = storage.insert("edge", vec![Value::Str("not an int".to_string()), i(2)]); - assert!(matches!(result, Err(StorageError::Validation(_)))); - Ok(()) - } -} diff --git a/crates/query-storage/src/lib.rs b/crates/query-storage/src/lib.rs deleted file mode 100644 index 56c514e..0000000 --- a/crates/query-storage/src/lib.rs +++ /dev/null @@ -1,145 +0,0 @@ -//! Storage layer for the query-plan playground. -//! -//! This is the foundational crate of the workspace. It owns the [`Value`] cell -//! type and the [`Table`] container, defines the [`Storage`] trait, and ships -//! adapters for several backends behind Cargo features. Higher-level crates -//! such as `query-ops` depend on this crate for both the types and the trait. -//! -//! The v1 trait surface is deliberately narrow: create a relation, scan all -//! rows, insert a row, ask for arity. Transactions, range scans, deletes, and -//! delta streams are not modeled yet, and will be added when a specific -//! experiment demands them. -//! -//! ## Backends -//! -//! [`MemoryStorage`] is always available. Other backends are gated behind -//! Cargo features so users only pay for what they need: -//! -//! - `lmdb` — LMDB via the `heed` crate -//! - `redb` — pure-Rust embedded KV -//! - `fjall` — pure-Rust LSM-tree -//! - `sled` — pure-Rust LSM-tree -//! 
- `geomerge` — the workspace's `geomerge` crate - -use crate::table::Table; -use crate::value::Value; - -pub mod codec; -pub mod memory; -pub mod table; -pub mod value; - -#[cfg(feature = "sled")] -pub mod sled; - -#[cfg(feature = "redb")] -pub mod redb; - -#[cfg(feature = "fjall")] -pub mod fjall; - -#[cfg(feature = "lmdb")] -pub mod lmdb; - -#[cfg(feature = "geomerge")] -pub mod geomerge; - -pub use memory::MemoryStorage; - -/// Errors returned by a [`Storage`] backend. -/// -/// Backend-specific failures (LMDB transaction aborts, sled I/O errors, etc.) -/// are wrapped in [`StorageError::Backend`]. -#[derive(Debug)] -pub enum StorageError { - /// No relation with the given name exists in this backend. - RelationNotFound(String), - /// A relation with the given name already exists. - RelationExists(String), - /// A row was offered with the wrong number of columns. - ArityMismatch { expected: usize, got: usize }, - /// A backend-defined validation rule rejected the operation, for example - /// a `geomerge` law violation. - Validation(String), - /// A row decoded from storage was malformed. - Decode(codec::CodecError), - /// A backend-specific error wrapped for transport across the trait. 
- Backend(Box), -} - -impl std::fmt::Display for StorageError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::RelationNotFound(name) => write!(f, "relation not found: {name}"), - Self::RelationExists(name) => write!(f, "relation already exists: {name}"), - Self::ArityMismatch { expected, got } => { - write!(f, "arity mismatch: expected {expected}, got {got}") - } - Self::Validation(msg) => write!(f, "validation failed: {msg}"), - Self::Decode(err) => write!(f, "decode error: {err}"), - Self::Backend(err) => write!(f, "backend error: {err}"), - } - } -} - -impl std::error::Error for StorageError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - Self::Backend(err) => Some(err.as_ref()), - Self::Decode(err) => Some(err), - _ => None, - } - } -} - -impl From for StorageError { - fn from(err: codec::CodecError) -> Self { - Self::Decode(err) - } -} - -/// Backend-agnostic interface for storing and retrieving rows. -/// -/// Each relation has a fixed name, a fixed arity (row width), and an ordered -/// collection of rows whose cells are [`Value`]s. Concrete implementations -/// include [`MemoryStorage`] in this crate plus the feature-gated backends. -pub trait Storage { - /// Create a new relation with the given name and arity. - /// - /// # Errors - /// Returns [`StorageError::RelationExists`] if a relation with the given - /// name already exists. - fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError>; - - /// Return the arity of the given relation. - /// - /// # Errors - /// Returns [`StorageError::RelationNotFound`] if no such relation exists. - fn arity(&self, name: &str) -> Result; - - /// Scan all rows of the given relation in storage order. - /// - /// # Errors - /// Returns [`StorageError::RelationNotFound`] if no such relation exists. - fn scan(&self, name: &str) -> Result>, StorageError>; - - /// Append a row to the given relation. 
- /// - /// # Errors - /// Returns [`StorageError::RelationNotFound`] if no such relation exists, - /// [`StorageError::ArityMismatch`] if the row's length differs from the - /// declared arity, or [`StorageError::Validation`] / [`StorageError::Backend`] - /// if a backend-specific rule rejects the row. - fn insert(&mut self, name: &str, row: Vec) -> Result<(), StorageError>; -} - -/// Materialize a relation from a [`Storage`] backend as a [`Table`] that -/// query-language operators can consume. -/// -/// # Errors -/// Returns any error produced by [`Storage::arity`] or [`Storage::scan`]. -pub fn scan_as_table(storage: &dyn Storage, name: &str) -> Result { - let arity = storage.arity(name)?; - let rows = storage.scan(name)?; - Ok(Table::from_rows(arity, rows)) -} diff --git a/crates/query-storage/src/lmdb.rs b/crates/query-storage/src/lmdb.rs deleted file mode 100644 index 45e3956..0000000 --- a/crates/query-storage/src/lmdb.rs +++ /dev/null @@ -1,201 +0,0 @@ -//! LMDB adapter via the `heed` crate. -//! -//! Maps each relation onto a named LMDB sub-database of the same name. A -//! reserved sub-database named `__meta` carries per-relation metadata (arity -//! and next synthetic row ID). -//! -//! Note: every [`Storage::insert`] opens its own write transaction. LMDB -//! serializes writers across the env, so per-row inserts will be slow on real -//! workloads. The v1 trait does not yet expose batch inserts. - -use heed::types::Bytes; -use heed::{Database, Env, EnvOpenOptions}; - -use crate::value::Value; - -use crate::codec::{decode_meta, decode_row, encode_meta, encode_row, row_key}; -use crate::{Storage, StorageError}; - -const META_DB: &str = "__meta"; -const DEFAULT_MAX_DBS: u32 = 128; -const DEFAULT_MAP_SIZE: usize = 100 * 1024 * 1024; - -fn backend(err: E) -> StorageError { - StorageError::Backend(Box::new(err)) -} - -/// LMDB-backed [`Storage`] implementation. 
-pub struct LmdbStorage { - env: Env, - meta: Database, -} - -impl LmdbStorage { - /// Open or create an LMDB environment at `path`. - /// - /// The path must already exist as a directory; LMDB will create its data - /// files inside it. - /// - /// # Errors - /// Returns [`StorageError::Backend`] if LMDB fails to open. - /// - /// # Safety - /// Internally uses `EnvOpenOptions::open`, which heed marks `unsafe` - /// because the memory-mapped file's contents can be modified by other - /// processes. The adapter assumes single-process exclusive access. - #[allow(unsafe_code)] - pub fn open(path: impl AsRef) -> Result { - // SAFETY: heed marks `open` unsafe because the mmap'd file's contents - // can be modified by other processes, violating Rust's aliasing rules. - // This adapter assumes single-process exclusive access to the path, - // which holds for tests and typical playground use. - let env = unsafe { - EnvOpenOptions::new() - .max_dbs(DEFAULT_MAX_DBS) - .map_size(DEFAULT_MAP_SIZE) - .open(path) - .map_err(backend)? - }; - let mut wtxn = env.write_txn().map_err(backend)?; - let meta: Database = env - .create_database(&mut wtxn, Some(META_DB)) - .map_err(backend)?; - wtxn.commit().map_err(backend)?; - Ok(Self { env, meta }) - } - - fn open_relation_db( - &self, - wtxn: &mut heed::RwTxn, - name: &str, - ) -> Result, StorageError> { - self.env.create_database(wtxn, Some(name)).map_err(backend) - } -} - -impl Storage for LmdbStorage { - fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { - if name == META_DB { - return Err(StorageError::Validation(format!( - "relation name '{name}' is reserved" - ))); - } - let arity_u32 = u32::try_from(arity) - .map_err(|_| StorageError::Validation(format!("arity {arity} exceeds u32 range")))?; - let mut wtxn = self.env.write_txn().map_err(backend)?; - if self - .meta - .get(&wtxn, name.as_bytes()) - .map_err(backend)? 
- .is_some() - { - return Err(StorageError::RelationExists(name.to_string())); - } - let encoded = encode_meta(arity_u32, 0); - self.meta - .put(&mut wtxn, name.as_bytes(), &encoded[..]) - .map_err(backend)?; - let _ = self.open_relation_db(&mut wtxn, name)?; - wtxn.commit().map_err(backend)?; - Ok(()) - } - - fn arity(&self, name: &str) -> Result { - let rtxn = self.env.read_txn().map_err(backend)?; - let raw = self - .meta - .get(&rtxn, name.as_bytes()) - .map_err(backend)? - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - let (arity, _) = decode_meta(raw)?; - Ok(arity as usize) - } - - fn scan(&self, name: &str) -> Result>, StorageError> { - let rtxn = self.env.read_txn().map_err(backend)?; - if self - .meta - .get(&rtxn, name.as_bytes()) - .map_err(backend)? - .is_none() - { - return Err(StorageError::RelationNotFound(name.to_string())); - } - let db: Database = self - .env - .open_database(&rtxn, Some(name)) - .map_err(backend)? - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - let mut rows = Vec::new(); - for entry in db.iter(&rtxn).map_err(backend)? { - let (_, value) = entry.map_err(backend)?; - rows.push(decode_row(value)?); - } - Ok(rows) - } - - fn insert(&mut self, name: &str, row: Vec) -> Result<(), StorageError> { - let mut wtxn = self.env.write_txn().map_err(backend)?; - let meta_bytes = self - .meta - .get(&wtxn, name.as_bytes()) - .map_err(backend)? 
- .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - let (arity, next_id) = decode_meta(meta_bytes)?; - if row.len() != arity as usize { - return Err(StorageError::ArityMismatch { - expected: arity as usize, - got: row.len(), - }); - } - let db = self.open_relation_db(&mut wtxn, name)?; - let key = row_key(next_id); - let value = encode_row(&row); - db.put(&mut wtxn, &key[..], &value[..]).map_err(backend)?; - let new_meta = encode_meta(arity, next_id + 1); - self.meta - .put(&mut wtxn, name.as_bytes(), &new_meta[..]) - .map_err(backend)?; - wtxn.commit().map_err(backend)?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn i(x: i64) -> Value { - Value::Int(x) - } - - fn open_temp() -> Result { - let dir = tempfile::tempdir().map_err(backend)?; - let storage = LmdbStorage::open(dir.path())?; - std::mem::forget(dir); - Ok(storage) - } - - #[test] - fn create_insert_scan_roundtrip() -> Result<(), StorageError> { - let mut storage = open_temp()?; - storage.create_relation("edge", 2)?; - storage.insert("edge", vec![i(1), i(2)])?; - storage.insert("edge", vec![i(2), i(3)])?; - let rows = storage.scan("edge")?; - assert_eq!(rows, vec![vec![i(1), i(2)], vec![i(2), i(3)]]); - assert_eq!(storage.arity("edge")?, 2); - Ok(()) - } - - #[test] - fn duplicate_create_returns_err() -> Result<(), StorageError> { - let mut storage = open_temp()?; - storage.create_relation("edge", 2)?; - assert!(matches!( - storage.create_relation("edge", 2), - Err(StorageError::RelationExists(_)) - )); - Ok(()) - } -} diff --git a/crates/query-storage/src/memory.rs b/crates/query-storage/src/memory.rs deleted file mode 100644 index 8cd96d8..0000000 --- a/crates/query-storage/src/memory.rs +++ /dev/null @@ -1,147 +0,0 @@ -//! In-memory backend, keyed by relation name. Always available. 
- -use std::collections::HashMap; - -use crate::value::Value; - -use crate::{Storage, StorageError}; - -/// In-memory backend, useful as the default in tests and as a correctness -/// oracle for other backends. -#[derive(Debug, Default)] -pub struct MemoryStorage { - relations: HashMap, -} - -#[derive(Debug)] -struct MemoryRelation { - arity: usize, - rows: Vec>, -} - -impl MemoryStorage { - #[must_use] - pub fn new() -> Self { - Self::default() - } -} - -impl Storage for MemoryStorage { - fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { - if self.relations.contains_key(name) { - return Err(StorageError::RelationExists(name.to_string())); - } - self.relations.insert( - name.to_string(), - MemoryRelation { - arity, - rows: Vec::new(), - }, - ); - Ok(()) - } - - fn arity(&self, name: &str) -> Result { - self.relations - .get(name) - .map(|r| r.arity) - .ok_or_else(|| StorageError::RelationNotFound(name.to_string())) - } - - fn scan(&self, name: &str) -> Result>, StorageError> { - self.relations - .get(name) - .map(|r| r.rows.clone()) - .ok_or_else(|| StorageError::RelationNotFound(name.to_string())) - } - - fn insert(&mut self, name: &str, row: Vec) -> Result<(), StorageError> { - let relation = self - .relations - .get_mut(name) - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - if row.len() != relation.arity { - return Err(StorageError::ArityMismatch { - expected: relation.arity, - got: row.len(), - }); - } - relation.rows.push(row); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::scan_as_table; - - fn i(x: i64) -> Value { - Value::Int(x) - } - - #[test] - fn create_insert_scan_roundtrip() -> Result<(), StorageError> { - let mut storage = MemoryStorage::new(); - storage.create_relation("edge", 2)?; - storage.insert("edge", vec![i(1), i(2)])?; - storage.insert("edge", vec![i(2), i(3)])?; - let rows = storage.scan("edge")?; - assert_eq!(rows, vec![vec![i(1), i(2)], vec![i(2), i(3)]]); 
- Ok(()) - } - - #[test] - fn duplicate_create_returns_err() -> Result<(), StorageError> { - let mut storage = MemoryStorage::new(); - storage.create_relation("edge", 2)?; - assert!(matches!( - storage.create_relation("edge", 2), - Err(StorageError::RelationExists(_)) - )); - Ok(()) - } - - #[test] - fn scan_unknown_relation_returns_err() { - let storage = MemoryStorage::new(); - assert!(matches!( - storage.scan("missing"), - Err(StorageError::RelationNotFound(_)) - )); - } - - #[test] - fn arity_unknown_relation_returns_err() { - let storage = MemoryStorage::new(); - assert!(matches!( - storage.arity("missing"), - Err(StorageError::RelationNotFound(_)) - )); - } - - #[test] - fn insert_wrong_arity_returns_err() -> Result<(), StorageError> { - let mut storage = MemoryStorage::new(); - storage.create_relation("edge", 2)?; - assert!(matches!( - storage.insert("edge", vec![i(1)]), - Err(StorageError::ArityMismatch { - expected: 2, - got: 1 - }) - )); - Ok(()) - } - - #[test] - fn scan_as_table_materializes_table() -> Result<(), StorageError> { - let mut storage = MemoryStorage::new(); - storage.create_relation("edge", 2)?; - storage.insert("edge", vec![i(1), i(2)])?; - let table = scan_as_table(&storage, "edge")?; - assert_eq!(table.arity, 2); - assert_eq!(table.rows, vec![vec![i(1), i(2)]]); - Ok(()) - } -} diff --git a/crates/query-storage/src/redb.rs b/crates/query-storage/src/redb.rs deleted file mode 100644 index 2f7b05e..0000000 --- a/crates/query-storage/src/redb.rs +++ /dev/null @@ -1,183 +0,0 @@ -//! redb adapter. -//! -//! Each relation gets a redb table named after it, keyed by `u64` row IDs. -//! A reserved table named `__meta`, keyed by relation name, carries per-relation -//! metadata (arity and next synthetic row ID). 
- -use redb::{Database, ReadableTable, TableDefinition}; - -use crate::value::Value; - -use crate::codec::{decode_meta, decode_row, encode_meta, encode_row}; -use crate::{Storage, StorageError}; - -const META_TABLE: &str = "__meta"; - -fn backend(err: E) -> StorageError { - StorageError::Backend(Box::new(err)) -} - -fn meta_def() -> TableDefinition<'static, &'static str, &'static [u8]> { - TableDefinition::new(META_TABLE) -} - -fn rows_def(name: &str) -> TableDefinition<'_, u64, &'static [u8]> { - TableDefinition::new(name) -} - -/// redb-backed [`Storage`] implementation. -pub struct RedbStorage { - db: Database, -} - -impl RedbStorage { - /// Open or create a redb database at `path`. - /// - /// # Errors - /// Returns [`StorageError::Backend`] if redb fails to open the file. - pub fn open(path: impl AsRef) -> Result { - let db = Database::create(path).map_err(backend)?; - Ok(Self { db }) - } -} - -impl Storage for RedbStorage { - fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { - if name == META_TABLE { - return Err(StorageError::Validation(format!( - "relation name '{name}' is reserved" - ))); - } - let arity_u32 = u32::try_from(arity) - .map_err(|_| StorageError::Validation(format!("arity {arity} exceeds u32 range")))?; - let txn = self.db.begin_write().map_err(backend)?; - { - let mut meta = txn.open_table(meta_def()).map_err(backend)?; - if meta.get(name).map_err(backend)?.is_some() { - return Err(StorageError::RelationExists(name.to_string())); - } - let encoded = encode_meta(arity_u32, 0); - meta.insert(name, &encoded[..]).map_err(backend)?; - // open_table creates the rows table if it does not exist - let _ = txn.open_table(rows_def(name)).map_err(backend)?; - } - txn.commit().map_err(backend)?; - Ok(()) - } - - fn arity(&self, name: &str) -> Result { - let txn = self.db.begin_read().map_err(backend)?; - let meta = txn.open_table(meta_def()).map_err(backend)?; - let raw = meta - .get(name) - .map_err(backend)? 
- .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - let (arity, _) = decode_meta(raw.value())?; - Ok(arity as usize) - } - - fn scan(&self, name: &str) -> Result>, StorageError> { - let txn = self.db.begin_read().map_err(backend)?; - let meta = txn.open_table(meta_def()).map_err(backend)?; - if meta.get(name).map_err(backend)?.is_none() { - return Err(StorageError::RelationNotFound(name.to_string())); - } - let table = txn.open_table(rows_def(name)).map_err(backend)?; - let mut rows = Vec::new(); - for entry in table.iter().map_err(backend)? { - let (_, value) = entry.map_err(backend)?; - rows.push(decode_row(value.value())?); - } - Ok(rows) - } - - fn insert(&mut self, name: &str, row: Vec) -> Result<(), StorageError> { - let txn = self.db.begin_write().map_err(backend)?; - let (arity, next_id) = { - let meta = txn.open_table(meta_def()).map_err(backend)?; - let entry = meta - .get(name) - .map_err(backend)? - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - decode_meta(entry.value())? - }; - if row.len() != arity as usize { - return Err(StorageError::ArityMismatch { - expected: arity as usize, - got: row.len(), - }); - } - { - let mut rows = txn.open_table(rows_def(name)).map_err(backend)?; - let encoded = encode_row(&row); - rows.insert(next_id, &encoded[..]).map_err(backend)?; - } - { - let mut meta = txn.open_table(meta_def()).map_err(backend)?; - let new_meta = encode_meta(arity, next_id + 1); - meta.insert(name, new_meta.as_ref()).map_err(backend)?; - } - txn.commit().map_err(backend)?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn i(x: i64) -> Value { - Value::Int(x) - } - - fn s(x: &str) -> Value { - Value::Str(x.to_string()) - } - - fn open_temp() -> Result { - let dir = tempfile::tempdir().map_err(backend)?; - // The file does not have to exist for redb::create. 
- let path = dir.path().join("test.redb"); - let storage = RedbStorage::open(&path)?; - // Keep the tempdir alive by leaking it (test-only). - std::mem::forget(dir); - Ok(storage) - } - - #[test] - fn create_insert_scan_roundtrip() -> Result<(), StorageError> { - let mut storage = open_temp()?; - storage.create_relation("edge", 2)?; - storage.insert("edge", vec![i(1), i(2)])?; - storage.insert("edge", vec![s("hello"), i(7)])?; - let rows = storage.scan("edge")?; - assert_eq!(rows, vec![vec![i(1), i(2)], vec![s("hello"), i(7)]]); - assert_eq!(storage.arity("edge")?, 2); - Ok(()) - } - - #[test] - fn duplicate_create_returns_err() -> Result<(), StorageError> { - let mut storage = open_temp()?; - storage.create_relation("edge", 2)?; - assert!(matches!( - storage.create_relation("edge", 2), - Err(StorageError::RelationExists(_)) - )); - Ok(()) - } - - #[test] - fn insert_wrong_arity_returns_err() -> Result<(), StorageError> { - let mut storage = open_temp()?; - storage.create_relation("edge", 2)?; - assert!(matches!( - storage.insert("edge", vec![i(1)]), - Err(StorageError::ArityMismatch { - expected: 2, - got: 1, - }) - )); - Ok(()) - } -} diff --git a/crates/query-storage/src/sled.rs b/crates/query-storage/src/sled.rs deleted file mode 100644 index 0f2ae8c..0000000 --- a/crates/query-storage/src/sled.rs +++ /dev/null @@ -1,161 +0,0 @@ -//! Sled adapter. -//! -//! Maps each relation onto a sled [`Tree`](sled::Tree) of the same name. A -//! reserved tree named `__meta` carries per-relation metadata (arity and the -//! next synthetic row ID). - -use crate::value::Value; - -use crate::codec::{decode_meta, decode_row, encode_meta, encode_row, row_key}; -use crate::{Storage, StorageError}; - -const META_TREE: &str = "__meta"; - -fn backend(err: E) -> StorageError { - StorageError::Backend(Box::new(err)) -} - -/// Sled-backed [`Storage`] implementation. -pub struct SledStorage { - db: sled::Db, -} - -impl SledStorage { - /// Open or create a sled database at `path`. 
- /// - /// # Errors - /// Returns [`StorageError::Backend`] if sled fails to open the path. - pub fn open(path: impl AsRef) -> Result { - let db = sled::open(path).map_err(backend)?; - Ok(Self { db }) - } - - fn meta_tree(&self) -> Result { - self.db.open_tree(META_TREE).map_err(backend) - } - - fn relation_tree(&self, name: &str) -> Result { - self.db.open_tree(name).map_err(backend) - } - - fn load_meta(&self, name: &str) -> Result<(u32, u64), StorageError> { - let meta = self.meta_tree()?; - let raw = meta - .get(name.as_bytes()) - .map_err(backend)? - .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; - Ok(decode_meta(raw.as_ref())?) - } - - fn store_meta(&self, name: &str, arity: u32, next_id: u64) -> Result<(), StorageError> { - let meta = self.meta_tree()?; - let encoded = encode_meta(arity, next_id); - meta.insert(name.as_bytes(), encoded.as_ref()) - .map_err(backend)?; - Ok(()) - } -} - -impl Storage for SledStorage { - fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { - if name == META_TREE { - return Err(StorageError::Validation(format!( - "relation name '{name}' is reserved" - ))); - } - let meta = self.meta_tree()?; - if meta.contains_key(name.as_bytes()).map_err(backend)? 
{ - return Err(StorageError::RelationExists(name.to_string())); - } - let arity_u32 = u32::try_from(arity) - .map_err(|_| StorageError::Validation(format!("arity {arity} exceeds u32 range")))?; - self.store_meta(name, arity_u32, 0)?; - // open_tree creates the tree if it doesn't exist - let _ = self.relation_tree(name)?; - Ok(()) - } - - fn arity(&self, name: &str) -> Result { - let (arity, _) = self.load_meta(name)?; - Ok(arity as usize) - } - - fn scan(&self, name: &str) -> Result>, StorageError> { - let _ = self.load_meta(name)?; - let tree = self.relation_tree(name)?; - let mut rows = Vec::new(); - for entry in &tree { - let (_, value) = entry.map_err(backend)?; - rows.push(decode_row(value.as_ref())?); - } - Ok(rows) - } - - fn insert(&mut self, name: &str, row: Vec) -> Result<(), StorageError> { - let (arity, next_id) = self.load_meta(name)?; - if row.len() != arity as usize { - return Err(StorageError::ArityMismatch { - expected: arity as usize, - got: row.len(), - }); - } - let tree = self.relation_tree(name)?; - tree.insert(row_key(next_id), encode_row(&row)) - .map_err(backend)?; - self.store_meta(name, arity, next_id + 1)?; - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn i(x: i64) -> Value { - Value::Int(x) - } - - #[test] - fn create_insert_scan_roundtrip() -> Result<(), StorageError> { - let dir = tempfile::tempdir().map_err(backend)?; - let mut storage = SledStorage::open(dir.path())?; - storage.create_relation("edge", 2)?; - storage.insert("edge", vec![i(1), i(2)])?; - storage.insert("edge", vec![i(2), i(3)])?; - storage.insert("edge", vec![i(3), i(3)])?; - let rows = storage.scan("edge")?; - assert_eq!( - rows, - vec![vec![i(1), i(2)], vec![i(2), i(3)], vec![i(3), i(3)],], - ); - assert_eq!(storage.arity("edge")?, 2); - Ok(()) - } - - #[test] - fn duplicate_create_returns_err() -> Result<(), StorageError> { - let dir = tempfile::tempdir().map_err(backend)?; - let mut storage = SledStorage::open(dir.path())?; - 
storage.create_relation("edge", 2)?; - assert!(matches!( - storage.create_relation("edge", 2), - Err(StorageError::RelationExists(_)) - )); - Ok(()) - } - - #[test] - fn insert_wrong_arity_returns_err() -> Result<(), StorageError> { - let dir = tempfile::tempdir().map_err(backend)?; - let mut storage = SledStorage::open(dir.path())?; - storage.create_relation("edge", 2)?; - assert!(matches!( - storage.insert("edge", vec![i(1)]), - Err(StorageError::ArityMismatch { - expected: 2, - got: 1, - }) - )); - Ok(()) - } -} diff --git a/crates/query-storage/Cargo.toml b/crates/storage/Cargo.toml similarity index 80% rename from crates/query-storage/Cargo.toml rename to crates/storage/Cargo.toml index 8f60f10..530e6a3 100644 --- a/crates/query-storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "query-storage" +name = "storage" version = "0.1.0" edition.workspace = true license.workspace = true @@ -16,14 +16,15 @@ default = [] lmdb = ["dep:heed"] redb = ["dep:redb"] fjall = ["dep:fjall"] -sled = ["dep:sled"] +sqlite = ["dep:rusqlite"] geomerge = ["dep:geomerge"] [dependencies] +smallvec = "1" heed = { version = "0.20", optional = true } redb = { version = "2", optional = true } fjall = { version = "2", optional = true } -sled = { version = "0.34", optional = true } +rusqlite = { version = "0.32", features = ["bundled"], optional = true } geomerge = { path = "../../external/geomerge/crates/geomerge", optional = true } [dev-dependencies] diff --git a/crates/storage/README.md b/crates/storage/README.md new file mode 100644 index 0000000..324b68b --- /dev/null +++ b/crates/storage/README.md @@ -0,0 +1,111 @@ +## Storage + +This crate is the storage layer of the workspace. +It defines a backend-agnostic `Storage` trait, the row, value, and identifier types that travel through it, and adapter modules that implement the +trait over different engines. +Higher-level crates such as `query-ops` depend on this crate for both the types and the trait. 
+ +### Public API + +| Item | Kind | Description | +|--------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Storage` | trait | Backend-agnostic interface for storing and retrieving rows. Required methods: `create_relation`, `arity`, `scan_iter`, and `transaction`. The rest (`scan`, `scan_where`, `insert`, `delete`) have default implementations. | +| `Transaction` | trait | Atomic batch of inserts and deletes against a `Storage`. `insert` returns a pending `RowId`; `commit` consumes the boxed transaction and returns a `CommittedTx`; dropping without committing rolls back. | +| `CommittedTx` | struct | Result of a successful `Transaction::commit`. Resolves pending `RowId`s returned during the transaction to their post-commit form via `resolve`. Empty for KV adapters where pending equals real; populated for `geomerge`. | +| `StorageError` | enum | Error type returned by every fallible method. Variants: `RelationNotFound`, `RelationExists`, `ArityMismatch`, `Validation`, `Decode`, `Unsupported`, and `Backend`. | +| `CodecError` | enum | Wire-format failure reported as `StorageError::Decode`. Variants describe truncation, unknown tags, length overruns, and UTF-8 errors. | +| `RowStream<'a>` | type alias | `Box), StorageError>> + 'a>`. The value yielded by `Storage::scan_iter` and `Storage::scan_where`. | +| `RowId` | struct | Opaque, backend-assigned row identifier. Bytes are inline up to 36 bytes (covers every encoding the workspace produces today) and spill to the heap otherwise. Construct with `RowId::new(bytes)` or `RowId::from(u64)`. | +| `Value` | enum | Single cell value. Variants: `Int(i64)`, `Str(String)`, and `Id(RowId)`. 
`Value::Id` is the foreign-key reference used by `geomerge` and any future referencing backend. | +| `Table` | struct | Positional input relation with fixed arity. Produced from a backend scan by `scan_as_table`. Consumed by `query-ops` operators. | +| `scan_as_table(&dyn Storage, &str) -> Result` | function | Materialize a relation from a `Storage` backend into a `Table` for query-language operators. Row IDs are dropped; only cell values remain. | +| `MemoryStorage` | struct | In-process backend kept in `HashMap`s. Always available; useful for tests and snapshot oracles. | +| `adapters::sqlite::SqliteStorage` | struct (feat) | `SQLite`-backed `Storage`, behind the `sqlite` feature. Uses `rusqlite` with bundled libsqlite3; supports a single connection with native write transactions. | +| `adapters::redb::RedbStorage` | struct (feat) | Single-file B-tree backed `Storage`, behind the `redb` feature. Wraps `redb::WriteTransaction` for native atomic commits. | +| `adapters::fjall::FjallStorage` | struct (feat) | LSM-tree backed `Storage`, behind the `fjall` feature. Each relation gets a partition; transactions buffer inserts and apply them on commit. | +| `adapters::lmdb::LmdbStorage` | struct (feat) | mmap'd B-tree backed `Storage`, behind the `lmdb` feature. Wraps `heed`'s `RwTxn` for native atomic commits. | +| `adapters::geomerge::GeomergeStorage` | struct (feat) | CRDT-backed `Storage` over the workspace's `geomerge` crate, behind the `geomerge` feature. Wraps `geomerge::Transaction` and resolves pending row IDs via `CommittedTx`. Deletion is not supported (append-only log). | + +Data types and their relationships: + +
+ + Types + +
+ +### Example + +The example below opens an in-memory backend, declares a relation, inserts two rows inside a single transaction, then scans the result. + +```rust +use storage::value::Value; +use storage::{MemoryStorage, Storage, StorageError}; + +fn i(x: i64) -> Value { + Value::Int(x) +} + +fn main() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + + let (a, b) = { + let mut tx = storage.transaction()?; + let a = tx.insert("edge", vec![i(1), i(2)])?; + let b = tx.insert("edge", vec![i(2), i(3)])?; + let committed = tx.commit()?; + // For KV backends pending IDs equal real IDs, so resolve is the identity. + (committed.resolve(&a), committed.resolve(&b)) + }; + + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(a, vec![i(1), i(2)]), (b, vec![i(2), i(3)])]); + Ok(()) +} +``` + +Swapping `MemoryStorage` for any other adapter (for example `adapters::sqlite::SqliteStorage::open(":memory:")?`) requires no other code changes. + +How a backend is used (logically): + +
+ + Workflow + +
+ +### Run the Tests + +```sh +cargo test -p storage --all-features +``` + +### Notes + +- **Opaque row IDs.** + A `RowId` is a backend-assigned byte sequence; callers do not interpret the bytes. + KV adapters use big-endian `u64`; the `geomerge` adapter encodes a `(CommitHash, counter)` pair. + Hand a `RowId` back to the same backend to reference an existing row. +- **Pending row IDs.** + `Transaction::insert` may return a pending `RowId` that the backend cannot stabilize until commit; this is the case for `geomerge`, where the final + ID depends on the resulting `CommitHash`. + Resolve such IDs through the `CommittedTx` returned by `commit`. + For all KV backends the pending ID is already the real one and `CommittedTx::resolve` is the identity. +- **Streaming first.** + `scan_iter` is the primary scan operation; `scan` defaults to collecting it. + In-memory and LSM backends stream natively; B-tree and SQL backends materialize a `Vec` internally and yield from it to avoid self-referential + iterators. +- **Atomic transactions.** + Adapters with native write transactions (LMDB, redb, `SQLite`, `geomerge`) wrap the engine's transaction directly. + Adapters without (memory, fjall) buffer pending operations and apply them on commit. + Dropping a transaction without calling `commit` rolls back any pending operations. +- **Deletion support.** + Most adapters implement `delete`. + The `geomerge` adapter does not: its append-only commit log returns `StorageError::Unsupported("row deletion")`. +- **Geomerge is alpha.** + The upstream `geomerge` crate is prototype-status and its API may change without notice; treat breakage in `adapters::geomerge` as expected churn + rather than regression. +- **Feature gates.** + `MemoryStorage` is always available. + Every other adapter is feature-gated (`lmdb`, `redb`, `fjall`, `sqlite`, `geomerge`) so callers only pay for what they need. 
diff --git a/crates/storage/docs/diagrams/make_figures.sh b/crates/storage/docs/diagrams/make_figures.sh new file mode 100755 index 0000000..6d30150 --- /dev/null +++ b/crates/storage/docs/diagrams/make_figures.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# You need to have Graphviz installed to run this script +# On Debian-based OSes, you can install it using: sudo apt-get install graphviz + +# Directory containing .dot files. Defaults to the script's own directory so the +# script works regardless of the caller's working directory. +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +ASSET_DIR=${1:-"${SCRIPT_DIR}"} + +# Make figures from .dot files +for f in "${ASSET_DIR}"/*.dot; do + dot -Tsvg "$f" -o "${f%.dot}.svg" +done diff --git a/crates/storage/docs/diagrams/types.dot b/crates/storage/docs/diagrams/types.dot new file mode 100644 index 0000000..f3e8a50 --- /dev/null +++ b/crates/storage/docs/diagrams/types.dot @@ -0,0 +1,118 @@ +digraph StorageTypes { +fontname = "Helvetica,Arial,sans-serif" +layout = dot +rankdir = TB +ranksep = 0.7; +nodesep = 0.7; +splines = true; +bgcolor = "white" + +node [ +fontname = "Helvetica,Arial,sans-serif", +shape = box, +style = "filled,rounded", +color = "#555555", +fillcolor = "white", +penwidth = 1.5 +] +edge [ +fontname = "Helvetica,Arial,sans-serif", +color = "#333333", +fontsize = 9, +fontcolor = "#555555", +penwidth = 1.2 +] + +storage_node [label = < + + + + + + + + + +
Storage (trait)
create_relation(name, arity)
arity(name)
scan_iter(name) -> RowStream
scan(name) -> Vec<(RowId, Vec<Value>)>
scan_where(name, col, value)
transaction() -> Box<dyn Transaction>
insert(name, row) -> RowId
delete(name, id)
>, fillcolor = "#E8F4FD", color = "#2196F3"] + +transaction_node [label = < + + + + +
Transaction (trait)
insert(name, row) -> RowId
delete(name, id)
commit() -> CommittedTx
>, fillcolor = "#E8F5E9", color = "#4CAF50"] + +committed_tx_node [label = < + + + +
CommittedTx (struct)
resolutions: HashMap<RowId, RowId>
resolve(pending) -> RowId
>, fillcolor = "#E8F5E9", color = "#4CAF50"] + +row_stream_node [label = < + + + + +
RowStream<'a> (type alias)
Box<dyn Iterator<Item =
Result<(RowId, Vec<Value>), StorageError>
> + 'a>
>, fillcolor = "#ECEFF1", color = "#607D8B"] + +row_id_node [label = < + + + +
RowId (struct)
SmallVec<[u8; 36]> (opaque)
new(bytes), as_bytes(), from(u64)
>, fillcolor = "#FFF3E0", color = "#FF9800"] + +value_node [label = < + + + + +
Value (enum)
Int(i64)
Str(String)
Id(RowId)
>, fillcolor = "#FFF3E0", color = "#FF9800"] + +table_node [label = < + + + +
Table (struct)
arity: usize
rows: Vec<Vec<Value>>
>, fillcolor = "#E8F4FD", color = "#2196F3"] + +storage_error_node [label = < + + + + + + + + +
StorageError (enum)
RelationNotFound(String)
RelationExists(String)
ArityMismatch { expected, got }
Validation(String)
Decode(CodecError)
Unsupported(&'static str)
Backend(Box<dyn Error>)
>, fillcolor = "#FBE9E7", color = "#E64A19"] + +codec_error_node [label = < + + + + + +
CodecError (enum)
UnexpectedEof
UnknownTag(u8)
LengthOverrun { declared, available }
InvalidUtf8
>, fillcolor = "#FBE9E7", color = "#E64A19"] + +adapters_node [label = < + + + + + + + +
Adapters (impl Storage)
MemoryStorage
SqliteStorage (feat sqlite)
RedbStorage (feat redb)
FjallStorage (feat fjall)
LmdbStorage (feat lmdb)
GeomergeStorage (feat geomerge)
>, fillcolor = "#F3E5F5", color = "#9C27B0"] + +// composition and produces-edges: arrow X -> Y reads "X yields / contains / depends on Y" +adapters_node -> storage_node [label = "impl", style = "dashed"] +storage_node -> transaction_node [label = "transaction() yields"] +transaction_node -> committed_tx_node [label = "commit() yields"] +committed_tx_node -> row_id_node [label = "resolve() yields"] +storage_node -> row_stream_node [label = "scan_iter yields"] +row_stream_node -> row_id_node [label = "Item = (RowId, _)"] +row_stream_node -> value_node [label = "Item = (_, Vec)"] +transaction_node -> row_id_node [label = "insert() yields"] +value_node -> row_id_node [label = "Id(RowId)"] +table_node -> value_node [label = "Vec>"] +storage_error_node -> codec_error_node [label = "Decode(CodecError)"] +} diff --git a/crates/storage/docs/diagrams/types.svg b/crates/storage/docs/diagrams/types.svg new file mode 100644 index 0000000..3177d12 --- /dev/null +++ b/crates/storage/docs/diagrams/types.svg @@ -0,0 +1,374 @@ + + + + + + + StorageTypes + + + + storage_node + + Storage + + +  (trait) + + + create_relation(name, arity) + + + arity(name) + + + scan_iter(name) -> RowStream + + + scan(name) -> Vec<(RowId, Vec<Value>)> + + + scan_where(name, col, value) + + + transaction() -> Box<dyn Transaction> + + + insert(name, row) -> RowId + + + delete(name, id) + + + + + transaction_node + + Transaction + + +  (trait) + + + insert(name, row) -> RowId + + + delete(name, id) + + + commit() -> CommittedTx + + + + + storage_node->transaction_node + + + transaction() yields + + + + + row_stream_node + + RowStream<'a> + + +  (type alias) + + + Box<dyn Iterator<Item = + + +  Result<(RowId, Vec<Value>), StorageError> + + + > + 'a> + + + + + storage_node->row_stream_node + + + scan_iter yields + + + + + committed_tx_node + + CommittedTx + + +  (struct) + + + resolutions: HashMap<RowId, RowId> + + + resolve(pending) -> RowId + + + + + transaction_node->committed_tx_node + + + commit() 
yields + + + + + row_id_node + + RowId + + +  (struct) + + + SmallVec<[u8; 36]> (opaque) + + + new(bytes), as_bytes(), from(u64) + + + + + transaction_node->row_id_node + + + insert() yields + + + + + committed_tx_node->row_id_node + + + resolve() yields + + + + + row_stream_node->row_id_node + + + Item = (RowId, _) + + + + + value_node + + Value + + +  (enum) + + + Int(i64) + + + Str(String) + + + Id(RowId) + + + + + row_stream_node->value_node + + + Item = (_, Vec<Value>) + + + + + value_node->row_id_node + + + Id(RowId) + + + + + table_node + + Table + + +  (struct) + + + arity: usize + + + rows: Vec<Vec<Value>> + + + + + table_node->value_node + + + Vec<Vec<Value>> + + + + + storage_error_node + + StorageError + + +  (enum) + + + RelationNotFound(String) + + + RelationExists(String) + + + ArityMismatch { expected, got } + + + Validation(String) + + + Decode(CodecError) + + + Unsupported(&'static str) + + + Backend(Box<dyn Error>) + + + + + codec_error_node + + CodecError + + +  (enum) + + + UnexpectedEof + + + UnknownTag(u8) + + + LengthOverrun { declared, available } + + + InvalidUtf8 + + + + + storage_error_node->codec_error_node + + + Decode(CodecError) + + + + + adapters_node + + Adapters + + +  (impl Storage) + + + MemoryStorage + + + SqliteStorage  (feat sqlite) + + + RedbStorage  (feat redb) + + + FjallStorage  (feat fjall) + + + LmdbStorage  (feat lmdb) + + + GeomergeStorage  (feat geomerge) + + + + + adapters_node->storage_node + + + impl + + + + diff --git a/crates/storage/docs/diagrams/workflow.dot b/crates/storage/docs/diagrams/workflow.dot new file mode 100644 index 0000000..2faf099 --- /dev/null +++ b/crates/storage/docs/diagrams/workflow.dot @@ -0,0 +1,135 @@ +digraph StorageWorkflow { +fontname = "Helvetica,Arial,sans-serif" +layout = dot +rankdir = LR +ranksep = 0.9; +nodesep = 0.7; +splines = true; +compound = true; +bgcolor = "white" + +node [ +fontname = "Helvetica,Arial,sans-serif", +shape = box, +style = "filled,rounded", +color = 
"#555555", +fillcolor = "white", +penwidth = 1.5 +] +edge [ +fontname = "Helvetica,Arial,sans-serif", +color = "#333333", +fontsize = 9, +fontcolor = "#555555", +labeldistance = 2.0, +penwidth = 1.2 +] + +subgraph cluster_inputs { +label = "Inputs" +style = "dashed" +color = "#888888" +fontcolor = "#555555" +margin = 18 +schema [label = < + + + +
Schema
• relation name
• arity (column count)
>, fillcolor = "#E8F4FD", color = "#2196F3"] +row_data [label = < + + + +
Row Data
• Vec<Value>
• Int / Str / Id(RowId)
>, fillcolor = "#E8F4FD", color = "#2196F3"] +} + +subgraph cluster_setup { +label = "Setup (open backend, declare relations)" +style = "dashed" +color = "#9C27B0" +fontcolor = "#7B1FA2" +margin = 14 +open_backend [label = < + + + + +
Open Backend
MemoryStorage::new() /
SqliteStorage::open(path) /
FjallStorage::open(path) / ...
>, fillcolor = "#F3E5F5", color = "#9C27B0"] +create_relation [label = "storage.create_relation(name, arity)", fillcolor = "#F3E5F5", color = "#9C27B0"] +} + +subgraph cluster_write { +label = "Write (atomic batch via Transaction)" +style = "dashed" +color = "#4CAF50" +fontcolor = "#388E3C" +margin = 14 +begin_tx [label = "storage.transaction()\n-> Box<dyn Transaction>", fillcolor = "#E8F5E9", color = "#4CAF50"] +tx_ops [label = < + + + + +
tx.insert / tx.delete
• insert yields pending RowId
• pending RowIds reused as FKs
• delete by RowId
>, fillcolor = "#E8F5E9", color = "#4CAF50", shape = box] +commit [label = < + + + + + +
tx.commit()
• native commit (LMDB, redb, SQLite, geomerge)
• buffered apply (memory, fjall)
• law validation (geomerge)
• yields CommittedTx
>, fillcolor = "#E8F5E9", color = "#4CAF50", shape = box] +resolve_ids [label = < + + + +
CommittedTx::resolve
• KV: pending == real
• geomerge: pending counter → (commit, counter)
>, fillcolor = "#E8F5E9", color = "#4CAF50", shape = box] +} + +subgraph cluster_read { +label = "Read" +style = "dashed" +color = "#FF9800" +fontcolor = "#F57C00" +margin = 14 +scan_iter [label = "storage.scan_iter(name)\n-> RowStream", fillcolor = "#FFF3E0", color = "#FF9800"] +scan_where [label = "storage.scan_where(name, col, value)\n-> RowStream (filtered)", fillcolor = "#FFF3E0", color = "#FF9800"] +scan_full [label = "storage.scan(name)\n-> Vec<(RowId, Vec<Value>)>", fillcolor = "#FFF3E0", color = "#FF9800"] +} + +subgraph cluster_output { +label = "Output" +style = "dashed" +color = "#888888" +fontcolor = "#555555" +margin = 18 +rows_out [label = < + + + + +
Rows
• (RowId, Vec<Value>)
• consumed by query-ops
via scan_as_table
>, fillcolor = "#ECEFF1", color = "#607D8B"] +} + +// Setup +schema -> create_relation [color = "#2196F3"] +open_backend -> create_relation [color = "#9C27B0"] + +// Write path +create_relation -> begin_tx [color = "#4CAF50"] +begin_tx -> tx_ops [color = "#4CAF50"] +row_data -> tx_ops [style = "dashed", color = "#2196F3"] +tx_ops -> commit [color = "#4CAF50"] +commit -> resolve_ids [label = "CommittedTx", color = "#4CAF50"] + +// Read path +create_relation -> scan_iter [style = "dashed", color = "#9C27B0"] +create_relation -> scan_where [style = "dashed", color = "#9C27B0"] +create_relation -> scan_full [style = "dashed", color = "#9C27B0"] +commit -> scan_iter [style = "dashed", label = "after commit", color = "#4CAF50"] + +// Output +scan_iter -> rows_out [color = "#FF9800"] +scan_where -> rows_out [color = "#FF9800"] +scan_full -> rows_out [color = "#FF9800"] +resolve_ids -> rows_out [style = "dashed", label = "real RowIds", color = "#4CAF50"] +} diff --git a/crates/storage/docs/diagrams/workflow.svg b/crates/storage/docs/diagrams/workflow.svg new file mode 100644 index 0000000..c7f3f41 --- /dev/null +++ b/crates/storage/docs/diagrams/workflow.svg @@ -0,0 +1,359 @@ + + + + + + + StorageWorkflow + + + cluster_inputs + + Inputs + + + + cluster_setup + + Setup  (open backend, declare relations) + + + + cluster_write + + Write  (atomic batch via Transaction) + + + + cluster_read + + Read + + + + cluster_output + + Output + + + + + schema + + Schema + + • + relation name + + • + arity (column count) + + + + + create_relation + + + storage.create_relation(name, arity) + + + + + schema->create_relation + + + + + + row_data + + Row Data + + • + Vec<Value> + + • + Int / Str / Id(RowId) + + + + + tx_ops + + tx.insert / tx.delete + + + • insert yields pending RowId + + + • pending RowIds reused as FKs + + + • delete by RowId + + + + + row_data->tx_ops + + + + + + open_backend + + Open Backend + + + MemoryStorage::new() / + + + SqliteStorage::open(path) / + + + 
FjallStorage::open(path) / ... + + + + + open_backend->create_relation + + + + + + begin_tx + + + storage.transaction() + + + -> Box<dyn Transaction> + + + + + create_relation->begin_tx + + + + + + scan_iter + + + storage.scan_iter(name) + + + -> RowStream + + + + + create_relation->scan_iter + + + + + + scan_where + + + storage.scan_where(name, col, value) + + + -> RowStream  (filtered) + + + + + create_relation->scan_where + + + + + + scan_full + + + storage.scan(name) + + + -> Vec<(RowId, Vec<Value>)> + + + + + create_relation->scan_full + + + + + + begin_tx->tx_ops + + + + + + commit + + tx.commit() + + • native commit (LMDB, redb, SQLite, geomerge) + + • buffered apply (memory, fjall) + + • law validation (geomerge) + + • yields CommittedTx + + + + + tx_ops->commit + + + + + + resolve_ids + + CommittedTx::resolve + + + • KV: pending == real + + + • geomerge: pending counter → (commit, counter) + + + + + commit->resolve_ids + + + CommittedTx + + + + + commit->scan_iter + + + after commit + + + + + rows_out + + Rows + + • (RowId, Vec<Value>) + + • consumed by query-ops + +  via scan_as_table + + + + + resolve_ids->rows_out + + + real RowIds + + + + + scan_iter->rows_out + + + + + + scan_where->rows_out + + + + + + scan_full->rows_out + + + + + diff --git a/crates/storage/src/adapters.rs b/crates/storage/src/adapters.rs new file mode 100644 index 0000000..60c5b1b --- /dev/null +++ b/crates/storage/src/adapters.rs @@ -0,0 +1,22 @@ +//! Backend adapters for the [`Storage`](crate::Storage) trait. +//! +//! Each module here implements [`Storage`](crate::Storage) over a different +//! engine. [`memory`] is always available; the rest are gated behind Cargo +//! features so users only pay for what they need. 
+ +pub mod memory; + +#[cfg(feature = "sqlite")] +pub mod sqlite; + +#[cfg(feature = "redb")] +pub mod redb; + +#[cfg(feature = "fjall")] +pub mod fjall; + +#[cfg(feature = "lmdb")] +pub mod lmdb; + +#[cfg(feature = "geomerge")] +pub mod geomerge; diff --git a/crates/storage/src/adapters/fjall.rs b/crates/storage/src/adapters/fjall.rs new file mode 100644 index 0000000..986b76e --- /dev/null +++ b/crates/storage/src/adapters/fjall.rs @@ -0,0 +1,253 @@ +//! fjall adapter. +//! +//! Each relation gets a fjall [`PartitionHandle`](fjall::PartitionHandle) of +//! the same name. A reserved partition named `__meta` carries per-relation +//! metadata (arity and next synthetic row ID). +//! +//! fjall has no native cross-partition write transactions, so the adapter +//! buffers inserts inside [`FjallTx`] and applies them when +//! [`Transaction::commit`] is called; dropping the tx without commit discards +//! the buffer. + +use std::collections::HashMap; + +use fjall::{Keyspace, PartitionCreateOptions, PartitionHandle}; + +use crate::codec::{decode_meta, decode_row, encode_meta, encode_row}; +use crate::id::RowId; +use crate::value::Value; +use crate::{backend, CommittedTx, RowStream, Storage, StorageError, Transaction}; + +const META_PARTITION: &str = "__meta"; + +/// fjall-backed [`Storage`] implementation. +pub struct FjallStorage { + keyspace: Keyspace, + meta: PartitionHandle, +} + +impl FjallStorage { + /// Open or create a fjall keyspace at `path`. + /// + /// # Errors + /// Returns [`StorageError::Backend`] if fjall fails to open the path. 
+ pub fn open(path: impl AsRef) -> Result { + let keyspace = fjall::Config::new(path).open().map_err(backend)?; + let meta = keyspace + .open_partition(META_PARTITION, PartitionCreateOptions::default()) + .map_err(backend)?; + Ok(Self { keyspace, meta }) + } + + fn relation_partition(&self, name: &str) -> Result { + self.keyspace + .open_partition(name, PartitionCreateOptions::default()) + .map_err(backend) + } +} + +impl Storage for FjallStorage { + fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { + if name == META_PARTITION { + return Err(StorageError::Validation(format!( + "relation name '{name}' is reserved" + ))); + } + if self.meta.contains_key(name.as_bytes()).map_err(backend)? { + return Err(StorageError::RelationExists(name.to_string())); + } + let Ok(arity_u32) = u32::try_from(arity) else { + unreachable!("arity exceeds u32::MAX") + }; + self.meta + .insert(name.as_bytes(), encode_meta(arity_u32, 0)) + .map_err(backend)?; + let _ = self.relation_partition(name)?; + Ok(()) + } + + fn arity(&self, name: &str) -> Result { + let raw = self + .meta + .get(name.as_bytes()) + .map_err(backend)? 
+ .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + let (arity, _) = decode_meta(raw.as_ref())?; + Ok(arity as usize) + } + + fn scan_iter<'a>(&'a self, name: &str) -> Result, StorageError> { + let _ = self.arity(name)?; + let partition = self.relation_partition(name)?; + let iter = partition.iter().map(|res| { + let (key, value) = res.map_err(backend)?; + Ok((RowId::new(key.as_ref()), decode_row(value.as_ref())?)) + }); + Ok(Box::new(iter)) + } + + fn transaction<'a>(&'a mut self) -> Result, StorageError> { + Ok(Box::new(FjallTx { + keyspace: &self.keyspace, + meta: &self.meta, + pending: Vec::new(), + deletes: Vec::new(), + next_ids: HashMap::new(), + })) + } +} + +pub(crate) struct FjallTx<'a> { + keyspace: &'a Keyspace, + meta: &'a PartitionHandle, + pending: Vec<(String, RowId, Vec)>, + deletes: Vec<(String, RowId)>, + next_ids: HashMap, +} + +impl FjallTx<'_> { + fn meta_for(&mut self, name: &str) -> Result<(u32, u64), StorageError> { + if let Some(&entry) = self.next_ids.get(name) { + return Ok(entry); + } + let raw = self + .meta + .get(name.as_bytes()) + .map_err(backend)? + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + let entry = decode_meta(raw.as_ref())?; + self.next_ids.insert(name.to_string(), entry); + Ok(entry) + } +} + +impl Transaction for FjallTx<'_> { + fn insert(&mut self, name: &str, row: Vec) -> Result { + let (arity, next_id) = self.meta_for(name)?; + if row.len() != arity as usize { + return Err(StorageError::ArityMismatch { + expected: arity as usize, + got: row.len(), + }); + } + let id = RowId::from(next_id); + self.next_ids.insert(name.to_string(), (arity, next_id + 1)); + self.pending.push((name.to_string(), id.clone(), row)); + Ok(id) + } + + fn delete(&mut self, name: &str, id: &RowId) -> Result<(), StorageError> { + if !self.meta.contains_key(name.as_bytes()).map_err(backend)? 
{
+            return Err(StorageError::RelationNotFound(name.to_string()));
+        }
+        self.deletes.push((name.to_string(), id.clone()));
+        Ok(())
+    }
+
+    /// Apply the buffered inserts, then the buffered deletes, then write the
+    /// per-relation metadata cached in `next_ids` back to the meta partition.
+    ///
+    /// NOTE(review): each write is issued to its partition individually and
+    /// the first backend error returns early, so writes issued before the
+    /// error are not undone here.
+    fn commit(self: Box<Self>) -> Result<CommittedTx, StorageError> {
+        let FjallTx {
+            keyspace,
+            meta,
+            pending,
+            deletes,
+            next_ids,
+        } = *self;
+        for (name, id, row) in pending {
+            let partition = keyspace
+                .open_partition(&name, PartitionCreateOptions::default())
+                .map_err(backend)?;
+            partition
+                .insert(id.as_bytes(), encode_row(&row))
+                .map_err(backend)?;
+        }
+        for (name, id) in deletes {
+            let partition = keyspace
+                .open_partition(&name, PartitionCreateOptions::default())
+                .map_err(backend)?;
+            partition.remove(id.as_bytes()).map_err(backend)?;
+        }
+        for (name, (arity, next_id)) in next_ids {
+            meta.insert(name.as_bytes(), encode_meta(arity, next_id))
+                .map_err(backend)?;
+        }
+        Ok(CommittedTx::empty())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    // Brings `FjallStorage`, `Value`, `StorageError`, `backend` and the
+    // `Storage` trait methods used below into scope.
+    use super::*;
+
+    fn i(x: i64) -> Value {
+        Value::Int(x)
+    }
+
+    fn open_temp() -> Result<FjallStorage, StorageError> {
+        let dir = tempfile::tempdir().map_err(backend)?;
+        let storage = FjallStorage::open(dir.path())?;
+        // Leak the temp-dir guard so the directory is not removed while the
+        // returned storage is still using it.
+        std::mem::forget(dir);
+        Ok(storage)
+    }
+
+    #[test]
+    fn create_insert_scan_roundtrip() -> Result<(), StorageError> {
+        let mut storage = open_temp()?;
+        storage.create_relation("edge", 2)?;
+        let id0 = storage.insert("edge", vec![i(1), i(2)])?;
+        let id1 = storage.insert("edge", vec![i(2), i(3)])?;
+        let rows = storage.scan("edge")?;
+        assert_eq!(rows, vec![(id0, vec![i(1), i(2)]), (id1, vec![i(2), i(3)])]);
+        assert_eq!(storage.arity("edge")?, 2);
+        Ok(())
+    }
+
+    #[test]
+    fn batched_inserts_share_one_commit() -> Result<(), StorageError> {
+        let mut storage = open_temp()?;
+        storage.create_relation("edge", 2)?;
+        let (a, b) = {
+            let mut tx = storage.transaction()?;
+            let a = tx.insert("edge", vec![i(1), i(2)])?;
+            let b = tx.insert("edge", vec![i(3), i(4)])?;
+            tx.commit()?;
+            (a, b)
+        };
+        let rows = storage.scan("edge")?;
+        assert_eq!(rows, vec![(a, vec![i(1), i(2)]), (b, vec![i(3), i(4)])]);
+        Ok(())
+    }
+
+    #[test]
+    fn
dropped_transaction_is_rolled_back() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + { + let mut tx = storage.transaction()?; + tx.insert("edge", vec![i(1), i(2)])?; + } + assert!(storage.scan("edge")?.is_empty()); + Ok(()) + } + + #[test] + fn delete_removes_row() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 1)?; + let a = storage.insert("edge", vec![i(1)])?; + let b = storage.insert("edge", vec![i(2)])?; + storage.delete("edge", &a)?; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(b, vec![i(2)])]); + storage.delete("edge", &a)?; + Ok(()) + } + + #[test] + fn duplicate_create_returns_err() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.create_relation("edge", 2), + Err(StorageError::RelationExists(_)) + )); + Ok(()) + } +} diff --git a/crates/storage/src/adapters/geomerge.rs b/crates/storage/src/adapters/geomerge.rs new file mode 100644 index 0000000..f6c3efa --- /dev/null +++ b/crates/storage/src/adapters/geomerge.rs @@ -0,0 +1,458 @@ +//! Geomerge adapter. +//! +//! Geomerge schemas are immutable after store construction: there is no +//! public API to register a new table on a live `Store`. The adapter expects +//! all relations to be declared up front via a `FlatTheory` passed to +//! [`GeomergeStorage::from_theory`] (or a pre-built `Store` via +//! [`GeomergeStorage::from_store`]). [`Storage::create_relation`] is then a +//! verifier that the relation exists and that its arity matches. +//! +//! ## Deletion +//! +//! Geomerge upstream's `txn::ops::Op` enum currently has only `Op::Add`; +//! there is no retract or remove op. The adapter therefore uses the default +//! [`Transaction::delete`] implementation, which returns +//! [`StorageError::Unsupported`]. +//! +//! ## Row Identifier Encoding +//! +//! 
Geomerge's [`RowId`](geomerge::table::RowId) is `{ commit: CommitHash, counter: u32 }`. +//! The adapter uses two encodings under [`crate::id::RowId`]: +//! +//! - **Existing (36 bytes):** 32-byte commit hash followed by a 4-byte BE +//! counter. Returned by [`Storage::scan`] and stable across calls. +//! - **Pending (4 bytes):** just a 4-byte BE counter, returned by +//! [`Transaction::insert`] to reference an in-flight row from later +//! inserts in the same transaction. Pending ids become invalid after +//! commit; post-commit identifiers must be looked up via +//! [`Storage::scan`]. +//! +//! Foreign-key references inside a transaction work because geomerge's +//! `TxnCellValue::Id(RowRef::Pending(TempRowId(counter)))` accepts the +//! counter the adapter tracked locally. The local counter is assumed to +//! match geomerge's internal `TempRowId` counter (both start at 0 and +//! increment per `Transaction::add`). + +use std::collections::{HashMap, HashSet}; + +use geomerge::commit::hash::CommitHash; +use geomerge::ir::{self, Path}; +use geomerge::store::Store; +use geomerge::table::{CellValue, RowId as GmRowId}; +use geomerge::txn::ops::{RowRef, TempRowId, TxnCellValue}; + +use crate::id::RowId; +use crate::value::Value; +use crate::{backend, CommittedTx, RowStream, Storage, StorageError, Transaction}; + +const GM_ROW_ID_LEN: usize = 32 + 4; +const PENDING_ROW_ID_LEN: usize = 4; + +fn validation(msg: impl Into) -> StorageError { + StorageError::Validation(msg.into()) +} + +fn encode_gm_row_id(id: &GmRowId) -> RowId { + let mut bytes = [0u8; GM_ROW_ID_LEN]; + bytes[..32].copy_from_slice(&id.commit.0); + bytes[32..].copy_from_slice(&id.counter.to_be_bytes()); + RowId::new(bytes) +} + +fn decode_gm_row_id(bytes: &[u8]) -> Result { + if bytes.len() != GM_ROW_ID_LEN { + return Err(validation(format!( + "expected {GM_ROW_ID_LEN}-byte geomerge RowId, got {} bytes", + bytes.len() + ))); + } + let mut hash = [0u8; 32]; + hash.copy_from_slice(&bytes[..32]); + let mut 
counter_buf = [0u8; 4]; + counter_buf.copy_from_slice(&bytes[32..]); + Ok(GmRowId { + commit: CommitHash(hash), + counter: u32::from_be_bytes(counter_buf), + }) +} + +fn encode_pending_row_id(counter: u32) -> RowId { + RowId::new(counter.to_be_bytes()) +} + +fn decode_pending_row_id(bytes: &[u8]) -> Result { + if bytes.len() != PENDING_ROW_ID_LEN { + return Err(validation(format!( + "expected {PENDING_ROW_ID_LEN}-byte pending RowId, got {} bytes", + bytes.len() + ))); + } + let mut counter_buf = [0u8; 4]; + counter_buf.copy_from_slice(bytes); + Ok(TempRowId::from(u32::from_be_bytes(counter_buf))) +} + +/// Geomerge-backed [`Storage`] implementation. +pub struct GeomergeStorage { + store: Store, + declared: HashSet, +} + +impl Default for GeomergeStorage { + fn default() -> Self { + Self::new() + } +} + +impl GeomergeStorage { + /// Build an empty store. No relations are available until the store is + /// rebuilt via a theory. + #[must_use] + pub fn new() -> Self { + Self { + store: Store::new(), + declared: HashSet::new(), + } + } + + /// Build a store from a pre-defined `FlatTheory`. All `create_relation` + /// calls must reference relations declared in the theory. + /// + /// # Errors + /// Returns [`StorageError::Backend`] if geomerge rejects the theory. + pub fn from_theory(theory: ir::FlatTheory) -> Result { + let store = Store::try_from_theory(theory).map_err(|e| backend(*e))?; + Ok(Self { + store, + declared: HashSet::new(), + }) + } + + /// Wrap an existing `Store`, e.g. after decoding via + /// `geomerge::commit::pst::decode_store`. + #[must_use] + pub fn from_store(store: Store) -> Self { + Self { + store, + declared: HashSet::new(), + } + } + + /// Borrow the underlying geomerge store (for backend-specific operations + /// like persistence, dump, or law inspection that aren't on the trait). 
+ #[must_use] + pub fn store(&self) -> &Store { + &self.store + } +} + +impl Storage for GeomergeStorage { + fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { + if self.declared.contains(name) { + return Err(StorageError::RelationExists(name.to_string())); + } + let path: Path = name.into(); + let table = self.store.table_at(&path).ok_or_else(|| { + validation(format!( + "relation '{name}' is not declared in the loaded geomerge theory; \ + geomerge does not support runtime relation creation" + )) + })?; + let declared_arity = table.schema().columns.len(); + if declared_arity != arity { + return Err(StorageError::ArityMismatch { + expected: declared_arity, + got: arity, + }); + } + self.declared.insert(name.to_string()); + Ok(()) + } + + fn arity(&self, name: &str) -> Result { + let path: Path = name.into(); + let table = self + .store + .table_at(&path) + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + Ok(table.schema().columns.len()) + } + + fn scan_iter<'a>(&'a self, name: &str) -> Result, StorageError> { + let path: Path = name.into(); + let table = self + .store + .table_at(&path) + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + let arity = table.schema().columns.len(); + let mut rows = Vec::with_capacity(table.row_count()); + for r in 0..table.row_count() { + let gm_id = table + .row_id_at(r) + .ok_or_else(|| validation(format!("missing row id at {r} in '{name}'")))?; + let id = encode_gm_row_id(&gm_id); + let mut row = Vec::with_capacity(arity); + for c in 0..arity { + let cell = table + .cell_at(r, c) + .ok_or_else(|| validation(format!("missing cell at ({r}, {c}) in '{name}'")))?; + row.push(cell_to_value(cell)); + } + rows.push((id, row)); + } + Ok(Box::new(rows.into_iter().map(Ok))) + } + + fn transaction<'a>(&'a mut self) -> Result, StorageError> { + let txn = self.store.transaction(); + Ok(Box::new(GeomergeTx { + txn: Some(txn), + counter: 0, + })) + } +} + +pub(crate) struct 
GeomergeTx<'a> { + txn: Option>, + /// Mirrors geomerge's internal `TempRowId` counter for this transaction. + /// Both start at 0 and increment by 1 per `Transaction::add`. + counter: u32, +} + +impl Transaction for GeomergeTx<'_> { + fn insert(&mut self, name: &str, row: Vec) -> Result { + let path: Path = name.into(); + let values: Vec = row + .into_iter() + .map(value_to_txn_cell) + .collect::, _>>()?; + let Some(txn) = self.txn.as_mut() else { + unreachable!("transaction was already committed") + }; + txn.add(&path, values) + .map_err(|e| validation(e.to_string()))?; + let id = encode_pending_row_id(self.counter); + self.counter += 1; + Ok(id) + } + + fn commit(self: Box) -> Result { + let mut this = self; + let Some(txn) = this.txn.take() else { + unreachable!("transaction was already committed") + }; + // Law violations (totality, foreign-key, etc.) surface here. + let commit_hash = txn.commit().map_err(|e| validation(e.to_string()))?; + // Every counter we returned during this tx (0..self.counter) now + // corresponds to a real RowId { commit: commit_hash, counter }. 
+ let mut resolutions = HashMap::with_capacity(this.counter as usize); + for counter in 0..this.counter { + let pending = encode_pending_row_id(counter); + let real = encode_gm_row_id(&GmRowId { + commit: commit_hash, + counter, + }); + resolutions.insert(pending, real); + } + Ok(CommittedTx::from_mappings(resolutions)) + } +} + +fn cell_to_value(cell: &CellValue) -> Value { + match cell { + CellValue::Int(i) => Value::Int(*i), + CellValue::Str(s) => Value::Str(s.clone()), + CellValue::Id(id) => Value::Id(encode_gm_row_id(id)), + } +} + +fn value_to_txn_cell(value: Value) -> Result { + match value { + Value::Int(i) => Ok(TxnCellValue::Int(i)), + Value::Str(s) => Ok(TxnCellValue::Str(s)), + Value::Id(id) => { + let bytes = id.as_bytes(); + match bytes.len() { + PENDING_ROW_ID_LEN => Ok(TxnCellValue::Id(RowRef::Pending(decode_pending_row_id( + bytes, + )?))), + GM_ROW_ID_LEN => Ok(TxnCellValue::Id(RowRef::Existing(decode_gm_row_id(bytes)?))), + len => Err(validation(format!( + "geomerge RowId must be {PENDING_ROW_ID_LEN} (pending) or \ + {GM_ROW_ID_LEN} (existing) bytes, got {len}" + ))), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use geomerge::ir::{ColType, FlatTheory, PrimType, Schema, TableEntry}; + + fn i(x: i64) -> Value { + Value::Int(x) + } + + fn int_schema(arity: usize) -> Schema { + Schema { + columns: (0..arity) + .map(|_| ColType::PrimType { + prim: PrimType::PrimInt, + }) + .collect(), + primary_key: None, + } + } + + fn theory_with_one_int_table(name: &str, arity: usize) -> FlatTheory { + FlatTheory { + tables: vec![TableEntry { + path: name.into(), + table: int_schema(arity), + }], + laws: Vec::new(), + } + } + + #[test] + fn empty_store_has_no_relations() { + let storage = GeomergeStorage::new(); + assert!(matches!( + storage.arity("edge"), + Err(StorageError::RelationNotFound(_)) + )); + } + + #[test] + fn create_relation_on_undeclared_returns_validation_error() { + let mut storage = GeomergeStorage::new(); + assert!(matches!( + 
storage.create_relation("edge", 2), + Err(StorageError::Validation(_)) + )); + } + + #[test] + fn theory_loaded_insert_scan_roundtrip() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 2); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 2)?; + storage.insert("edge", vec![i(1), i(2)])?; + storage.insert("edge", vec![i(3), i(4)])?; + let rows = storage.scan("edge")?; + assert_eq!(rows.len(), 2); + assert_eq!(rows[0].1, vec![i(1), i(2)]); + assert_eq!(rows[1].1, vec![i(3), i(4)]); + // Scanned IDs use the 36-byte existing form. + assert_eq!(rows[0].0.as_bytes().len(), GM_ROW_ID_LEN); + assert_eq!(rows[1].0.as_bytes().len(), GM_ROW_ID_LEN); + Ok(()) + } + + #[test] + fn single_storage_insert_returns_post_commit_row_id() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 1); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 1)?; + // `Storage::insert` opens its own tx, commits, and resolves the + // pending RowId to its post-commit form. The returned id should be + // the 36-byte (existing) shape, not the 4-byte (pending) shape. + let id = storage.insert("edge", vec![i(1)])?; + assert_eq!(id.as_bytes().len(), GM_ROW_ID_LEN); + // And it should equal what scan returns. 
+ let rows = storage.scan("edge")?; + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].0, id); + Ok(()) + } + + #[test] + fn committed_tx_resolves_pending_ids() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 1); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 1)?; + let (committed, pending_a, pending_b) = { + let mut tx = storage.transaction()?; + let a = tx.insert("edge", vec![i(1)])?; + let b = tx.insert("edge", vec![i(2)])?; + (tx.commit()?, a, b) + }; + let real_a = committed.resolve(&pending_a); + let real_b = committed.resolve(&pending_b); + assert_eq!(real_a.as_bytes().len(), GM_ROW_ID_LEN); + assert_eq!(real_b.as_bytes().len(), GM_ROW_ID_LEN); + assert_ne!(real_a, real_b); + // The resolved ids should match what scan reports. + let rows = storage.scan("edge")?; + assert!(rows.iter().any(|(id, _)| id == &real_a)); + assert!(rows.iter().any(|(id, _)| id == &real_b)); + Ok(()) + } + + #[test] + fn batched_inserts_in_one_transaction() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 2); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 2)?; + { + let mut tx = storage.transaction()?; + let p0 = tx.insert("edge", vec![i(1), i(2)])?; + let p1 = tx.insert("edge", vec![i(3), i(4)])?; + // Pending ids are 4-byte counters within the tx. 
+ assert_eq!(p0.as_bytes().len(), PENDING_ROW_ID_LEN); + assert_eq!(p1.as_bytes().len(), PENDING_ROW_ID_LEN); + tx.commit()?; + } + let rows = storage.scan("edge")?; + assert_eq!(rows.len(), 2); + Ok(()) + } + + #[test] + fn dropped_transaction_is_rolled_back() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 2); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 2)?; + { + let mut tx = storage.transaction()?; + tx.insert("edge", vec![i(1), i(2)])?; + } + assert!(storage.scan("edge")?.is_empty()); + Ok(()) + } + + #[test] + fn duplicate_create_returns_err() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 2); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.create_relation("edge", 2), + Err(StorageError::RelationExists(_)) + )); + Ok(()) + } + + #[test] + fn delete_is_not_supported() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 1); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 1)?; + let id = storage.insert("edge", vec![i(1)])?; + let result = storage.delete("edge", &id); + assert!(matches!(result, Err(StorageError::Unsupported(_)))); + Ok(()) + } + + #[test] + fn insert_wrong_type_returns_validation_error() -> Result<(), StorageError> { + let theory = theory_with_one_int_table("edge", 2); + let mut storage = GeomergeStorage::from_theory(theory)?; + storage.create_relation("edge", 2)?; + let result = storage.insert("edge", vec![Value::Str("not an int".to_string()), i(2)]); + assert!(matches!(result, Err(StorageError::Validation(_)))); + Ok(()) + } +} diff --git a/crates/storage/src/adapters/lmdb.rs b/crates/storage/src/adapters/lmdb.rs new file mode 100644 index 0000000..08fa324 --- /dev/null +++ b/crates/storage/src/adapters/lmdb.rs @@ -0,0 +1,322 @@ +//! LMDB adapter via the `heed` crate. +//! +//! 
Maps each relation onto a named LMDB sub-database of the same name. A +//! reserved sub-database named `__meta` carries per-relation metadata (arity +//! and next synthetic row ID). +//! +//! [`LmdbTx`] wraps a real `heed::RwTxn`. Inserts go through the transaction; +//! [`Transaction::commit`] commits it; dropping the tx without commit lets +//! `heed` abort the transaction. + +use std::collections::HashMap; + +use heed::types::Bytes; +use heed::{Database, Env, EnvOpenOptions, RwTxn}; + +use crate::codec::{decode_meta, decode_row, encode_meta, encode_row, row_key}; +use crate::id::RowId; +use crate::value::Value; +use crate::{backend, CommittedTx, RowStream, Storage, StorageError, Transaction}; + +const META_DB: &str = "__meta"; +const DEFAULT_MAX_DBS: u32 = 128; +const DEFAULT_MAP_SIZE: usize = 100 * 1024 * 1024; + +/// LMDB-backed [`Storage`] implementation. +pub struct LmdbStorage { + env: Env, + meta: Database, +} + +impl LmdbStorage { + /// Open or create an LMDB environment at `path`. + /// + /// # Errors + /// Returns [`StorageError::Backend`] if LMDB fails to open. + /// + /// # Safety + /// Uses `EnvOpenOptions::open`, which `heed` marks unsafe because the + /// memory-mapped file's contents can be modified by other processes, + /// violating Rust's aliasing rules. This adapter assumes single-process + /// exclusive access to the path. + #[allow(unsafe_code)] + pub fn open(path: impl AsRef) -> Result { + // SAFETY: see method-level doc above. + let env = unsafe { + EnvOpenOptions::new() + .max_dbs(DEFAULT_MAX_DBS) + .map_size(DEFAULT_MAP_SIZE) + .open(path) + .map_err(backend)? 
+        };
+        let mut wtxn = env.write_txn().map_err(backend)?;
+        let meta: Database<Bytes, Bytes> = env
+            .create_database(&mut wtxn, Some(META_DB))
+            .map_err(backend)?;
+        wtxn.commit().map_err(backend)?;
+        Ok(Self { env, meta })
+    }
+}
+
+impl Storage for LmdbStorage {
+    /// Register relation `name` with the given `arity`.
+    ///
+    /// The metadata entry and the relation's sub-database are both written
+    /// through one write transaction, which is committed at the end.
+    ///
+    /// Returns [`StorageError::Validation`] when `name` is the reserved
+    /// metadata database name or when `arity` does not fit in a `u32`, and
+    /// [`StorageError::RelationExists`] when the meta database already has
+    /// an entry for `name`.
+    fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> {
+        if name == META_DB {
+            return Err(StorageError::Validation(format!(
+                "relation name '{name}' is reserved"
+            )));
+        }
+        // `arity` is caller-supplied, so an oversized value is reported as an
+        // error instead of being treated as an impossible state.
+        let Ok(arity_u32) = u32::try_from(arity) else {
+            return Err(StorageError::Validation(format!(
+                "arity {arity} exceeds u32::MAX"
+            )));
+        };
+        let mut wtxn = self.env.write_txn().map_err(backend)?;
+        if self
+            .meta
+            .get(&wtxn, name.as_bytes())
+            .map_err(backend)?
+            .is_some()
+        {
+            return Err(StorageError::RelationExists(name.to_string()));
+        }
+        let encoded = encode_meta(arity_u32, 0);
+        self.meta
+            .put(&mut wtxn, name.as_bytes(), &encoded[..])
+            .map_err(backend)?;
+        let _ = self
+            .env
+            .create_database::<Bytes, Bytes>(&mut wtxn, Some(name))
+            .map_err(backend)?;
+        wtxn.commit().map_err(backend)?;
+        Ok(())
+    }
+
+    /// Look up the arity recorded for `name` in the meta database.
+    fn arity(&self, name: &str) -> Result<usize, StorageError> {
+        let rtxn = self.env.read_txn().map_err(backend)?;
+        let raw = self
+            .meta
+            .get(&rtxn, name.as_bytes())
+            .map_err(backend)?
+            .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?;
+        let (arity, _) = decode_meta(raw)?;
+        Ok(arity as usize)
+    }
+
+    fn scan_iter<'a>(&'a self, name: &str) -> Result<RowStream<'a>, StorageError> {
+        let rtxn = self.env.read_txn().map_err(backend)?;
+        if self
+            .meta
+            .get(&rtxn, name.as_bytes())
+            .map_err(backend)?
+            .is_none()
+        {
+            return Err(StorageError::RelationNotFound(name.to_string()));
+        }
+        let db: Database<Bytes, Bytes> = self
+            .env
+            .open_database(&rtxn, Some(name))
+            .map_err(backend)?
+            .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?;
+        let mut rows = Vec::new();
+        for entry in db.iter(&rtxn).map_err(backend)?
{ + let (key, value) = entry.map_err(backend)?; + rows.push((RowId::new(key), decode_row(value)?)); + } + Ok(Box::new(rows.into_iter().map(Ok))) + } + + fn transaction<'a>(&'a mut self) -> Result, StorageError> { + let wtxn = self.env.write_txn().map_err(backend)?; + Ok(Box::new(LmdbTx { + env: &self.env, + meta: self.meta, + wtxn: Some(wtxn), + dbs: HashMap::new(), + next_ids: HashMap::new(), + })) + } +} + +pub(crate) struct LmdbTx<'a> { + env: &'a Env, + meta: Database, + wtxn: Option>, + /// Per-relation sub-database handles opened within this transaction. + dbs: HashMap>, + next_ids: HashMap, +} + +impl Transaction for LmdbTx<'_> { + fn insert(&mut self, name: &str, row: Vec) -> Result { + // Load meta on first access to this relation; subsequent calls within + // the tx read the cached entry. + let (arity, next_id) = if let Some(&entry) = self.next_ids.get(name) { + entry + } else { + let Some(wtxn) = self.wtxn.as_ref() else { + unreachable!("transaction was already committed") + }; + let raw = self + .meta + .get(wtxn, name.as_bytes()) + .map_err(backend)? + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + let entry = decode_meta(raw)?; + self.next_ids.insert(name.to_string(), entry); + entry + }; + if row.len() != arity as usize { + return Err(StorageError::ArityMismatch { + expected: arity as usize, + got: row.len(), + }); + } + // Open the per-relation sub-database (cached for subsequent inserts). 
+ let db = if let Some(&db) = self.dbs.get(name) { + db + } else { + let Some(wtxn) = self.wtxn.as_mut() else { + unreachable!("transaction was already committed") + }; + let db = self + .env + .create_database::(wtxn, Some(name)) + .map_err(backend)?; + self.dbs.insert(name.to_string(), db); + db + }; + let key = row_key(next_id); + let value = encode_row(&row); + let Some(wtxn) = self.wtxn.as_mut() else { + unreachable!("transaction was already committed") + }; + db.put(wtxn, &key[..], &value[..]).map_err(backend)?; + self.next_ids.insert(name.to_string(), (arity, next_id + 1)); + Ok(RowId::from(next_id)) + } + + fn delete(&mut self, name: &str, id: &RowId) -> Result<(), StorageError> { + // Verify relation existence via meta. + let Some(wtxn) = self.wtxn.as_ref() else { + unreachable!("transaction was already committed") + }; + if self + .meta + .get(wtxn, name.as_bytes()) + .map_err(backend)? + .is_none() + { + return Err(StorageError::RelationNotFound(name.to_string())); + } + // Open or reuse the per-relation sub-database. 
+        let db = if let Some(&db) = self.dbs.get(name) {
+            db
+        } else {
+            let Some(wtxn) = self.wtxn.as_mut() else {
+                unreachable!("transaction was already committed")
+            };
+            let db = self
+                .env
+                .create_database::<Bytes, Bytes>(wtxn, Some(name))
+                .map_err(backend)?;
+            self.dbs.insert(name.to_string(), db);
+            db
+        };
+        let Some(wtxn) = self.wtxn.as_mut() else {
+            unreachable!("transaction was already committed")
+        };
+        let _ = db.delete(wtxn, id.as_bytes()).map_err(backend)?;
+        Ok(())
+    }
+
+    /// Write the per-relation metadata cached in `next_ids` back to the meta
+    /// database through the write transaction, then commit that transaction.
+    fn commit(self: Box<Self>) -> Result<CommittedTx, StorageError> {
+        let mut this = self;
+        let Some(mut wtxn) = this.wtxn.take() else {
+            unreachable!("transaction was already committed")
+        };
+        for (name, (arity, next_id)) in this.next_ids.drain() {
+            let encoded = encode_meta(arity, next_id);
+            this.meta
+                .put(&mut wtxn, name.as_bytes(), &encoded[..])
+                .map_err(backend)?;
+        }
+        wtxn.commit().map_err(backend)?;
+        Ok(CommittedTx::empty())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    // Brings `LmdbStorage`, `Value`, `StorageError`, `backend` and the
+    // `Storage` trait methods used below into scope.
+    use super::*;
+
+    fn i(x: i64) -> Value {
+        Value::Int(x)
+    }
+
+    fn open_temp() -> Result<LmdbStorage, StorageError> {
+        let dir = tempfile::tempdir().map_err(backend)?;
+        let storage = LmdbStorage::open(dir.path())?;
+        // Leak the temp-dir guard so the directory is not removed while the
+        // returned storage is still using it.
+        std::mem::forget(dir);
+        Ok(storage)
+    }
+
+    #[test]
+    fn create_insert_scan_roundtrip() -> Result<(), StorageError> {
+        let mut storage = open_temp()?;
+        storage.create_relation("edge", 2)?;
+        let id0 = storage.insert("edge", vec![i(1), i(2)])?;
+        let id1 = storage.insert("edge", vec![i(2), i(3)])?;
+        let rows = storage.scan("edge")?;
+        assert_eq!(rows, vec![(id0, vec![i(1), i(2)]), (id1, vec![i(2), i(3)])]);
+        assert_eq!(storage.arity("edge")?, 2);
+        Ok(())
+    }
+
+    #[test]
+    fn batched_inserts_share_one_commit() -> Result<(), StorageError> {
+        let mut storage = open_temp()?;
+        storage.create_relation("edge", 2)?;
+        let (a, b) = {
+            let mut tx = storage.transaction()?;
+            let a = tx.insert("edge", vec![i(1), i(2)])?;
+            let b = tx.insert("edge", vec![i(3), i(4)])?;
+            tx.commit()?;
+            (a, b)
+        };
+        let rows = storage.scan("edge")?;
+        assert_eq!(rows, vec![(a, vec![i(1), i(2)]),
(b, vec![i(3), i(4)])]); + Ok(()) + } + + #[test] + fn dropped_transaction_is_rolled_back() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + { + let mut tx = storage.transaction()?; + tx.insert("edge", vec![i(1), i(2)])?; + } + assert!(storage.scan("edge")?.is_empty()); + Ok(()) + } + + #[test] + fn delete_removes_row() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 1)?; + let a = storage.insert("edge", vec![i(1)])?; + let b = storage.insert("edge", vec![i(2)])?; + storage.delete("edge", &a)?; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(b, vec![i(2)])]); + storage.delete("edge", &a)?; + Ok(()) + } + + #[test] + fn duplicate_create_returns_err() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.create_relation("edge", 2), + Err(StorageError::RelationExists(_)) + )); + Ok(()) + } +} diff --git a/crates/storage/src/adapters/memory.rs b/crates/storage/src/adapters/memory.rs new file mode 100644 index 0000000..1401474 --- /dev/null +++ b/crates/storage/src/adapters/memory.rs @@ -0,0 +1,343 @@ +//! In-memory backend, keyed by relation name. Always available. + +use std::collections::HashMap; + +use crate::id::RowId; +use crate::value::Value; +use crate::{CommittedTx, RowStream, Storage, StorageError, Transaction}; + +/// In-memory backend, useful as the default in tests and as a correctness +/// oracle for other backends. 
+#[derive(Debug, Default)] +pub struct MemoryStorage { + relations: HashMap, +} + +#[derive(Debug)] +pub(crate) struct MemoryRelation { + pub(crate) arity: usize, + pub(crate) next_id: u64, + pub(crate) rows: Vec<(RowId, Vec)>, +} + +impl MemoryStorage { + #[must_use] + pub fn new() -> Self { + Self::default() + } +} + +impl Storage for MemoryStorage { + fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { + if self.relations.contains_key(name) { + return Err(StorageError::RelationExists(name.to_string())); + } + self.relations.insert( + name.to_string(), + MemoryRelation { + arity, + next_id: 0, + rows: Vec::new(), + }, + ); + Ok(()) + } + + fn arity(&self, name: &str) -> Result { + self.relations + .get(name) + .map(|r| r.arity) + .ok_or_else(|| StorageError::RelationNotFound(name.to_string())) + } + + fn scan_iter<'a>(&'a self, name: &str) -> Result, StorageError> { + let relation = self + .relations + .get(name) + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + Ok(Box::new(relation.rows.iter().cloned().map(Ok))) + } + + fn transaction<'a>(&'a mut self) -> Result, StorageError> { + Ok(Box::new(MemoryTx { + storage: self, + next_ids: HashMap::new(), + pending: Vec::new(), + deletes: Vec::new(), + })) + } +} + +/// In-flight memory transaction. Buffers inserts and deletes; applies on commit. +pub(crate) struct MemoryTx<'a> { + storage: &'a mut MemoryStorage, + /// Local next-id-per-relation; initialized lazily from storage on first + /// insert into a relation, then incremented per buffered row. + next_ids: HashMap, + /// (relation name, assigned `RowId`, row cells) for each buffered insert. + pending: Vec<(String, RowId, Vec)>, + /// (relation name, `RowId`) for each buffered delete. Applied after + /// inserts on commit, so insert+delete of the same id in one tx is a + /// net no-op. 
+ deletes: Vec<(String, RowId)>, +} + +impl MemoryTx<'_> { + fn next_id_for(&mut self, name: &str) -> Result { + if let Some(id) = self.next_ids.get(name) { + return Ok(*id); + } + let relation = self + .storage + .relations + .get(name) + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + let id = relation.next_id; + self.next_ids.insert(name.to_string(), id); + Ok(id) + } +} + +impl Transaction for MemoryTx<'_> { + fn insert(&mut self, name: &str, row: Vec) -> Result { + let arity = self.storage.arity(name)?; + if row.len() != arity { + return Err(StorageError::ArityMismatch { + expected: arity, + got: row.len(), + }); + } + let next_id = self.next_id_for(name)?; + let id = RowId::from(next_id); + self.next_ids.insert(name.to_string(), next_id + 1); + self.pending.push((name.to_string(), id.clone(), row)); + Ok(id) + } + + fn delete(&mut self, name: &str, id: &RowId) -> Result<(), StorageError> { + // Verify the relation exists; the actual removal is deferred to commit. + let _ = self.storage.arity(name)?; + self.deletes.push((name.to_string(), id.clone())); + Ok(()) + } + + fn commit(self: Box) -> Result { + let MemoryTx { + storage, + next_ids, + pending, + deletes, + } = *self; + for (name, id, row) in pending { + let relation = storage + .relations + .get_mut(&name) + .ok_or_else(|| StorageError::RelationNotFound(name.clone()))?; + relation.rows.push((id, row)); + } + for (name, id) in deletes { + let relation = storage + .relations + .get_mut(&name) + .ok_or_else(|| StorageError::RelationNotFound(name.clone()))?; + relation.rows.retain(|(rid, _)| rid != &id); + } + for (name, next_id) in next_ids { + if let Some(relation) = storage.relations.get_mut(&name) { + relation.next_id = next_id; + } + } + // Pending RowIds returned during the tx are already the real ids. 
+ Ok(CommittedTx::empty()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::scan_as_table; + + fn i(x: i64) -> Value { + Value::Int(x) + } + + #[test] + fn create_insert_scan_roundtrip() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + let id0 = storage.insert("edge", vec![i(1), i(2)])?; + let id1 = storage.insert("edge", vec![i(2), i(3)])?; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(id0, vec![i(1), i(2)]), (id1, vec![i(2), i(3)])],); + Ok(()) + } + + #[test] + fn batched_inserts_share_one_commit() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + let (a, b) = { + let mut tx = storage.transaction()?; + let a = tx.insert("edge", vec![i(1), i(2)])?; + let b = tx.insert("edge", vec![i(3), i(4)])?; + tx.commit()?; + (a, b) + }; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(a, vec![i(1), i(2)]), (b, vec![i(3), i(4)])],); + Ok(()) + } + + #[test] + fn dropped_transaction_is_rolled_back() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + { + let mut tx = storage.transaction()?; + tx.insert("edge", vec![i(1), i(2)])?; + tx.insert("edge", vec![i(3), i(4)])?; + // dropped without commit + } + let rows = storage.scan("edge")?; + assert!(rows.is_empty()); + Ok(()) + } + + #[test] + fn inserted_row_ids_are_distinct_and_increment() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 1)?; + let id0 = storage.insert("edge", vec![i(1)])?; + let id1 = storage.insert("edge", vec![i(2)])?; + assert_ne!(id0, id1); + assert_eq!(id0, RowId::from(0u64)); + assert_eq!(id1, RowId::from(1u64)); + Ok(()) + } + + #[test] + fn duplicate_create_returns_err() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + assert!(matches!( + 
storage.create_relation("edge", 2), + Err(StorageError::RelationExists(_)) + )); + Ok(()) + } + + #[test] + fn scan_unknown_relation_returns_err() { + let storage = MemoryStorage::new(); + assert!(matches!( + storage.scan("missing"), + Err(StorageError::RelationNotFound(_)) + )); + } + + #[test] + fn arity_unknown_relation_returns_err() { + let storage = MemoryStorage::new(); + assert!(matches!( + storage.arity("missing"), + Err(StorageError::RelationNotFound(_)) + )); + } + + #[test] + fn insert_wrong_arity_returns_err() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.insert("edge", vec![i(1)]), + Err(StorageError::ArityMismatch { + expected: 2, + got: 1 + }) + )); + Ok(()) + } + + #[test] + fn delete_removes_row_then_idempotent_on_missing() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 1)?; + let a = storage.insert("edge", vec![i(1)])?; + let b = storage.insert("edge", vec![i(2)])?; + storage.delete("edge", &a)?; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(b.clone(), vec![i(2)])]); + // Idempotent: deleting `a` again is fine. + storage.delete("edge", &a)?; + assert_eq!(storage.scan("edge")?, vec![(b, vec![i(2)])]); + Ok(()) + } + + #[test] + fn delete_within_transaction_is_atomic() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 1)?; + let a = storage.insert("edge", vec![i(1)])?; + let _b = storage.insert("edge", vec![i(2)])?; + { + let mut tx = storage.transaction()?; + tx.delete("edge", &a)?; + // Drop without commit: deletion rolled back. 
+ } + assert_eq!(storage.scan("edge")?.len(), 2); + Ok(()) + } + + #[test] + fn scan_where_filters_by_column_value() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + storage.insert("edge", vec![i(1), i(10)])?; + let target = storage.insert("edge", vec![i(2), i(20)])?; + storage.insert("edge", vec![i(3), i(10)])?; + let target2 = storage.insert("edge", vec![i(2), i(30)])?; + // Filter on column 0 = 2. + let matches: Vec<_> = storage + .scan_where("edge", 0, &i(2))? + .collect::>()?; + assert_eq!( + matches, + vec![(target, vec![i(2), i(20)]), (target2, vec![i(2), i(30)])], + ); + // Out-of-range column = no matches. + let none: Vec<_> = storage + .scan_where("edge", 5, &i(2))? + .collect::>()?; + assert!(none.is_empty()); + Ok(()) + } + + #[test] + fn scan_iter_yields_rows_lazily() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 1)?; + storage.insert("edge", vec![i(10)])?; + storage.insert("edge", vec![i(20)])?; + storage.insert("edge", vec![i(30)])?; + // Take only the first two rows without scanning the whole relation. + let prefix: Vec<_> = storage + .scan_iter("edge")? + .take(2) + .collect::>()?; + assert_eq!(prefix.len(), 2); + assert_eq!(prefix[0].1, vec![i(10)]); + assert_eq!(prefix[1].1, vec![i(20)]); + Ok(()) + } + + #[test] + fn scan_as_table_drops_row_ids() -> Result<(), StorageError> { + let mut storage = MemoryStorage::new(); + storage.create_relation("edge", 2)?; + storage.insert("edge", vec![i(1), i(2)])?; + let table = scan_as_table(&storage, "edge")?; + assert_eq!(table.arity, 2); + assert_eq!(table.rows, vec![vec![i(1), i(2)]]); + Ok(()) + } +} diff --git a/crates/storage/src/adapters/redb.rs b/crates/storage/src/adapters/redb.rs new file mode 100644 index 0000000..faf7b34 --- /dev/null +++ b/crates/storage/src/adapters/redb.rs @@ -0,0 +1,298 @@ +//! redb adapter. +//! +//! 
Each relation gets a redb table named after it, keyed by `u64` row IDs. +//! A reserved table named `__meta`, keyed by relation name, carries per-relation +//! metadata (arity and next synthetic row ID). +//! +//! [`RedbTx`] wraps a real `redb::WriteTransaction`. Inserts go through the +//! transaction; [`Transaction::commit`] commits it; dropping the tx without +//! commit rolls back (redb's `WriteTransaction` drops the pending writes). + +use std::collections::HashMap; + +use redb::{Database, ReadableTable, TableDefinition, WriteTransaction}; + +use crate::codec::{decode_meta, decode_row, encode_meta, encode_row}; +use crate::id::RowId; +use crate::value::Value; +use crate::{backend, CommittedTx, RowStream, Storage, StorageError, Transaction}; + +const META_TABLE: &str = "__meta"; + +fn meta_def() -> TableDefinition<'static, &'static str, &'static [u8]> { + TableDefinition::new(META_TABLE) +} + +fn rows_def(name: &str) -> TableDefinition<'_, u64, &'static [u8]> { + TableDefinition::new(name) +} + +/// redb-backed [`Storage`] implementation. +pub struct RedbStorage { + db: Database, +} + +impl RedbStorage { + /// Open or create a redb database at `path`. + /// + /// # Errors + /// Returns [`StorageError::Backend`] if redb fails to open the file. 
+ pub fn open(path: impl AsRef) -> Result { + let db = Database::create(path).map_err(backend)?; + Ok(Self { db }) + } +} + +impl Storage for RedbStorage { + fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { + if name == META_TABLE { + return Err(StorageError::Validation(format!( + "relation name '{name}' is reserved" + ))); + } + let Ok(arity_u32) = u32::try_from(arity) else { + unreachable!("arity exceeds u32::MAX") + }; + let txn = self.db.begin_write().map_err(backend)?; + { + let mut meta = txn.open_table(meta_def()).map_err(backend)?; + if meta.get(name).map_err(backend)?.is_some() { + return Err(StorageError::RelationExists(name.to_string())); + } + let encoded = encode_meta(arity_u32, 0); + meta.insert(name, &encoded[..]).map_err(backend)?; + let _ = txn.open_table(rows_def(name)).map_err(backend)?; + } + txn.commit().map_err(backend)?; + Ok(()) + } + + fn arity(&self, name: &str) -> Result { + let txn = self.db.begin_read().map_err(backend)?; + let meta = txn.open_table(meta_def()).map_err(backend)?; + let entry = meta + .get(name) + .map_err(backend)? + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + let (arity, _) = decode_meta(entry.value())?; + Ok(arity as usize) + } + + fn scan_iter<'a>(&'a self, name: &str) -> Result, StorageError> { + let txn = self.db.begin_read().map_err(backend)?; + let meta = txn.open_table(meta_def()).map_err(backend)?; + if meta.get(name).map_err(backend)?.is_none() { + return Err(StorageError::RelationNotFound(name.to_string())); + } + let table = txn.open_table(rows_def(name)).map_err(backend)?; + let mut rows = Vec::new(); + for entry in table.iter().map_err(backend)? 
{ + let (key, value) = entry.map_err(backend)?; + let id = RowId::from(key.value()); + rows.push((id, decode_row(value.value())?)); + } + Ok(Box::new(rows.into_iter().map(Ok))) + } + + fn transaction<'a>(&'a mut self) -> Result, StorageError> { + let wtxn = self.db.begin_write().map_err(backend)?; + Ok(Box::new(RedbTx { + wtxn: Some(wtxn), + next_ids: HashMap::new(), + })) + } +} + +pub(crate) struct RedbTx { + wtxn: Option, + next_ids: HashMap, +} + +impl RedbTx { + /// Borrow the live `WriteTransaction`. Panics if commit already + /// consumed it: unreachable via the public API since + /// [`Transaction::commit`] consumes the boxed tx. + fn live(&self) -> &WriteTransaction { + match self.wtxn.as_ref() { + Some(t) => t, + None => unreachable!("transaction was already committed"), + } + } + + fn meta_for(&mut self, name: &str) -> Result<(u32, u64), StorageError> { + if let Some(&entry) = self.next_ids.get(name) { + return Ok(entry); + } + let decoded = { + let meta = self.live().open_table(meta_def()).map_err(backend)?; + let entry = meta + .get(name) + .map_err(backend)? + .ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + decode_meta(entry.value())? 
+ }; + self.next_ids.insert(name.to_string(), decoded); + Ok(decoded) + } +} + +fn row_id_as_u64(id: &RowId) -> Result { + let bytes = id.as_bytes(); + if bytes.len() != 8 { + return Err(StorageError::Backend( + format!("redb row id must be 8 bytes, got {}", bytes.len()).into(), + )); + } + let mut buf = [0u8; 8]; + buf.copy_from_slice(bytes); + Ok(u64::from_be_bytes(buf)) +} + +impl Transaction for RedbTx { + fn insert(&mut self, name: &str, row: Vec) -> Result { + let (arity, next_id) = self.meta_for(name)?; + if row.len() != arity as usize { + return Err(StorageError::ArityMismatch { + expected: arity as usize, + got: row.len(), + }); + } + { + let mut rows = self.live().open_table(rows_def(name)).map_err(backend)?; + let encoded = encode_row(&row); + rows.insert(next_id, &encoded[..]).map_err(backend)?; + } + self.next_ids.insert(name.to_string(), (arity, next_id + 1)); + Ok(RowId::from(next_id)) + } + + fn delete(&mut self, name: &str, id: &RowId) -> Result<(), StorageError> { + let key = row_id_as_u64(id)?; + let wtxn = self.live(); + // Verify the relation exists by checking meta. 
+ let meta = wtxn.open_table(meta_def()).map_err(backend)?; + if meta.get(name).map_err(backend)?.is_none() { + return Err(StorageError::RelationNotFound(name.to_string())); + } + drop(meta); + let mut rows = wtxn.open_table(rows_def(name)).map_err(backend)?; + let _ = rows.remove(key).map_err(backend)?; + Ok(()) + } + + fn commit(self: Box) -> Result { + let mut this = self; + let Some(wtxn) = this.wtxn.take() else { + unreachable!("transaction was already committed") + }; + { + let mut meta = wtxn.open_table(meta_def()).map_err(backend)?; + for (name, (arity, next_id)) in this.next_ids.drain() { + let encoded = encode_meta(arity, next_id); + meta.insert(name.as_str(), &encoded[..]).map_err(backend)?; + } + } + wtxn.commit().map_err(backend)?; + Ok(CommittedTx::empty()) + } +} + +#[cfg(test)] +mod tests { + fn i(x: i64) -> Value { + Value::Int(x) + } + + fn s(x: &str) -> Value { + Value::Str(x.to_string()) + } + + fn open_temp() -> Result { + let dir = tempfile::tempdir().map_err(backend)?; + let path = dir.path().join("test.redb"); + let storage = RedbStorage::open(&path)?; + std::mem::forget(dir); + Ok(storage) + } + + #[test] + fn create_insert_scan_roundtrip() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + let id0 = storage.insert("edge", vec![i(1), i(2)])?; + let id1 = storage.insert("edge", vec![s("hello"), i(7)])?; + let rows = storage.scan("edge")?; + assert_eq!( + rows, + vec![(id0, vec![i(1), i(2)]), (id1, vec![s("hello"), i(7)])], + ); + assert_eq!(storage.arity("edge")?, 2); + Ok(()) + } + + #[test] + fn batched_inserts_share_one_commit() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + let (a, b) = { + let mut tx = storage.transaction()?; + let a = tx.insert("edge", vec![i(1), i(2)])?; + let b = tx.insert("edge", vec![i(3), i(4)])?; + tx.commit()?; + (a, b) + }; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(a, vec![i(1), 
i(2)]), (b, vec![i(3), i(4)])]); + Ok(()) + } + + #[test] + fn dropped_transaction_is_rolled_back() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + { + let mut tx = storage.transaction()?; + tx.insert("edge", vec![i(1), i(2)])?; + } + assert!(storage.scan("edge")?.is_empty()); + Ok(()) + } + + #[test] + fn delete_removes_row() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 1)?; + let a = storage.insert("edge", vec![i(1)])?; + let b = storage.insert("edge", vec![i(2)])?; + storage.delete("edge", &a)?; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(b, vec![i(2)])]); + storage.delete("edge", &a)?; + Ok(()) + } + + #[test] + fn duplicate_create_returns_err() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.create_relation("edge", 2), + Err(StorageError::RelationExists(_)) + )); + Ok(()) + } + + #[test] + fn insert_wrong_arity_returns_err() -> Result<(), StorageError> { + let mut storage = open_temp()?; + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.insert("edge", vec![i(1)]), + Err(StorageError::ArityMismatch { + expected: 2, + got: 1, + }) + )); + Ok(()) + } +} diff --git a/crates/storage/src/adapters/sqlite.rs b/crates/storage/src/adapters/sqlite.rs new file mode 100644 index 0000000..0496e13 --- /dev/null +++ b/crates/storage/src/adapters/sqlite.rs @@ -0,0 +1,308 @@ +//! SQLite adapter via the `rusqlite` crate (bundled libsqlite3). +//! +//! Storage layout: +//! +//! - `__meta(name TEXT PRIMARY KEY, arity INTEGER, next_id INTEGER)` tracks +//! per-relation metadata. +//! - `__rows(rel TEXT, row_id BLOB, row_bytes BLOB, PRIMARY KEY (rel, row_id))` +//! holds every row across every relation. The single-table layout avoids +//! per-relation DDL and keeps schema operations out of insert paths. +//! +//! 
[`SqliteTx`] wraps a real `rusqlite::Transaction`. Inserts and deletes +//! execute through the transaction; [`Transaction::commit`] commits it; +//! dropping the tx rolls back via rusqlite's `Transaction::drop`. + +use std::collections::HashMap; + +use rusqlite::{params, Connection, OptionalExtension}; + +use crate::codec::{decode_row, encode_row}; +use crate::id::RowId; +use crate::value::Value; +use crate::{backend, CommittedTx, RowStream, Storage, StorageError, Transaction}; + +const SCHEMA_SQL: &str = " +CREATE TABLE IF NOT EXISTS __meta ( + name TEXT PRIMARY KEY NOT NULL, + arity INTEGER NOT NULL, + next_id INTEGER NOT NULL +); +CREATE TABLE IF NOT EXISTS __rows ( + rel TEXT NOT NULL, + row_id BLOB NOT NULL, + row_bytes BLOB NOT NULL, + PRIMARY KEY (rel, row_id) +); +"; + +/// SQLite-backed [`Storage`] implementation. +pub struct SqliteStorage { + conn: Connection, +} + +impl SqliteStorage { + /// Open or create a SQLite database at `path`. Pass `":memory:"` for + /// an in-process database (useful in tests). + /// + /// # Errors + /// Returns [`StorageError::Backend`] if `rusqlite` fails to open the + /// path or to initialize the schema. + pub fn open(path: impl AsRef) -> Result { + let conn = Connection::open(path).map_err(backend)?; + conn.execute_batch(SCHEMA_SQL).map_err(backend)?; + Ok(Self { conn }) + } +} + +impl Storage for SqliteStorage { + fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError> { + let Ok(arity_u32) = u32::try_from(arity) else { + unreachable!("arity exceeds u32::MAX") + }; + let exists: bool = self + .conn + .query_row("SELECT 1 FROM __meta WHERE name = ?", params![name], |_| { + Ok(()) + }) + .optional() + .map_err(backend)? 
+ .is_some(); + if exists { + return Err(StorageError::RelationExists(name.to_string())); + } + self.conn + .execute( + "INSERT INTO __meta (name, arity, next_id) VALUES (?, ?, 0)", + params![name, arity_u32], + ) + .map_err(backend)?; + Ok(()) + } + + fn arity(&self, name: &str) -> Result { + let arity: Option = self + .conn + .query_row( + "SELECT arity FROM __meta WHERE name = ?", + params![name], + |row| row.get::<_, u32>(0), + ) + .optional() + .map_err(backend)?; + let arity = arity.ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + Ok(arity as usize) + } + + fn scan_iter<'a>(&'a self, name: &str) -> Result, StorageError> { + let _ = self.arity(name)?; + let mut stmt = self + .conn + .prepare("SELECT row_id, row_bytes FROM __rows WHERE rel = ? ORDER BY row_id") + .map_err(backend)?; + let mapped = stmt + .query_map(params![name], |row| { + let row_id: Vec = row.get(0)?; + let row_bytes: Vec = row.get(1)?; + Ok((row_id, row_bytes)) + }) + .map_err(backend)?; + let mut rows = Vec::new(); + for entry in mapped { + let (id_bytes, value_bytes) = entry.map_err(backend)?; + rows.push((RowId::new(&id_bytes), decode_row(&value_bytes)?)); + } + Ok(Box::new(rows.into_iter().map(Ok))) + } + + fn transaction<'a>(&'a mut self) -> Result, StorageError> { + let txn = self.conn.transaction().map_err(backend)?; + Ok(Box::new(SqliteTx { + txn: Some(txn), + next_ids: HashMap::new(), + })) + } +} + +pub(crate) struct SqliteTx<'a> { + txn: Option>, + next_ids: HashMap, +} + +impl<'a> SqliteTx<'a> { + /// Borrow the live `rusqlite::Transaction`. Panics if commit already + /// consumed it: unreachable via the public API since + /// [`Transaction::commit`] consumes the boxed tx. 
+ fn live(&self) -> &rusqlite::Transaction<'a> { + match self.txn.as_ref() { + Some(t) => t, + None => unreachable!("transaction was already committed"), + } + } + + fn meta_for(&mut self, name: &str) -> Result<(u32, u64), StorageError> { + if let Some(&entry) = self.next_ids.get(name) { + return Ok(entry); + } + let entry: Option<(u32, u64)> = self + .live() + .query_row( + "SELECT arity, next_id FROM __meta WHERE name = ?", + params![name], + |row| Ok((row.get::<_, u32>(0)?, row.get::<_, u64>(1)?)), + ) + .optional() + .map_err(backend)?; + let entry = entry.ok_or_else(|| StorageError::RelationNotFound(name.to_string()))?; + self.next_ids.insert(name.to_string(), entry); + Ok(entry) + } +} + +impl Transaction for SqliteTx<'_> { + fn insert(&mut self, name: &str, row: Vec) -> Result { + let (arity, next_id) = self.meta_for(name)?; + if row.len() != arity as usize { + return Err(StorageError::ArityMismatch { + expected: arity as usize, + got: row.len(), + }); + } + let row_id = RowId::from(next_id); + let encoded = encode_row(&row); + self.live() + .execute( + "INSERT INTO __rows (rel, row_id, row_bytes) VALUES (?, ?, ?)", + params![name, row_id.as_bytes(), encoded], + ) + .map_err(backend)?; + self.next_ids.insert(name.to_string(), (arity, next_id + 1)); + Ok(row_id) + } + + fn delete(&mut self, name: &str, id: &RowId) -> Result<(), StorageError> { + let exists: bool = self + .live() + .query_row("SELECT 1 FROM __meta WHERE name = ?", params![name], |_| { + Ok(()) + }) + .optional() + .map_err(backend)? + .is_some(); + if !exists { + return Err(StorageError::RelationNotFound(name.to_string())); + } + self.live() + .execute( + "DELETE FROM __rows WHERE rel = ? 
AND row_id = ?", + params![name, id.as_bytes()], + ) + .map_err(backend)?; + Ok(()) + } + + fn commit(self: Box) -> Result { + let mut this = self; + let Some(txn) = this.txn.take() else { + unreachable!("transaction was already committed") + }; + for (name, (arity, next_id)) in this.next_ids.drain() { + txn.execute( + "UPDATE __meta SET arity = ?, next_id = ? WHERE name = ?", + params![arity, next_id, name], + ) + .map_err(backend)?; + } + txn.commit().map_err(backend)?; + Ok(CommittedTx::empty()) + } +} + +#[cfg(test)] +mod tests { + fn i(x: i64) -> Value { + Value::Int(x) + } + + fn open_memory() -> Result { + SqliteStorage::open(":memory:") + } + + #[test] + fn create_insert_scan_roundtrip() -> Result<(), StorageError> { + let mut storage = open_memory()?; + storage.create_relation("edge", 2)?; + let id0 = storage.insert("edge", vec![i(1), i(2)])?; + let id1 = storage.insert("edge", vec![i(2), i(3)])?; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(id0, vec![i(1), i(2)]), (id1, vec![i(2), i(3)])]); + assert_eq!(storage.arity("edge")?, 2); + Ok(()) + } + + #[test] + fn batched_inserts_share_one_commit() -> Result<(), StorageError> { + let mut storage = open_memory()?; + storage.create_relation("edge", 2)?; + let (a, b) = { + let mut tx = storage.transaction()?; + let a = tx.insert("edge", vec![i(1), i(2)])?; + let b = tx.insert("edge", vec![i(3), i(4)])?; + tx.commit()?; + (a, b) + }; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(a, vec![i(1), i(2)]), (b, vec![i(3), i(4)])]); + Ok(()) + } + + #[test] + fn dropped_transaction_is_rolled_back() -> Result<(), StorageError> { + let mut storage = open_memory()?; + storage.create_relation("edge", 2)?; + { + let mut tx = storage.transaction()?; + tx.insert("edge", vec![i(1), i(2)])?; + } + assert!(storage.scan("edge")?.is_empty()); + Ok(()) + } + + #[test] + fn delete_removes_row() -> Result<(), StorageError> { + let mut storage = open_memory()?; + storage.create_relation("edge", 1)?; + let 
a = storage.insert("edge", vec![i(1)])?; + let b = storage.insert("edge", vec![i(2)])?; + storage.delete("edge", &a)?; + let rows = storage.scan("edge")?; + assert_eq!(rows, vec![(b, vec![i(2)])]); + // Idempotent on missing. + storage.delete("edge", &a)?; + Ok(()) + } + + #[test] + fn duplicate_create_returns_err() -> Result<(), StorageError> { + let mut storage = open_memory()?; + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.create_relation("edge", 2), + Err(StorageError::RelationExists(_)) + )); + Ok(()) + } + + #[test] + fn insert_wrong_arity_returns_err() -> Result<(), StorageError> { + let mut storage = open_memory()?; + storage.create_relation("edge", 2)?; + assert!(matches!( + storage.insert("edge", vec![i(1)]), + Err(StorageError::ArityMismatch { + expected: 2, + got: 1, + }) + )); + Ok(()) + } +} diff --git a/crates/query-storage/src/codec.rs b/crates/storage/src/codec.rs similarity index 90% rename from crates/query-storage/src/codec.rs rename to crates/storage/src/codec.rs index 8b3352d..4d90df7 100644 --- a/crates/query-storage/src/codec.rs +++ b/crates/storage/src/codec.rs @@ -17,6 +17,7 @@ //! |--------|---------------|--------------------------------------| //! | `0x00` | `Value::Int` | `i64 LE` (8 bytes) | //! | `0x01` | `Value::Str` | `[len: u32 LE] [bytes]` | +//! | `0x02` | `Value::Id` | `[len: u32 LE] [bytes]` | //! //! ## Row Key Format //! @@ -28,6 +29,7 @@ //! //! Per-relation metadata is `[arity: u32 LE] [next_id: u64 LE]` = 12 bytes. +use crate::id::RowId; use crate::value::Value; /// Errors raised by [`decode_row`] and [`decode_meta`]. 
@@ -81,6 +83,14 @@ pub fn encode_row(row: &[Value]) -> Vec { ); out.extend_from_slice(bytes); } + Value::Id(id) => { + out.push(0x02); + let bytes = id.as_bytes(); + out.extend_from_slice( + &u32::try_from(bytes.len()).unwrap_or(u32::MAX).to_le_bytes(), + ); + out.extend_from_slice(bytes); + } } } out @@ -121,6 +131,18 @@ fn read_value(bytes: &mut &[u8]) -> Result { .to_string(); Ok(Value::Str(s)) } + 0x02 => { + let len = read_u32(bytes)? as usize; + if bytes.len() < len { + return Err(CodecError::LengthOverrun { + declared: len, + available: bytes.len(), + }); + } + let (head, tail) = bytes.split_at(len); + *bytes = tail; + Ok(Value::Id(RowId::new(head))) + } other => Err(CodecError::UnknownTag(other)), } } diff --git a/crates/storage/src/id.rs b/crates/storage/src/id.rs new file mode 100644 index 0000000..ad5b950 --- /dev/null +++ b/crates/storage/src/id.rs @@ -0,0 +1,47 @@ +//! Opaque row identifiers. +//! +//! Each backend assigns its own bytes for a [`RowId`]; the trait treats them +//! opaquely. The in-memory backend and the KV backends use a big-endian `u64` +//! row counter; the geomerge backend encodes its `(CommitHash, counter)` +//! pair. Callers do not interpret the bytes: they hand a `RowId` back to the +//! same backend to reference an existing row. +//! +//! Storage is inline up to 36 bytes (the geomerge `(CommitHash, counter)` +//! width), which covers every encoding the workspace produces today. Wider +//! row-id encodings spill to the heap automatically via [`SmallVec`]. + +use smallvec::SmallVec; + +const INLINE_BYTES: usize = 36; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct RowId(SmallVec<[u8; INLINE_BYTES]>); + +impl RowId { + /// Build a row id by copying the given bytes into inline storage when they + /// fit (≤ 36 bytes) or onto the heap when they don't. + pub fn new(bytes: impl AsRef<[u8]>) -> Self { + Self(SmallVec::from_slice(bytes.as_ref())) + } + + /// View the underlying bytes. 
+ #[must_use] + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } +} + +impl std::fmt::Display for RowId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for byte in &self.0 { + write!(f, "{byte:02x}")?; + } + Ok(()) + } +} + +impl From for RowId { + fn from(value: u64) -> Self { + Self(SmallVec::from_slice(&value.to_be_bytes())) + } +} diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs new file mode 100644 index 0000000..7ab9df3 --- /dev/null +++ b/crates/storage/src/lib.rs @@ -0,0 +1,323 @@ +//! Storage layer for the query-plan playground. +//! +//! This is the foundational crate of the workspace. It owns the [`Value`] cell +//! type and the [`Table`] container, defines the [`Storage`] trait, and ships +//! adapters for several backends behind Cargo features. Higher-level crates +//! such as `query-ops` depend on this crate for both the types and the trait. +//! +//! The trait covers relation creation, lazy and materialized scans, point +//! filters by column value, atomic transactions with batched inserts and +//! deletes, and a [`CommittedTx`] mechanism for resolving pending row IDs. +//! Delta streams and indexes are not modeled, and will be added when an +//! experiment demands them. +//! +//! ## Backends +//! +//! [`MemoryStorage`] is always available. Other backends are gated behind +//! Cargo features so users only pay for what they need: +//! +//! - `lmdb`: LMDB via the `heed` crate +//! - `redb`: pure-Rust embedded KV +//! - `fjall`: pure-Rust LSM-tree +//! - `sqlite`: `SQLite` via the `rusqlite` crate (bundled libsqlite3) +//! 
- `geomerge`: the workspace's `geomerge` crate + +use crate::id::RowId; +use crate::table::Table; +use crate::value::Value; + +pub mod adapters; +pub mod id; +pub mod table; +pub mod value; + +#[allow(dead_code)] // helpers are used by feature-gated kv adapters +pub(crate) mod codec; + +pub use adapters::memory::MemoryStorage; +pub use codec::CodecError; + +/// Errors returned by a [`Storage`] backend. +/// +/// Backend-specific failures (LMDB transaction aborts, fjall I/O errors, +/// `rusqlite` errors, etc.) are wrapped in [`StorageError::Backend`]. +#[derive(Debug)] +pub enum StorageError { + /// No relation with the given name exists in this backend. + RelationNotFound(String), + /// A relation with the given name already exists. + RelationExists(String), + /// A row was offered with the wrong number of columns. + ArityMismatch { expected: usize, got: usize }, + /// A backend-defined validation rule rejected the operation, for example + /// a `geomerge` law violation. + Validation(String), + /// A row decoded from storage was malformed. + Decode(codec::CodecError), + /// The backend does not implement the requested operation, for example + /// `geomerge`'s append-only commit log rejecting row deletion. The static + /// string describes the unsupported operation. + Unsupported(&'static str), + /// A backend-specific error wrapped for transport across the trait. 
+ Backend(Box), +} + +impl std::fmt::Display for StorageError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::RelationNotFound(name) => write!(f, "relation not found: {name}"), + Self::RelationExists(name) => write!(f, "relation already exists: {name}"), + Self::ArityMismatch { expected, got } => { + write!(f, "arity mismatch: expected {expected}, got {got}") + } + Self::Validation(msg) => write!(f, "validation failed: {msg}"), + Self::Decode(err) => write!(f, "decode error: {err}"), + Self::Unsupported(op) => write!(f, "operation not supported by this backend: {op}"), + Self::Backend(err) => write!(f, "backend error: {err}"), + } + } +} + +impl std::error::Error for StorageError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Backend(err) => Some(err.as_ref()), + Self::Decode(err) => Some(err), + _ => None, + } + } +} + +impl From for StorageError { + fn from(err: codec::CodecError) -> Self { + Self::Decode(err) + } +} + +/// Wrap any `Error + Send + Sync` as [`StorageError::Backend`]. Used by +/// adapters in `.map_err(backend)?` chains. +pub(crate) fn backend(err: E) -> StorageError { + StorageError::Backend(Box::new(err)) +} + +/// Lazily-yielded sequence of `(RowId, row)` pairs returned by +/// [`Storage::scan_iter`]. Each item is a `Result` so backends can surface +/// errors mid-iteration. +pub type RowStream<'a> = Box), StorageError>> + 'a>; + +/// Backend-agnostic interface for storing and retrieving rows. +/// +/// Each relation has a fixed name, a fixed arity (row width), and an ordered +/// collection of rows whose cells are [`Value`]s. Concrete implementations +/// include [`MemoryStorage`] in this crate plus the feature-gated backends. +pub trait Storage { + /// Create a new relation with the given name and arity. + /// + /// # Errors + /// Returns [`StorageError::RelationExists`] if a relation with the given + /// name already exists. 
+ fn create_relation(&mut self, name: &str, arity: usize) -> Result<(), StorageError>; + + /// Return the arity of the given relation. + /// + /// # Errors + /// Returns [`StorageError::RelationNotFound`] if no such relation exists. + fn arity(&self, name: &str) -> Result; + + /// Scan all rows of the given relation in storage order, paired with each + /// row's backend-assigned [`RowId`]. Yields rows lazily; backends that can + /// stream row by row (memory, fjall) do so; backends that must materialize + /// (sqlite, redb, lmdb, geomerge) build a `Vec` internally and return its + /// iterator. The returned iterator borrows from `self` for `'a`. + /// + /// # Errors + /// Returns [`StorageError::RelationNotFound`] if no such relation exists. + /// Iteration may also yield per-row errors when a backend fails mid-scan. + fn scan_iter<'a>(&'a self, name: &str) -> Result, StorageError>; + + /// Materialize a full scan of the given relation. The default + /// implementation collects [`Storage::scan_iter`]; backends with a + /// cheaper batch-read path may override. + /// + /// # Errors + /// Returns the same errors as [`Storage::scan_iter`]. + fn scan(&self, name: &str) -> Result)>, StorageError> { + self.scan_iter(name)?.collect() + } + + /// Scan rows of the given relation where the cell at `column` equals + /// `value`. The default implementation reads from [`Storage::scan_iter`] + /// and filters in memory, giving the query planner a single target to + /// push filters down to even when backends don't yet have indexes; + /// backends that gain column indexes can override for efficient lookup. + /// + /// Rows whose row width is `<= column` are silently skipped (no match). + /// + /// # Errors + /// Returns the same errors as [`Storage::scan_iter`].
+ fn scan_where<'a>( + &'a self, + name: &str, + column: usize, + value: &Value, + ) -> Result, StorageError> { + let target = value.clone(); + let iter = self.scan_iter(name)?.filter_map(move |entry| match entry { + Ok((id, row)) => { + if row.get(column) == Some(&target) { + Some(Ok((id, row))) + } else { + None + } + } + Err(err) => Some(Err(err)), + }); + Ok(Box::new(iter)) + } + + /// Begin a transaction. Inserts within the returned transaction are + /// committed atomically when [`Transaction::commit`] is called; dropping + /// the transaction without committing rolls back any pending inserts. + /// + /// Backends with native transactions (LMDB, redb, `SQLite`, geomerge) wrap + /// their real write transactions; backends without (memory, fjall) buffer + /// inserts in memory and apply them on commit. + /// + /// # Errors + /// Returns [`StorageError::Backend`] if the backend can't begin a tx. + fn transaction<'a>(&'a mut self) -> Result, StorageError>; + + /// Append a row to the given relation, returning the new row's + /// backend-assigned [`RowId`]. The default implementation opens a fresh + /// transaction, inserts, and commits; for chains of related inserts that + /// must satisfy backend laws together (e.g. geomerge totality and + /// foreign-key laws), call [`Storage::transaction`] explicitly. + /// + /// # Errors + /// Returns the same errors as [`Transaction::insert`] and + /// [`Transaction::commit`]. + fn insert(&mut self, name: &str, row: Vec) -> Result { + let mut tx = self.transaction()?; + let pending = tx.insert(name, row)?; + let committed = tx.commit()?; + Ok(committed.resolve(&pending)) + } + + /// Remove the row with the given id from the given relation. The default + /// implementation opens a fresh transaction, deletes, and commits; + /// deleting a row that doesn't exist is a no-op. + /// + /// # Errors + /// Returns the same errors as [`Transaction::delete`] and + /// [`Transaction::commit`]. 
Backends that don't support deletion + /// return [`StorageError::Unsupported`]. + fn delete(&mut self, name: &str, id: &RowId) -> Result<(), StorageError> { + let mut tx = self.transaction()?; + tx.delete(name, id)?; + let _ = tx.commit()?; + Ok(()) + } +} + +/// A transaction over a [`Storage`] backend. Holds pending inserts; commits +/// them atomically on [`Transaction::commit`]; rolls back on drop. +/// +/// `RowId`s returned by [`Transaction::insert`] are valid for foreign-key +/// references in subsequent inserts within the same transaction. After commit, +/// callers can resolve those pending ids to their post-commit form via the +/// returned [`CommittedTx`]. +pub trait Transaction { + /// Append a row to the given relation as part of this transaction. + /// + /// # Errors + /// Returns [`StorageError::RelationNotFound`] if no such relation exists, + /// [`StorageError::ArityMismatch`] if the row's length differs from the + /// declared arity, or [`StorageError::Validation`] / [`StorageError::Backend`] + /// for backend-specific rejections. + fn insert(&mut self, name: &str, row: Vec) -> Result; + + /// Remove the row with the given id from the given relation as part of + /// this transaction. Idempotent: deleting an id that doesn't exist is + /// not an error. + /// + /// The default implementation returns [`StorageError::Unsupported`]; + /// backends that allow deletion override it. Geomerge, in particular, has + /// an append-only commit log and does not implement this. + /// + /// # Errors + /// Returns [`StorageError::RelationNotFound`] if no such relation exists, + /// or [`StorageError::Unsupported`] if the backend doesn't support + /// deletion. + fn delete(&mut self, name: &str, id: &RowId) -> Result<(), StorageError> { + let _ = (name, id); + Err(StorageError::Unsupported("row deletion")) + } + + /// Commit all pending inserts atomically.
Returns a [`CommittedTx`] that + /// can resolve pending [`RowId`]s (those returned by + /// [`Transaction::insert`]) to their post-commit form. + /// + /// For KV backends, pending ids are stable across commit; the returned + /// `CommittedTx` is empty and [`CommittedTx::resolve`] returns the same + /// id. For the geomerge backend, the returned map carries each pending + /// 4-byte counter mapped to the post-commit 36-byte + /// `(CommitHash, counter)` form. + /// + /// # Errors + /// Returns [`StorageError::Validation`] if a backend law (e.g. a geomerge + /// totality or foreign-key law) is violated by the committed state, or + /// [`StorageError::Backend`] for other backend failures. + fn commit(self: Box) -> Result; +} + +/// The result of a successful [`Transaction::commit`]. Carries the +/// pending-to-real [`RowId`] mapping for backends that need it (currently +/// only `geomerge`); is empty for backends where pending ids are stable. +#[derive(Debug, Default, Clone)] +pub struct CommittedTx { + resolutions: std::collections::HashMap, +} + +impl CommittedTx { + /// Empty resolution table. Backends where pending ids are already stable + /// return this from `commit`. + #[must_use] + pub fn empty() -> Self { + Self::default() + } + + /// Construct a resolution table from explicit pending-to-real mappings. + /// Crate-internal: only the `geomerge` adapter builds non-empty + /// resolution tables. + #[must_use] + pub(crate) fn from_mappings(resolutions: std::collections::HashMap) -> Self { + Self { resolutions } + } + + /// Resolve a [`RowId`] returned by [`Transaction::insert`] to its + /// post-commit form. If no explicit mapping is recorded, the pending id + /// is returned unchanged (i.e. KV backends where pending == real). 
+ #[must_use] + pub fn resolve(&self, pending: &RowId) -> RowId { + self.resolutions + .get(pending) + .cloned() + .unwrap_or_else(|| pending.clone()) + } +} + +/// Materialize a relation from a [`Storage`] backend as a [`Table`] that +/// query-language operators can consume. Row IDs returned by [`Storage::scan`] +/// are dropped; the resulting [`Table`] carries only cell values. +/// +/// # Errors +/// Returns any error produced by [`Storage::arity`] or [`Storage::scan`]. +pub fn scan_as_table(storage: &dyn Storage, name: &str) -> Result { + let arity = storage.arity(name)?; + let rows = storage + .scan(name)? + .into_iter() + .map(|(_, row)| row) + .collect(); + Ok(Table::from_rows(arity, rows)) +} diff --git a/crates/query-storage/src/table.rs b/crates/storage/src/table.rs similarity index 100% rename from crates/query-storage/src/table.rs rename to crates/storage/src/table.rs diff --git a/crates/query-storage/src/value.rs b/crates/storage/src/value.rs similarity index 80% rename from crates/query-storage/src/value.rs rename to crates/storage/src/value.rs index db7aaa8..965eaa8 100644 --- a/crates/query-storage/src/value.rs +++ b/crates/storage/src/value.rs @@ -1,7 +1,10 @@ //! Cell values shared by tables and binding relations. +use crate::id::RowId; + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Value { Int(i64), Str(String), + Id(RowId), }