From ac2f202594e23e5c4546faaef99dcb46257c17fa Mon Sep 17 00:00:00 2001 From: Hassan Abedi Date: Thu, 26 Feb 2026 11:50:51 +0100 Subject: [PATCH] The base commit --- .gitattributes | 3 + .gitignore | 4 + Cargo.lock | 1282 +++++++++++++ Cargo.toml | 31 + NOTES.md | 321 ++++ README.md | 1314 +++++++++++++ architecture.dot | 227 +++ architecture.svg | 770 ++++++++ docs/ARCHITECTURE.md | 255 +++ docs/SYNTAX.md | 336 ++++ examples/elaborate.rs | 168 ++ examples/full_petri.rs | 132 ++ examples/geolog/category.geolog | 86 + .../geolog/field_projection_chase_test.geolog | 27 + examples/geolog/field_projection_test.geolog | 12 + examples/geolog/graph.geolog | 79 + examples/geolog/iso_instance_test.geolog | 29 + examples/geolog/iso_theory_test.geolog | 9 + examples/geolog/monoid.geolog | 78 + examples/geolog/nested_instance_test.geolog | 33 + examples/geolog/petri_net.geolog | 135 ++ examples/geolog/petri_net_full.geolog | 195 ++ examples/geolog/petri_net_showcase.geolog | 345 ++++ examples/geolog/petri_net_solution.geolog | 188 ++ examples/geolog/petri_reachability.geolog | 164 ++ .../petri_reachability_full_vision.geolog | 72 + .../geolog/petri_reachability_vision.geolog | 94 + examples/geolog/petri_trace_axioms.geolog | 66 + .../geolog/petri_trace_coverage_test.geolog | 36 + .../geolog/petri_trace_full_vision.geolog | 57 + examples/geolog/petri_trace_test.geolog | 58 + examples/geolog/preorder.geolog | 42 + .../product_codomain_equality_test.geolog | 23 + examples/geolog/product_codomain_test.geolog | 51 + .../geolog/record_existential_test.geolog | 18 + examples/geolog/record_in_axiom_test.geolog | 12 + .../geolog/record_premise_chase_test.geolog | 23 + examples/geolog/relalg_simple.geolog | 130 ++ examples/geolog/solver_demo.geolog | 132 ++ examples/geolog/sort_param_simple.geolog | 31 + examples/geolog/todo_list.geolog | 44 + examples/geolog/transitive_closure.geolog | 77 + examples/main.rs | 3 + examples/roundtrip.rs | 216 +++ fuzz/.gitignore | 4 + fuzz/Cargo.toml | 30 
+ fuzz/README.md | 60 + fuzz/fuzz_targets/fuzz_parser.rs | 17 + fuzz/fuzz_targets/fuzz_repl.rs | 22 + proofs/.gitignore | 2 + proofs/GeologProofs.lean | 1 + proofs/GeologProofs/MonotonicSubmodel.lean | 1520 +++++++++++++++ proofs/lake-manifest.json | 115 ++ proofs/lakefile.lean | 15 + proofs/lean-toolchain | 1 + src/ast.rs | 331 ++++ src/bin/geolog.rs | 1288 +++++++++++++ src/cc.rs | 258 +++ src/core.rs | 1511 +++++++++++++++ src/elaborate/env.rs | 315 ++++ src/elaborate/error.rs | 185 ++ src/elaborate/instance.rs | 1431 ++++++++++++++ src/elaborate/mod.rs | 17 + src/elaborate/theory.rs | 739 ++++++++ src/elaborate/types.rs | 265 +++ src/error.rs | 211 +++ src/id.rs | 114 ++ src/lexer.rs | 143 ++ src/lib.rs | 51 + src/meta.rs | 1106 +++++++++++ src/naming.rs | 355 ++++ src/overlay.rs | 574 ++++++ src/parser.rs | 761 ++++++++ src/patch.rs | 688 +++++++ src/pretty.rs | 424 +++++ src/query/backend.rs | 1650 ++++++++++++++++ src/query/chase.rs | 710 +++++++ src/query/compile.rs | 702 +++++++ src/query/exec.rs | 243 +++ src/query/from_relalg.rs | 1239 ++++++++++++ src/query/mod.rs | 43 + src/query/optimize.rs | 308 +++ src/query/pattern.rs | 171 ++ src/query/store_queries.rs | 672 +++++++ src/query/to_relalg.rs | 1386 ++++++++++++++ src/repl.rs | 1659 +++++++++++++++++ src/serialize.rs | 294 +++ src/solver/mod.rs | 415 +++++ src/solver/tactics.rs | 1398 ++++++++++++++ src/solver/tree.rs | 465 +++++ src/solver/types.rs | 131 ++ src/store/append.rs | 31 + src/store/batch.rs | 355 ++++ src/store/bootstrap_queries.rs | 1017 ++++++++++ src/store/columnar.rs | 208 +++ src/store/commit.rs | 209 +++ src/store/instance.rs | 356 ++++ src/store/materialize.rs | 238 +++ src/store/mod.rs | 585 ++++++ src/store/query.rs | 127 ++ src/store/schema.rs | 345 ++++ src/store/theory.rs | 750 ++++++++ src/tensor/builder.rs | 392 ++++ src/tensor/check.rs | 580 ++++++ src/tensor/compile.rs | 1229 ++++++++++++ src/tensor/expr.rs | 454 +++++ src/tensor/mod.rs | 31 + src/tensor/sparse.rs | 223 
+++ src/universe.rs | 207 ++ src/version.rs | 272 +++ src/zerocopy.rs | 422 +++++ tests/examples_integration.rs | 960 ++++++++++ tests/generators.rs | 375 ++++ tests/manual_fuzz.rs | 188 ++ .../solution2_incomplete_negative_test.geolog | 218 +++ tests/proptest_naming.proptest-regressions | 7 + tests/proptest_naming.rs | 218 +++ tests/proptest_overlay.proptest-regressions | 10 + tests/proptest_overlay.rs | 614 ++++++ tests/proptest_patch.proptest-regressions | 7 + tests/proptest_patch.rs | 334 ++++ tests/proptest_query.proptest-regressions | 7 + tests/proptest_query.rs | 946 ++++++++++ tests/proptest_query_backend.rs | 870 +++++++++ tests/proptest_solver.rs | 382 ++++ tests/proptest_structure.rs | 239 +++ tests/proptest_tensor.rs | 476 +++++ tests/proptest_universe.rs | 159 ++ tests/unit_chase.rs | 426 +++++ tests/unit_elaborate.rs | 837 +++++++++ tests/unit_meta.rs | 265 +++ tests/unit_parsing.rs | 163 ++ tests/unit_pretty.rs | 36 + tests/unit_relations.rs | 183 ++ tests/unit_version.rs | 133 ++ tests/unit_workspace.rs | 68 + theories/GeologMeta.geolog | 400 ++++ theories/RelAlgIR.geolog | 592 ++++++ 138 files changed, 48592 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 NOTES.md create mode 100644 README.md create mode 100644 architecture.dot create mode 100644 architecture.svg create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/SYNTAX.md create mode 100644 examples/elaborate.rs create mode 100644 examples/full_petri.rs create mode 100644 examples/geolog/category.geolog create mode 100644 examples/geolog/field_projection_chase_test.geolog create mode 100644 examples/geolog/field_projection_test.geolog create mode 100644 examples/geolog/graph.geolog create mode 100644 examples/geolog/iso_instance_test.geolog create mode 100644 examples/geolog/iso_theory_test.geolog create mode 100644 examples/geolog/monoid.geolog create mode 100644 
examples/geolog/nested_instance_test.geolog create mode 100644 examples/geolog/petri_net.geolog create mode 100644 examples/geolog/petri_net_full.geolog create mode 100644 examples/geolog/petri_net_showcase.geolog create mode 100644 examples/geolog/petri_net_solution.geolog create mode 100644 examples/geolog/petri_reachability.geolog create mode 100644 examples/geolog/petri_reachability_full_vision.geolog create mode 100644 examples/geolog/petri_reachability_vision.geolog create mode 100644 examples/geolog/petri_trace_axioms.geolog create mode 100644 examples/geolog/petri_trace_coverage_test.geolog create mode 100644 examples/geolog/petri_trace_full_vision.geolog create mode 100644 examples/geolog/petri_trace_test.geolog create mode 100644 examples/geolog/preorder.geolog create mode 100644 examples/geolog/product_codomain_equality_test.geolog create mode 100644 examples/geolog/product_codomain_test.geolog create mode 100644 examples/geolog/record_existential_test.geolog create mode 100644 examples/geolog/record_in_axiom_test.geolog create mode 100644 examples/geolog/record_premise_chase_test.geolog create mode 100644 examples/geolog/relalg_simple.geolog create mode 100644 examples/geolog/solver_demo.geolog create mode 100644 examples/geolog/sort_param_simple.geolog create mode 100644 examples/geolog/todo_list.geolog create mode 100644 examples/geolog/transitive_closure.geolog create mode 100644 examples/main.rs create mode 100644 examples/roundtrip.rs create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/README.md create mode 100644 fuzz/fuzz_targets/fuzz_parser.rs create mode 100644 fuzz/fuzz_targets/fuzz_repl.rs create mode 100644 proofs/.gitignore create mode 100644 proofs/GeologProofs.lean create mode 100644 proofs/GeologProofs/MonotonicSubmodel.lean create mode 100644 proofs/lake-manifest.json create mode 100644 proofs/lakefile.lean create mode 100644 proofs/lean-toolchain create mode 100644 src/ast.rs create mode 100644 
src/bin/geolog.rs create mode 100644 src/cc.rs create mode 100644 src/core.rs create mode 100644 src/elaborate/env.rs create mode 100644 src/elaborate/error.rs create mode 100644 src/elaborate/instance.rs create mode 100644 src/elaborate/mod.rs create mode 100644 src/elaborate/theory.rs create mode 100644 src/elaborate/types.rs create mode 100644 src/error.rs create mode 100644 src/id.rs create mode 100644 src/lexer.rs create mode 100644 src/lib.rs create mode 100644 src/meta.rs create mode 100644 src/naming.rs create mode 100644 src/overlay.rs create mode 100644 src/parser.rs create mode 100644 src/patch.rs create mode 100644 src/pretty.rs create mode 100644 src/query/backend.rs create mode 100644 src/query/chase.rs create mode 100644 src/query/compile.rs create mode 100644 src/query/exec.rs create mode 100644 src/query/from_relalg.rs create mode 100644 src/query/mod.rs create mode 100644 src/query/optimize.rs create mode 100644 src/query/pattern.rs create mode 100644 src/query/store_queries.rs create mode 100644 src/query/to_relalg.rs create mode 100644 src/repl.rs create mode 100644 src/serialize.rs create mode 100644 src/solver/mod.rs create mode 100644 src/solver/tactics.rs create mode 100644 src/solver/tree.rs create mode 100644 src/solver/types.rs create mode 100644 src/store/append.rs create mode 100644 src/store/batch.rs create mode 100644 src/store/bootstrap_queries.rs create mode 100644 src/store/columnar.rs create mode 100644 src/store/commit.rs create mode 100644 src/store/instance.rs create mode 100644 src/store/materialize.rs create mode 100644 src/store/mod.rs create mode 100644 src/store/query.rs create mode 100644 src/store/schema.rs create mode 100644 src/store/theory.rs create mode 100644 src/tensor/builder.rs create mode 100644 src/tensor/check.rs create mode 100644 src/tensor/compile.rs create mode 100644 src/tensor/expr.rs create mode 100644 src/tensor/mod.rs create mode 100644 src/tensor/sparse.rs create mode 100644 src/universe.rs create 
mode 100644 src/version.rs create mode 100644 src/zerocopy.rs create mode 100644 tests/examples_integration.rs create mode 100644 tests/generators.rs create mode 100644 tests/manual_fuzz.rs create mode 100644 tests/negative/solution2_incomplete_negative_test.geolog create mode 100644 tests/proptest_naming.proptest-regressions create mode 100644 tests/proptest_naming.rs create mode 100644 tests/proptest_overlay.proptest-regressions create mode 100644 tests/proptest_overlay.rs create mode 100644 tests/proptest_patch.proptest-regressions create mode 100644 tests/proptest_patch.rs create mode 100644 tests/proptest_query.proptest-regressions create mode 100644 tests/proptest_query.rs create mode 100644 tests/proptest_query_backend.rs create mode 100644 tests/proptest_solver.rs create mode 100644 tests/proptest_structure.rs create mode 100644 tests/proptest_tensor.rs create mode 100644 tests/proptest_universe.rs create mode 100644 tests/unit_chase.rs create mode 100644 tests/unit_elaborate.rs create mode 100644 tests/unit_meta.rs create mode 100644 tests/unit_parsing.rs create mode 100644 tests/unit_pretty.rs create mode 100644 tests/unit_relations.rs create mode 100644 tests/unit_version.rs create mode 100644 tests/unit_workspace.rs create mode 100644 theories/GeologMeta.geolog create mode 100644 theories/RelAlgIR.geolog diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..807d598 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ + +# Use bd merge for beads JSONL files +.beads/issues.jsonl merge=beads diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6183e86 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +target/ +.claude/ +.idea + diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..a0b6abf --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1282 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + +[[package]] +name = "ariadne" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44055e597c674aef7cb903b2b9f6e4cba1277ed0d2d61dae7cd52d7ffa81f8e2" +dependencies = [ + "unicode-width 0.1.14", + "yansi", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", + "uuid", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "bytemuck" +version = "1.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.49" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown 0.14.5", + "stacker", +] + +[[package]] +name = "clipboard-win" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" +dependencies = [ + "error-code", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "windows-sys 0.59.0", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = 
"0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "egglog-concurrency" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7462d094fd0d9da45a7bd2c4b09ab530b8935ba060cd15c181d94e480f9add" +dependencies = [ + "arc-swap", + "rayon", +] + +[[package]] +name = "egglog-numeric-id" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f81100cddb02741105fe8c445f0f2dc66919dbf65aab380ff903ff54e458805" +dependencies = [ + "rayon", +] + +[[package]] +name = "egglog-union-find" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c4e41ab6ea1bec16de378bd2acaf374997a02ce7f88ef084f7b00f7d2be9e7b" +dependencies = [ + "crossbeam", + "egglog-concurrency", + "egglog-numeric-id", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fd-lock" +version = "4.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "geolog" +version = "0.1.0" +dependencies = [ + "ariadne", + "chumsky", + "egglog-numeric-id", + "egglog-union-find", + "indexmap 2.12.1", + "insta", + "itertools", + "memmap2", + "nonminmax", + "proptest", + "rand", + "rkyv", + "roaring", + "rustyline", + "serde", + "tempfile", + "tinyvec", + "toml", + "uuid", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "insta" +version = "1.44.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c943d4415edd8153251b6f197de5eb1640e56d84e8d9159bea190421c73698" +dependencies = [ + "console", + "once_cell", + "similar", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.178" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "memmap2" +version = "0.9.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nonminmax" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d41a1ba30985f2c6f9cd55cdf24e9e521ff4aa4b3d238349866e262c338a64c1" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "proptest" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] 
+name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rend" +version = "0.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "indexmap 1.9.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "roaring" +version = "0.10.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "rustyline" +version = "15.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2ee1e066dc922e513bda599c6ccb5f3bb2b0ea5870a579448f2622993f0a9a2f" +dependencies = [ + "bitflags", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "unicode-segmentation", + "unicode-width 0.2.2", + "utf8parse", + "windows-sys 0.59.0", +] + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "similar" +version = "2.7.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap 2.12.1", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + 
+[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +dependencies = [ + "getrandom 0.3.4", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.111", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zerocopy" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..337e032 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "geolog" +version = "0.1.0" +edition = "2024" + +[dependencies] +chumsky = "0.9" +ariadne = "0.4" # for nice error reporting +uuid = { version = "1", features = ["v7"] } +roaring = "0.10" +nonminmax = "0.1" +rkyv = { version = "0.7", features = ["validation", "uuid", "indexmap"] } +tinyvec = { version = "1.6", features = ["alloc"] } +indexmap = "2.0" +memmap2 = "0.9" +rustyline = "15" # readline for REPL +toml = "0.8" # workspace.toml parsing +serde = { version = "1", features = ["derive"] } # for toml +egglog-union-find = "1.0" # union-find for congruence closure +egglog-numeric-id = "1.0" # newtype IDs with define_id! 
macro +itertools = "0.13" # Either type for zero-copy iterators + +[dev-dependencies] +insta = "1.40" # snapshot testing +proptest = "1.4" # property-based testing +rand = "0.9.2" +tempfile = "3.10" # temp dirs for persistence tests + +[[bin]] +name = "geolog" +path = "src/bin/geolog.rs" diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..bed653b --- /dev/null +++ b/NOTES.md @@ -0,0 +1,321 @@ +# Geolog Project Notes + +## Overview + +**Geolog** is a **Geometric Logic REPL** — a type theory with semantics in topoi, designed for formal specifications using geometric logic. + +### Core Capabilities + +- **Geometric logic programming** — encode mathematical structures, relationships, and constraints +- **Database schema definition** — define sorts, functions, relations, and axioms +- **Model/instance creation** — create concrete finite models satisfying theory axioms +- **Automated inference** — chase algorithm for automatic fact derivation +- **Version control** — git-like commits and tracking for instances +- **Persistence** — append-only storage with optional disk persistence + +### Use Cases + +- Business process workflow orchestration +- Formal verification via diagrammatic rewriting +- Database query design +- Petri net reachability and process modeling + +--- + +## Tech Stack + +**Primary Language**: Rust (2024 edition, Cargo-based) + +### Key Dependencies + +| Crate | Version | Purpose | +|-------|---------|---------| +| `chumsky` | 0.9 | Parser combinator library | +| `ariadne` | 0.4 | Error reporting with source spans | +| `rkyv` | 0.7 | Zero-copy serialization | +| `rustyline` | 15 | REPL readline interface | +| `egglog-union-find` | 1.0 | Union-find for congruence closure | +| `roaring` | 0.10 | Bitmap library for sparse relations | +| `indexmap` | 2.0 | Order-preserving hash maps | +| `uuid` | 1 | UUID generation | +| `memmap2` | 0.9 | Memory-mapped file I/O | + +### Testing Frameworks + +- `insta` — snapshot testing +- `proptest` — 
property-based testing +- `tempfile` — temporary directory management + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +│ REPL (interactive CLI) | Batch file loading │ +├─────────────────────────────────────────────────────┤ +│ PARSING LAYER (Lexer → Parser → AST) │ +│ chumsky-based lexer & parser, source error reporting│ +├─────────────────────────────────────────────────────┤ +│ ELABORATION LAYER (AST → Core IR) │ +│ Type checking, name resolution, theory/instance │ +├─────────────────────────────────────────────────────┤ +│ CORE LAYER (Typed Representation) │ +│ Signature, Term, Formula, Structure, ElaboratedTheory│ +├─────────────────────────────────────────────────────┤ +│ STORAGE LAYER (Persistence) │ +│ Append-only GeologMeta store with version control │ +├─────────────────────────────────────────────────────┤ +│ QUERY & SOLVER LAYER (Execution) │ +│ Chase algorithm, congruence closure, relational │ +│ algebra compiler, SMT-style model enumeration │ +├─────────────────────────────────────────────────────┤ +│ TENSOR ALGEBRA (Axiom Checking) │ +│ Sparse tensor evaluation for axiom validation │ +└─────────────────────────────────────────────────────┘ +``` + +--- + +## Directory Structure + +| Path | Purpose | +|------|---------| +| `src/bin/geolog.rs` | CLI entry point | +| `src/lib.rs` | Library root, exports `parse()` | +| `src/repl.rs` | Interactive REPL state machine | +| `src/lexer.rs` | Tokenization using chumsky | +| `src/parser.rs` | Token stream → AST | +| `src/ast.rs` | Abstract syntax tree types | +| `src/core.rs` | Core IR: Signature, Term, Formula, Structure | +| `src/elaborate/` | AST → Core elaboration | +| `src/store/` | Persistence layer (append-only) | +| `src/query/` | Chase algorithm, relational algebra | +| `src/solver/` | SMT-style model enumeration | +| `src/tensor/` | Sparse tensor algebra for axiom checking | +| `src/cc.rs` | Congruence closure (union-find) | +| `src/id.rs` | 
Luid/Slid identity system | +| `src/universe.rs` | Global element registry | +| `examples/geolog/` | 30+ example `.geolog` files | +| `tests/` | 25+ test files | +| `docs/` | ARCHITECTURE.md, SYNTAX.md | +| `proofs/` | Lean4 formalization | +| `fuzz/` | Fuzzing targets | + +--- + +## Main Components + +### Parsing & Syntax (~1,200 lines) + +- `lexer.rs` — tokenization +- `parser.rs` — token stream → AST +- `ast.rs` — AST types (Theory, Instance, Axiom, etc.) +- `error.rs` — error formatting with source spans +- `pretty.rs` — Core → Geolog source roundtrip printing + +### Elaboration (~2,200 lines) + +- `elaborate/mod.rs` — coordination +- `elaborate/theory.rs` — AST Theory → Core ElaboratedTheory +- `elaborate/instance.rs` — AST Instance → Core Structure +- `elaborate/env.rs` — environment with theory registry +- `elaborate/types.rs` — type expression evaluation +- `elaborate/error.rs` — type error reporting + +### Core Representation + +- `core.rs` — DerivedSort, Signature, Structure, Formula, Term, Sequent +- `id.rs` — Luid (global unique ID) and Slid (structure-local ID) +- `universe.rs` — global element registry with UUID ↔ Luid mapping +- `naming.rs` — bidirectional name ↔ Luid mapping + +### Storage Layer (~1,500 lines) + +- `store/mod.rs` — main Store struct +- `store/schema.rs` — cached sort/function/relation IDs +- `store/append.rs` — low-level element append operations +- `store/theory.rs` — theory CRUD +- `store/instance.rs` — instance CRUD +- `store/commit.rs` — git-like version control +- `store/materialize.rs` — indexed views for fast lookups + +### Query & Compilation (~3,500 lines) + +- `query/compile.rs` — Query → RelAlgIR plan compilation +- `query/to_relalg.rs` — Query → Relational Algebra IR +- `query/from_relalg.rs` — RelAlgIR → Executable QueryOp +- `query/chase.rs` — chase algorithm for fixpoint computation +- `query/backend.rs` — naive QueryOp executor +- `query/optimize.rs` — algebraic law rewriting + +### Solver & Model Enumeration (~1,300 
lines) + +- `solver/mod.rs` — unified model enumeration API +- `solver/tree.rs` — explicit search tree for partial models +- `solver/tactics.rs` — automated search strategies: + - CheckTactic: axiom validation + - ForwardChainingTactic: Datalog-style inference + - PropagateEquationsTactic: congruence closure + - AutoTactic: composite fixpoint solver +- `solver/types.rs` — SearchNode, Obligation, NodeStatus types + +### Tensor Algebra (~2,600 lines) + +- `tensor/expr.rs` — lazy tensor expression trees +- `tensor/sparse.rs` — sparse tensor storage (RoaringBitmap-based) +- `tensor/builder.rs` — expression builders +- `tensor/compile.rs` — Formula → TensorExpr compilation +- `tensor/check.rs` — axiom checking via tensor evaluation + +--- + +## Key Entry Points + +1. **CLI**: `src/bin/geolog.rs` + ``` + Usage: geolog [-d ] [source_files...] + ``` + +2. **Parse Entry**: `src/lib.rs` exports `parse(input: &str) → Result` + +3. **REPL State**: `src/repl.rs` — `ReplState::process_line()` + +4. **Theory Elaboration**: `elaborate/theory.rs::elaborate_theory()` + +5. **Instance Elaboration**: `elaborate/instance.rs::elaborate_instance_ctx()` + +6. **Chase Algorithm**: `query/chase.rs::chase_fixpoint_with_cc()` + +7. **Model Enumeration**: `solver/mod.rs::enumerate_models()` + +--- + +## Design Decisions + +### Geometric Logic Foundation + +- **Axioms as Sequents**: `forall vars. 
premises |- conclusion` +- **Positive Conclusions**: Can have existentials, disjunctions, but never negations +- **Geometric Morphisms**: Preserved by design, enabling category-theoretic semantics + +### Identity System + +- **Luid** ("Local Universe ID"): Globally unique across all structures +- **Slid** ("Structure-Local ID"): Index within a single structure +- Bidirectional mapping enables persistent identity despite structure changes + +### Append-Only Storage + +- **GeologMeta**: Single homoiconic theory instance storing all data +- **Patch-based Versioning**: Each commit is a delta from parent +- **Never Delete**: Elements only tombstoned for perfect audit trails + +### Type System + +- **Postfix Application**: `x f` not `f(x)` — categorical style +- **Derived Sorts**: Products of base sorts for record domains +- **Product Domains**: Functions can take record arguments: `[x: M, y: M] -> M` +- **Relations → Prop**: Relations are functions to `Prop` (boolean predicates) + +### Chase Algorithm + +- **Fixpoint Iteration**: Derives all consequences until closure +- **Congruence Closure Integration**: Merges elements when axioms conclude `x = y` +- **Termination for Unit Laws**: Categories with unit laws no longer loop forever +- Uses tensor algebra for efficient axiom checking + +### Solver Architecture + +- **Explicit Search Tree**: Not implicit in call stack (AI-friendly for agent control) +- **Refinement Preorder**: Structures can grow (carriers, functions, relations) +- **Obligations vs Unsat**: Axiom obligation = need to witness conclusion (NOT failure) +- **True Unsat**: Only when deriving `⊢ False` from instantiated axioms +- **Tactics-based**: AutoTactic composes multiple tactics + +### Relational Algebra Compilation + +- **QueryOp Intermediate**: SQL-like operators (Scan, Filter, Join, Project, etc.) 
+- **Optimization Passes**: Filter fusion, projection pushdown +- **Store-aware**: Compiled directly to GeologMeta queries with indexing + +### Tensor Algebra for Axiom Checking + +- **Sparse Representation**: Roaring Bitmaps for efficient membership +- **Lazy Expression Trees**: Tensor products fused with contractions +- **Boolean Semiring**: AND for product, OR for sum + +--- + +## REPL Commands + +``` +:list, :inspect - Introspection +:add, :assert, :retract - Mutations +:query, :explain, :compile - Query analysis +:chase, :solve, :extend - Inference +:commit, :history - Version control +:source - Load programs +:help - Show help +``` + +--- + +## Parameterized Theories + +Theories can be parameterized by other instances: + +```geolog +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} +``` + +This enables rich type-theoretic modeling (e.g., Petri net reachability as dependent types). + +--- + +## Testing Infrastructure + +- **Property-based tests** (`proptest`): naming, overlay, patches, queries, structure, tensor, universe, solver +- **Unit tests**: parsing, elaboration, meta, pretty-printing, relations, version control, workspace +- **Integration tests**: 30+ `.geolog` example files +- **Fuzzing**: `fuzz/` directory with parser and REPL fuzzing targets + +--- + +## Project Status + +**Version**: 0.1.0 (Early production) + +### Completed + +- Core geometric logic implementation +- Parser, elaborator, and core IR +- Chase algorithm with equality saturation +- Solver with SMT-like model enumeration +- Persistence and version control +- Comprehensive test coverage + +### Active Development + +- Nested instance elaboration +- Homoiconic query plan representation +- Disjunction variable alignment for tensor builder +- Lean4 formalization of monotonic submodel proofs + +--- + +## Key Files Reference + +| File | Line Count (approx) | Description | +|------|---------------------|-------------| +| `src/core.rs` | ~800 | Core type 
definitions | +| `src/parser.rs` | ~600 | Parser implementation | +| `src/repl.rs` | ~1000 | REPL state machine | +| `src/query/chase.rs` | ~500 | Chase algorithm | +| `src/solver/mod.rs` | ~400 | Model enumeration API | +| `src/tensor/sparse.rs` | ~600 | Sparse tensor storage | +| `src/store/mod.rs` | ~400 | Storage coordination | diff --git a/README.md b/README.md new file mode 100644 index 0000000..ee19784 --- /dev/null +++ b/README.md @@ -0,0 +1,1314 @@ +# Geolog + +> This README was synthesized automatically by Claude Opus 4.5. +> As was this entire project, really. + +**Geometric Logic REPL** - A language and runtime for formal specifications using geometric logic. + +Geolog aims to provide a highly customizable, efficient, concurrent, append-only, persistent memory and query infrastructure for everything from business process workflow orchestration to formal verification via diagrammatic rewriting. + +## Quick Start + +```bash +~/dev/geolog$ cargo install --path . + Compiling geolog v0.1.0 (/home/dev/geolog) + Finished release [optimized] target(s) in 12.34s + Installing ~/.cargo/bin/geolog + Installed package `geolog v0.1.0` (executable `geolog`) + +# Session 1: Define a theory +~/dev/geolog$ geolog -d foo +Workspace: foo +geolog> theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + reachable : [from: V, to: V] -> Prop; + + ax/edge : forall e : E. |- [from: e src, to: e tgt] reachable; + ax/trans : forall x,y,z : V. + [from: x, to: y] reachable, [from: y, to: z] reachable + |- [from: x, to: z] reachable; +} +Defined theory Graph (2 sorts, 2 functions, 1 relations, 2 axioms) + +geolog> :quit +Goodbye! + +# Session 2: Create an instance with chase (theory auto-persisted!) 
+~/dev/geolog$ geolog -d foo +Workspace: foo +geolog> instance G : Graph = chase { + a, b, c : V; + e1, e2 : E; + e1 src = a; e1 tgt = b; + e2 src = b; e2 tgt = c; +} +Defined instance G : Graph (5 elements) + +geolog> :inspect G +instance G : Graph = { + // V (3): + a : V; + b : V; + c : V; + // E (2): + e1 : E; + e2 : E; + // src: + e1 src = a; + e2 src = b; + // tgt: + e1 tgt = b; + e2 tgt = c; + // reachable (3 tuples): + [from: a, to: b] reachable; + [from: b, to: c] reachable; + [from: a, to: c] reachable; +} + +geolog> :quit +Goodbye! + +# Session 3: Everything persisted automatically! +~/dev/geolog$ geolog -d foo +Workspace: foo +geolog> :list +Theories: + Graph (2 sorts, 2 functions, 1 relations, 2 axioms) +Instances: + G : Graph (5 elements) + +geolog> :inspect G +instance G : Graph = { + // V (3): + a : V; + b : V; + c : V; + // E (2): + e1 : E; + e2 : E; + // src: + e1 src = a; + e2 src = b; + // tgt: + e1 tgt = b; + e2 tgt = c; + // reachable (3 tuples): + [from: a, to: b] reachable; + [from: b, to: c] reachable; + [from: a, to: c] reachable; +} + +# Category theory with equality saturation +~/dev/geolog$ geolog examples/geolog/category.geolog +geolog> :show Arrow +instance Arrow : Category = { + // ob (2): + A : ob; + B : ob; + // mor (3): + f : mor; + #3 : mor; + #4 : mor; + // src: + f src = A; + #3 src = A; + #4 src = B; + // tgt: + f tgt = B; + #3 tgt = A; + #4 tgt = B; + // comp (4 tuples): + [f: f, g: #4, h: f] comp; + [f: #3, g: f, h: f] comp; + [f: #3, g: #3, h: #3] comp; + [f: #4, g: #4, h: #4] comp; + // id (2 tuples): + [a: A, f: #3] id; + [a: B, f: #4] id; +} +``` + +The `Arrow` instance declares only objects A, B and one morphism f : A → B. 
+The chase derives identity morphisms (#3 = idA, #4 = idB) and all compositions, +while **equality saturation** collapses infinite self-compositions via unit laws: +- `[f: #3, g: f, h: f]` means idA;f = f (left unit) +- `[f: f, g: #4, h: f]` means f;idB = f (right unit) +- `[f: #3, g: #3, h: #3]` means idA;idA = idA (collapsed by unit law) + +## Features + +- **Theories**: Define sorts (types), functions, relations, and axioms +- **Instances**: Create concrete models of theories +- **Parameterized Theories**: Theories can depend on instances of other theories +- **Nested Instances**: Inline instance definitions within instances +- **Relations**: Binary and n-ary predicates with product domains +- **Axioms**: Geometric sequents, automatically checked with tensor algebra +- **Chase Algorithm**: Automatic inference of derived facts +- **Interactive REPL**: Explore and modify instances dynamically +- **Version Control**: Commit and track changes to instances + +--- + +## Showcase: Petri Net Reachability as Dependent Types + +This showcase demonstrates geolog's core capabilities through a non-trivial domain: +encoding Petri net reachability as dependent types. A solution to a reachability +problem is NOT a yes/no boolean but a **constructive witness**: a diagrammatic proof +that tokens can flow from initial to target markings via a sequence of transition firings. + +**Key concepts demonstrated:** +- Parameterized theories (`Marking` depends on `PetriNet` instance) +- Nested instance types (`ReachabilityProblem` contains `Marking` instances) +- Sort-parameterized theories (`Iso` takes two sorts as parameters) +- Cross-instance references (solution's trace elements reference problem's tokens) + +> **Note**: This showcase is tested by `cargo test test_petri_net_showcase` and +> matches `examples/geolog/petri_net_showcase.geolog` exactly. 
+ +### The Type-Theoretic Encoding + +```geolog +// ============================================================ +// THEORY: PetriNet - Places, transitions, and arcs +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking (parameterized by N : PetriNet) +// A marking assigns tokens to places +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem (parameterized by N : PetriNet) +// Initial and target markings as nested instances +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace (parameterized by N : PetriNet) +// A trace records transition firings and token flow via wires +// ============================================================ + +theory (N : PetriNet instance) Trace { + F : Sort; // Firings + F/of : F -> N/T; // Which transition each firing corresponds to + + // Wires connect output arcs of firings to input arcs of other firings + W : Sort; + W/src_firing : W -> F; + W/src_arc : W -> N/out; + W/tgt_firing : W -> F; + W/tgt_arc : W -> N/in; + + // Wire coherence axioms (source/target arcs must match firing transitions) + ax/wire_src_coherent : forall w : W. 
|- w W/src_arc N/out/src = w W/src_firing F/of; + ax/wire_tgt_coherent : forall w : W. |- w W/tgt_arc N/in/tgt = w W/tgt_firing F/of; + ax/wire_place_coherent : forall w : W. |- w W/src_arc N/out/tgt = w W/tgt_arc N/in/src; + + // Terminals connect initial/target markings to firings + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt_firing : input_terminal -> F; + input_terminal/tgt_arc : input_terminal -> N/in; + output_terminal/src_firing : output_terminal -> F; + output_terminal/src_arc : output_terminal -> N/out; + + // Terminal coherence axioms + ax/input_terminal_coherent : forall i : input_terminal. + |- i input_terminal/tgt_arc N/in/tgt = i input_terminal/tgt_firing F/of; + ax/output_terminal_coherent : forall o : output_terminal. + |- o output_terminal/src_arc N/out/src = o output_terminal/src_firing F/of; + + // Terminal place coherence + ax/input_terminal_place : forall i : input_terminal. + |- i input_terminal/of = i input_terminal/tgt_arc N/in/src; + ax/output_terminal_place : forall o : output_terminal. + |- o output_terminal/of = o output_terminal/src_arc N/out/tgt; + + // COMPLETENESS: Every arc of every firing must be accounted for. + + // Input completeness: every input arc must be fed by a wire or input terminal + ax/input_complete : forall f : F, arc : N/in. + arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt_firing = f, w W/tgt_arc = arc) \/ + (exists i : input_terminal. i input_terminal/tgt_firing = f, i input_terminal/tgt_arc = arc); + + // Output completeness: every output arc must be captured by a wire or output terminal + ax/output_complete : forall f : F, arc : N/out. + arc N/out/src = f F/of |- + (exists w : W. w W/src_firing = f, w W/src_arc = arc) \/ + (exists o : output_terminal. 
o output_terminal/src_firing = f, o output_terminal/src_arc = arc); +} + +// ============================================================ +// THEORY: Iso (parameterized by two sorts) +// Isomorphism (bijection) between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + + // Roundtrip axioms ensure this is a true bijection + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution (parameterized by N and RP) +// A constructive witness that target is reachable from initial +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + + // Bijection: input terminals <-> initial marking tokens + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + + // Bijection: output terminals <-> target marking tokens + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; +} +``` + +### Problem 0: Can we reach B from A with one token? + +```geolog +// ============================================================ +// The Petri Net: +// +---[ba]----+ +// v | +// (A) --[ab]->(B) --+ +// | | +// +----[abc]-------+--> (C) +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; abc : T; + + // A -> B (via ab) + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + // B -> A (via ba) + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + // A + B -> C (via abc) - note: two input arcs! 
+ abc_in1 : in; abc_in1 in/src = A; abc_in1 in/tgt = abc; + abc_in2 : in; abc_in2 in/src = B; abc_in2 in/tgt = abc; + abc_out : out; abc_out out/src = abc; abc_out out/tgt = C; +} + +// Initial: 1 token in A, Target: 1 token in B +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// ============================================================ +// SOLUTION 0: Yes! Fire transition 'ab' once. +// ============================================================ + +instance solution0 : ExampleNet problem0 Solution = { + trace = { + f1 : F; + f1 F/of = ExampleNet/ab; + + // Input terminal feeds A-token into f1's ab_in arc + it : input_terminal; + it input_terminal/of = ExampleNet/A; + it input_terminal/tgt_firing = f1; + it input_terminal/tgt_arc = ExampleNet/ab_in; + + // Output terminal captures f1's B-token via ab_out arc + ot : output_terminal; + ot output_terminal/of = ExampleNet/B; + ot output_terminal/src_firing = f1; + ot output_terminal/src_arc = ExampleNet/ab_out; + }; + + initial_iso = { + trace/it fwd = problem0/initial_marking/tok; + problem0/initial_marking/tok bwd = trace/it; + }; + + target_iso = { + trace/ot fwd = problem0/target_marking/tok; + problem0/target_marking/tok bwd = trace/ot; + }; +} +``` + +### Problem 2: Can we reach C from two A-tokens? + +This is a more interesting case: the only path to C is via `abc`, which requires +tokens in BOTH A and B simultaneously. Starting with 2 tokens in A, we must +first move one to B, then fire `abc`. 
+ +```geolog +// Initial: 2 tokens in A, Target: 1 token in C +instance problem2 : ExampleNet ReachabilityProblem = { + initial_marking = { + t1 : token; t1 token/of = ExampleNet/A; + t2 : token; t2 token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/C; + }; +} + +// ============================================================ +// SOLUTION 2: Fire 'ab' then 'abc'. +// +// Token flow diagram: +// [it1]--A-->[f1: ab]--B--wire-->[f2: abc]--C-->[ot] +// [it2]--A-----------------^ +// +// Step 1: Fire 'ab' to move one token A -> B +// Step 2: Fire 'abc' consuming one A-token and one B-token +// ============================================================ + +instance solution2 : ExampleNet problem2 Solution = { + trace = { + // Two firings + f1 : F; f1 F/of = ExampleNet/ab; // First: A -> B + f2 : F; f2 F/of = ExampleNet/abc; // Second: A + B -> C + + // Wire connecting f1's B-output to f2's B-input + w1 : W; + w1 W/src_firing = f1; + w1 W/src_arc = ExampleNet/ab_out; + w1 W/tgt_firing = f2; + w1 W/tgt_arc = ExampleNet/abc_in2; + + // Input terminal 1: feeds first A-token into f1 + it1 : input_terminal; + it1 input_terminal/of = ExampleNet/A; + it1 input_terminal/tgt_firing = f1; + it1 input_terminal/tgt_arc = ExampleNet/ab_in; + + // Input terminal 2: feeds second A-token into f2 + it2 : input_terminal; + it2 input_terminal/of = ExampleNet/A; + it2 input_terminal/tgt_firing = f2; + it2 input_terminal/tgt_arc = ExampleNet/abc_in1; + + // Output terminal: captures f2's C-token output + ot : output_terminal; + ot output_terminal/of = ExampleNet/C; + ot output_terminal/src_firing = f2; + ot output_terminal/src_arc = ExampleNet/abc_out; + }; + + // Bijection: 2 input terminals <-> 2 initial tokens + initial_iso = { + trace/it1 fwd = problem2/initial_marking/t1; + trace/it2 fwd = problem2/initial_marking/t2; + problem2/initial_marking/t1 bwd = trace/it1; + problem2/initial_marking/t2 bwd = trace/it2; + }; + + // Bijection: 1 output 
terminal <-> 1 target token + target_iso = { + trace/ot fwd = problem2/target_marking/t; + problem2/target_marking/t bwd = trace/ot; + }; +} +``` + +Each `Solution` instance is a **constructive diagrammatic proof**: +- The trace contains firing(s) of specific transitions +- Input terminals witness that initial tokens feed into firings +- Output terminals witness that firings produce target tokens +- The isomorphisms prove the token counts match exactly + +--- + +## Table of Contents + +1. [Basic Concepts](#basic-concepts) +2. [Theory Definitions](#theory-definitions) +3. [Instance Definitions](#instance-definitions) +4. [Relations and Axioms](#relations-and-axioms) +5. [The Chase Algorithm](#the-chase-algorithm) +6. [REPL Commands](#repl-commands) +7. [Complete Examples](#complete-examples) + +--- + +## Basic Concepts + +Geolog is based on **geometric logic**, a fragment of first-order logic that: +- Allows existential quantification in conclusions +- Allows disjunctions in conclusions +- Is preserved by geometric morphisms (structure-preserving maps) + +A **theory** defines: +- **Sorts**: Types of elements +- **Function symbols**: Function-typed variables with domain and codomain derived from sorts +- **Relation symbols**: Predicate-typed variables with domain derived from sorts, and codomain `-> Prop` +- **Axioms**: Geometric sequents (first universal quantifiers, then an implication between two propositions which are then purely positive) + +An **instance** is a concrete finite model, which means it assigns to each sort a finite set, to each function a finite function, and to each relation a Boolean-valued tensor, such that all axioms evaluate to true. 
+ +--- + +## Theory Definitions + +### Simple Theory with Sorts and Functions + +```geolog +// Directed Graph: vertices and edges with source/target functions +theory Graph { + V : Sort; // Vertices + E : Sort; // Edges + + src : E -> V; // Source of an edge + tgt : E -> V; // Target of an edge +} +``` + +### Theory with Product Domain Functions + +```geolog +// Monoid: a set with an associative binary operation +theory Monoid { + M : Sort; + + // Binary operation: M × M → M + mul : [x: M, y: M] -> M; + + // Identity element + id : M -> M; + + // Associativity: (x * y) * z = x * (y * z) + ax/assoc : forall x : M, y : M, z : M. + |- [x: [x: x, y: y] mul, y: z] mul = [x: x, y: [x: y, y: z] mul] mul; +} +``` + +### REPL Session: Defining a Theory Inline + +``` +geolog> theory Counter { +...... C : Sort; +...... next : C -> C; +...... } +Defined theory Counter (1 sorts, 1 functions) + +geolog> :inspect Counter +theory Counter { + C : Sort; + next : C -> C; +} +``` + +--- + +## Instance Definitions + +### Basic Instance + +```geolog +// A simple triangle graph: A → B → C → A +instance Triangle : Graph = { + // Vertices + A : V; + B : V; + C : V; + + // Edges + ab : E; + bc : E; + ca : E; + + // Edge endpoints (function definitions) + ab src = A; + ab tgt = B; + bc src = B; + bc tgt = C; + ca src = C; + ca tgt = A; +} +``` + +### Instance with Product Domain Functions + +```geolog +// Boolean "And" monoid: {T, F} with T as identity +instance BoolAnd : Monoid = { + T : M; + F : M; + + // Identity: T is the identity element + T id = T; + F id = T; + + // Multiplication table for "and": + [x: T, y: T] mul = T; + [x: T, y: F] mul = F; + [x: F, y: T] mul = F; + [x: F, y: F] mul = F; +} +``` + +### REPL Session: Loading and Inspecting + +``` +geolog> :source examples/geolog/graph.geolog +Loading examples/geolog/graph.geolog... 
+Defined theory Graph (2 sorts, 2 functions) + +geolog> :list +Theories: + Graph (2 sorts, 2 functions) +Instances: + Diamond : Graph (8 elements) + Arrow : Graph (3 elements) + Loop : Graph (2 elements) + Triangle : Graph (6 elements) + +geolog> :inspect Triangle +instance Triangle : Graph = { + // V (3): + A : V; + B : V; + C : V; + // E (3): + ab : E; + bc : E; + ca : E; + // src: + ab src = A; + bc src = B; + ca src = C; + // tgt: + ab tgt = B; + bc tgt = C; + ca tgt = A; +} + +geolog> :query Triangle V +Elements of V in Triangle: + A + B + C +``` + +--- + +## Relations and Axioms + +Relations are predicates on sorts, declared with `-> Prop`. + +### Unary Relations + +```geolog +theory TodoList { + Item : Sort; + + // Unary relations use simple arrow syntax + completed : Item -> Prop; + high_priority : Item -> Prop; + blocked : Item -> Prop; +} +``` + +### Binary Relations + +```geolog +theory Preorder { + X : Sort; + + // Binary relation: x ≤ y (field names document the relation) + leq : [lo: X, hi: X] -> Prop; + + // Reflexivity axiom: x ≤ x + ax/refl : forall x : X. + |- [lo: x, hi: x] leq; + + // Transitivity axiom: x ≤ y ∧ y ≤ z → x ≤ z + ax/trans : forall x : X, y : X, z : X. + [lo: x, hi: y] leq, [lo: y, hi: z] leq |- [lo: x, hi: z] leq; +} +``` + +### Asserting Relation Tuples in Instances + +```geolog +instance SampleTodos : TodoList = { + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + + // Assert unary relation: buy_groceries is completed + buy_groceries completed; + + // Assert unary relation: cook_dinner is high priority + cook_dinner high_priority; + + // Binary relation using mixed positional/named syntax: + // First positional arg maps to 'item' field, named arg for 'on' + [cook_dinner, on: buy_groceries] depends; +} +``` + +### REPL Session: Asserting Relations Dynamically + +``` +geolog> :source examples/geolog/todo_list.geolog +Loading examples/geolog/todo_list.geolog... 
+Defined theory TodoList (1 sorts, 4 relations) + +geolog> :inspect SampleTodos +instance SampleTodos : TodoList = { + // Item (4): + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + // completed (1 tuples): + [buy_groceries] completed; + // high_priority (1 tuples): + [cook_dinner] high_priority; + // depends (1 tuples): + [cook_dinner, buy_groceries] depends; +} + +geolog> :assert SampleTodos completed cook_dinner +Asserted completed(cook_dinner) in instance 'SampleTodos' + +geolog> :inspect SampleTodos +instance SampleTodos : TodoList = { + // Item (4): + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + // completed (2 tuples): + [buy_groceries] completed; + [cook_dinner] completed; + // high_priority (1 tuples): + [cook_dinner] high_priority; + // depends (1 tuples): + [cook_dinner, buy_groceries] depends; +} +``` + +--- + +## The Chase Algorithm + +The **chase algorithm** computes the closure of an instance under the theory's axioms. It derives all facts that logically follow from the base facts and axioms. + +### Transitive Closure Example + +```geolog +// Graph with reachability (transitive closure) +theory Graph { + V : Sort; + + // Direct edges + Edge : [src: V, tgt: V] -> Prop; + + // Reachability (transitive closure of Edge) + Path : [src: V, tgt: V] -> Prop; + + // Base case: every edge is a path + ax/base : forall x, y : V. + [src: x, tgt: y] Edge |- [src: x, tgt: y] Path; + + // Inductive case: paths compose + ax/trans : forall x, y, z : V. + [src: x, tgt: y] Path, [src: y, tgt: z] Path |- [src: x, tgt: z] Path; +} + +// A linear chain: a -> b -> c -> d +// Using `= chase { ... }` to automatically apply axioms during elaboration. 
+instance Chain : Graph = chase { + a : V; + b : V; + c : V; + d : V; + + // Initial edges (chase derives Path tuples) + [src: a, tgt: b] Edge; + [src: b, tgt: c] Edge; + [src: c, tgt: d] Edge; +} +``` + +### REPL Session: Running the Chase + +When using `= chase { ... }` syntax, the chase runs automatically during elaboration: + +``` +geolog> :source examples/geolog/transitive_closure.geolog +Loading examples/geolog/transitive_closure.geolog... +Defined theory Graph (1 sorts, 2 relations) +Defined instance Chain : Graph (4 elements) [chase: 6 Path tuples derived] + +geolog> :inspect Chain +instance Chain : Graph = { + // V (4): + a : V; + b : V; + c : V; + d : V; + // Edge (3 tuples): + [a, b] Edge; + [b, c] Edge; + [c, d] Edge; + // Path (6 tuples): + [a, b] Path; + [b, c] Path; + [c, d] Path; + [a, c] Path; // Derived: a->b + b->c + [b, d] Path; // Derived: b->c + c->d + [a, d] Path; // Derived: a->c + c->d (or a->b + b->d) +} +``` + +You can also run chase manually with `:chase` on non-chase instances: + +``` +geolog> :chase MyInstance +Running chase on instance 'MyInstance' (theory 'Graph')... 
+✓ Chase completed in 3 iterations (0.15ms)
+```
+
+The chase derived:
+- **3 base paths** from the Edge → Path axiom
+- **2 one-step transitive paths**: (a,c) and (b,d)
+- **1 two-step transitive path**: (a,d)
+
+---
+
+## REPL Commands
+
+### General Commands
+
+| Command | Description |
+|---------|-------------|
+| `:help [topic]` | Show help (topics: syntax, examples) |
+| `:quit` | Exit the REPL |
+| `:list [target]` | List theories/instances |
+| `:inspect <name>` | Show details of a theory or instance |
+| `:source <file>` | Load and execute a .geolog file |
+| `:clear` | Clear the screen |
+| `:reset` | Reset all state |
+
+### Instance Mutation
+
+| Command | Description |
+|---------|-------------|
+| `:add <instance> <name> <sort>` | Add element to instance |
+| `:assert <instance> <relation> [args]` | Assert relation tuple |
+| `:retract <instance> <element>` | Retract element |
+
+### Query Commands
+
+| Command | Description |
+|---------|-------------|
+| `:query <instance> <sort>` | List all elements of a sort |
+| `:explain <instance> <sort>` | Show query execution plan |
+| `:compile <instance> <sort>` | Show RelAlgIR compilation |
+| `:chase <instance> [max_iter]` | Run chase algorithm |
+
+### Version Control
+
+| Command | Description |
+|---------|-------------|
+| `:commit [msg]` | Commit current changes |
+| `:history` | Show commit history |
+
+### Solver
+
+| Command | Description |
+|---------|-------------|
+| `:solve <theory> [budget_ms]` | Find model of theory |
+| `:extend <instance> <theory> [budget_ms]` | Extend instance to theory |
+
+### REPL Session: Query Explanation
+
+```
+geolog> :source examples/geolog/graph.geolog
+Loading examples/geolog/graph.geolog...
+Defined theory Graph (2 sorts, 2 functions) + +geolog> :explain Triangle V +Query plan for ':query Triangle V': + +Scan(sort=0) + +Sort: V (index 0) +Instance: Triangle (theory: Graph) + +geolog> :explain Triangle E +Query plan for ':query Triangle E': + +Scan(sort=1) + +Sort: E (index 1) +Instance: Triangle (theory: Graph) +``` + +--- + +## Complete Examples + +### Example 1: Directed Graphs + +**File: `examples/geolog/graph.geolog`** + +```geolog +// Directed Graph: vertices and edges with source/target functions +theory Graph { + V : Sort; // Vertices + E : Sort; // Edges + + src : E -> V; // Source of an edge + tgt : E -> V; // Target of an edge +} + +// A simple triangle graph: A → B → C → A +instance Triangle : Graph = { + A : V; + B : V; + C : V; + + ab : E; + bc : E; + ca : E; + + ab src = A; + ab tgt = B; + bc src = B; + bc tgt = C; + ca src = C; + ca tgt = A; +} + +// A self-loop: one vertex with an edge to itself +instance Loop : Graph = { + v : V; + e : E; + e src = v; + e tgt = v; +} + +// Diamond shape with two paths from top to bottom +instance Diamond : Graph = { + top : V; + left : V; + right : V; + bottom : V; + + top_left : E; + top_right : E; + left_bottom : E; + right_bottom : E; + + top_left src = top; + top_left tgt = left; + top_right src = top; + top_right tgt = right; + left_bottom src = left; + left_bottom tgt = bottom; + right_bottom src = right; + right_bottom tgt = bottom; +} +``` + +--- + +### Example 2: Algebraic Structures (Monoids) + +**File: `examples/geolog/monoid.geolog`** + +```geolog +// Monoid: a set with an associative binary operation and identity +theory Monoid { + M : Sort; + + // Binary operation: M × M → M + mul : [x: M, y: M] -> M; + + // Identity element selector + id : M -> M; + + // Left identity: id(x) * y = y + ax/left_id : forall x : M, y : M. + |- [x: x id, y: y] mul = y; + + // Right identity: x * id(y) = x + ax/right_id : forall x : M, y : M. 
+ |- [x: x, y: y id] mul = x; + + // Associativity: (x * y) * z = x * (y * z) + ax/assoc : forall x : M, y : M, z : M. + |- [x: [x: x, y: y] mul, y: z] mul = [x: x, y: [x: y, y: z] mul] mul; +} + +// Trivial monoid: single element +instance Trivial : Monoid = { + e : M; + [x: e, y: e] mul = e; + e id = e; +} + +// Boolean "And" monoid +instance BoolAnd : Monoid = { + T : M; + F : M; + + T id = T; + F id = T; + + [x: T, y: T] mul = T; + [x: T, y: F] mul = F; + [x: F, y: T] mul = F; + [x: F, y: F] mul = F; +} + +// Boolean "Or" monoid +instance BoolOr : Monoid = { + T : M; + F : M; + + T id = F; + F id = F; + + [x: T, y: T] mul = T; + [x: T, y: F] mul = T; + [x: F, y: T] mul = T; + [x: F, y: F] mul = F; +} +``` + +--- + +### Example 3: Preorders with Chase + +**File: `examples/geolog/preorder.geolog`** + +```geolog +// Preorder: reflexive and transitive relation +theory Preorder { + X : Sort; + + // The ordering relation: x ≤ y + leq : [x: X, y: X] -> Prop; + + // Reflexivity: x ≤ x + ax/refl : forall x : X. + |- [x: x, y: x] leq; + + // Transitivity: x ≤ y ∧ y ≤ z → x ≤ z + ax/trans : forall x : X, y : X, z : X. + [x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq; +} + +// Discrete preorder: only reflexive pairs +// Uses `chase` to automatically derive reflexive pairs from ax/refl. 
+instance Discrete3 : Preorder = chase { + a : X; + b : X; + c : X; +} + +// A total order on 3 elements: bot ≤ mid ≤ top +instance Chain3 : Preorder = chase { + bot : X; + mid : X; + top : X; + + [x: bot, y: mid] leq; + [x: mid, y: top] leq; + // Chase derives: (bot,bot), (mid,mid), (top,top) + (bot,top) +} +``` + +**REPL Session:** + +``` +geolog> :source examples/geolog/preorder.geolog +Defined theory Preorder (1 sorts, 1 relations) +Defined instance Discrete3 : Preorder (3 elements) [chase: 3 leq tuples derived] +Defined instance Chain3 : Preorder (3 elements) [chase: 6 leq tuples derived] + +geolog> :inspect Discrete3 + leq: 3 tuple(s) // (a,a), (b,b), (c,c) - reflexivity only + +geolog> :inspect Chain3 + leq: 6 tuple(s) // reflexive pairs + given + transitive (bot,top) +``` + +--- + +### Example 4: Task Management + +**File: `examples/geolog/todo_list.geolog`** + +```geolog +// TodoList: relational model for task tracking +theory TodoList { + Item : Sort; + + // Status relations (unary, simple arrow syntax) + completed : Item -> Prop; + high_priority : Item -> Prop; + blocked : Item -> Prop; + + // Dependencies (binary, with named fields) + depends : [item: Item, on: Item] -> Prop; + + // Axiom: blocked items depend on incomplete items + ax/dep_blocked : forall x : Item, y : Item. 
+ [item: x, on: y] depends |- x blocked \/ y completed; +} + +instance SampleTodos : TodoList = { + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + + // Unary relations: simple syntax + buy_groceries completed; + cook_dinner high_priority; + + // Binary relation: mixed positional/named syntax + // First positional arg -> 'item', named arg for 'on' + [cook_dinner, on: buy_groceries] depends; +} +``` + +--- + +### Example 5: Transitive Closure (Chase Demo) + +**File: `examples/geolog/transitive_closure.geolog`** + +```geolog +// Transitive Closure - demonstrates the chase algorithm +theory Graph { + V : Sort; + + Edge : [src: V, tgt: V] -> Prop; + Path : [src: V, tgt: V] -> Prop; + + // Base: edges are paths + ax/base : forall x, y : V. + [src: x, tgt: y] Edge |- [src: x, tgt: y] Path; + + // Transitivity: paths compose + ax/trans : forall x, y, z : V. + [src: x, tgt: y] Path, [src: y, tgt: z] Path |- [src: x, tgt: z] Path; +} + +// Linear chain: a -> b -> c -> d (chase runs automatically) +instance Chain : Graph = chase { + a : V; + b : V; + c : V; + d : V; + + [src: a, tgt: b] Edge; + [src: b, tgt: c] Edge; + [src: c, tgt: d] Edge; +} + +// Diamond: two paths from top to bottom +instance Diamond : Graph = chase { + top : V; + left : V; + right : V; + bottom : V; + + [src: top, tgt: left] Edge; + [src: top, tgt: right] Edge; + [src: left, tgt: bottom] Edge; + [src: right, tgt: bottom] Edge; +} + +// Cycle: x -> y -> z -> x (chase computes all 9 pairs!) 
+instance Cycle : Graph = chase { + x : V; + y : V; + z : V; + + [src: x, tgt: y] Edge; + [src: y, tgt: z] Edge; + [src: z, tgt: x] Edge; +} +``` + +**REPL Session** (chase runs during `:source`): + +``` +geolog> :source examples/geolog/transitive_closure.geolog +Defined theory Graph (1 sorts, 2 relations) +Defined instance Chain : Graph (4 elements) [chase: 6 Path tuples] +Defined instance Diamond : Graph (4 elements) [chase: 5 Path tuples] +Defined instance Cycle : Graph (3 elements) [chase: 9 Path tuples] +``` + +--- + +### Example 6: Inline Definitions + +You can define theories and instances directly in the REPL: + +``` +geolog> theory Counter { +...... C : Sort; +...... next : C -> C; +...... } +Defined theory Counter (1 sorts, 1 functions) + +geolog> instance Mod3 : Counter = { +...... zero : C; +...... one : C; +...... two : C; +...... zero next = one; +...... one next = two; +...... two next = zero; +...... } +Defined instance Mod3 : Counter (3 elements) + +geolog> :inspect Mod3 +instance Mod3 : Counter = { + // C (3): + zero : C; + one : C; + two : C; + // next: + zero next = one; + one next = two; + two next = zero; +} +``` + +--- + +## Syntax Reference + +### Sorts +``` +identifier : Sort; +``` + +### Functions +``` +// Unary function +name : Domain -> Codomain; + +// Binary function (product domain) +name : [field1: Sort1, field2: Sort2] -> Codomain; +``` + +### Relations +``` +// Unary relation +name : [field: Sort] -> Prop; + +// Binary relation +name : [x: Sort1, y: Sort2] -> Prop; +``` + +### Axioms +``` +// No premises (fact) +name : forall vars. |- conclusion; + +// With premises +name : forall vars. premise1, premise2 |- conclusion; + +// With disjunction in conclusion +name : forall vars. 
premise |- conclusion1 \/ conclusion2; +``` + +### Instance Elements +``` +elem_name : Sort; +``` + +### Function Values +``` +// Unary +elem func = value; + +// Product domain +[field1: val1, field2: val2] func = value; +``` + +### Relation Assertions +``` +// Unary relation +elem relation; + +// Binary relation +[field1: val1, field2: val2] relation; +``` + +--- + +## Architecture + +> TODO: greatly expand this section + +Geolog is built with several key components: + +- **Parser**: Converts `.geolog` source to AST +- **Elaborator**: Type-checks and converts AST to core representations +- **Structure**: In-memory model representation with carriers and functions +- **Chase Engine**: Fixpoint computation for derived relations +- **Query Engine**: Relational algebra for querying instances +- **Store**: Persistent, append-only storage with version control + +--- + +## License + +MIT License - see LICENSE file for details. + +--- + +## Contributing + +Contributions welcome! See CLAUDE.md for development guidelines and the `loose_thoughts/` directory for design discussions. 
diff --git a/architecture.dot b/architecture.dot new file mode 100644 index 0000000..401f926 --- /dev/null +++ b/architecture.dot @@ -0,0 +1,227 @@ +digraph GeologArchitecture { + rankdir=TB; + compound=true; + fontname="Helvetica"; + node [fontname="Helvetica", shape=box, style="rounded,filled", fillcolor="#f0f0f0"]; + edge [fontname="Helvetica"]; + + label="Geolog Architecture"; + labelloc="t"; + fontsize=24; + + // User Interface Layer + subgraph cluster_ui { + label="User Interface"; + style="rounded,filled"; + fillcolor="#e3f2fd"; + + cli [label="CLI\n(bin/geolog.rs)", fillcolor="#bbdefb"]; + repl [label="REPL\n(repl.rs)", fillcolor="#bbdefb"]; + batch [label="Batch Loading\n(.geolog files)", fillcolor="#bbdefb"]; + } + + // Parsing Layer + subgraph cluster_parsing { + label="Parsing Layer"; + style="rounded,filled"; + fillcolor="#e8f5e9"; + + lexer [label="Lexer\n(lexer.rs)", fillcolor="#c8e6c9"]; + parser [label="Parser\n(parser.rs)", fillcolor="#c8e6c9"]; + ast [label="AST\n(ast.rs)", fillcolor="#c8e6c9"]; + error [label="Error Reporting\n(error.rs)\nariadne", fillcolor="#c8e6c9"]; + pretty [label="Pretty Printer\n(pretty.rs)", fillcolor="#c8e6c9"]; + } + + // Elaboration Layer + subgraph cluster_elaboration { + label="Elaboration Layer"; + style="rounded,filled"; + fillcolor="#fff3e0"; + + elab_theory [label="Theory Elaboration\n(elaborate/theory.rs)", fillcolor="#ffe0b2"]; + elab_instance [label="Instance Elaboration\n(elaborate/instance.rs)", fillcolor="#ffe0b2"]; + elab_env [label="Environment\n(elaborate/env.rs)", fillcolor="#ffe0b2"]; + elab_types [label="Type Evaluation\n(elaborate/types.rs)", fillcolor="#ffe0b2"]; + elab_error [label="Type Errors\n(elaborate/error.rs)", fillcolor="#ffe0b2"]; + } + + // Core Layer + subgraph cluster_core { + label="Core Layer"; + style="rounded,filled"; + fillcolor="#fce4ec"; + + core [label="Core IR\n(core.rs)\nSignature, Term,\nFormula, Structure", fillcolor="#f8bbd9"]; + id [label="Identity System\n(id.rs)\nLuid, 
Slid", fillcolor="#f8bbd9"]; + universe [label="Universe\n(universe.rs)\nUUID <-> Luid", fillcolor="#f8bbd9"]; + naming [label="Naming\n(naming.rs)\nName <-> Luid", fillcolor="#f8bbd9"]; + cc [label="Congruence Closure\n(cc.rs)\nUnion-Find", fillcolor="#f8bbd9"]; + } + + // Storage Layer + subgraph cluster_storage { + label="Storage Layer"; + style="rounded,filled"; + fillcolor="#e1f5fe"; + + store [label="Store\n(store/mod.rs)", fillcolor="#b3e5fc"]; + store_schema [label="Schema Cache\n(store/schema.rs)", fillcolor="#b3e5fc"]; + store_append [label="Append Operations\n(store/append.rs)", fillcolor="#b3e5fc"]; + store_theory [label="Theory CRUD\n(store/theory.rs)", fillcolor="#b3e5fc"]; + store_instance [label="Instance CRUD\n(store/instance.rs)", fillcolor="#b3e5fc"]; + store_commit [label="Version Control\n(store/commit.rs)", fillcolor="#b3e5fc"]; + store_materialize [label="Materialized Views\n(store/materialize.rs)", fillcolor="#b3e5fc"]; + geologmeta [label="GeologMeta\n(Homoiconic Store)", fillcolor="#81d4fa", style="rounded,filled,bold"]; + } + + // Query Layer + subgraph cluster_query { + label="Query & Compilation Layer"; + style="rounded,filled"; + fillcolor="#f3e5f5"; + + query_compile [label="Query Compiler\n(query/compile.rs)", fillcolor="#e1bee7"]; + query_relalg [label="Relational Algebra IR\n(query/to_relalg.rs)\n(query/from_relalg.rs)", fillcolor="#e1bee7"]; + query_chase [label="Chase Algorithm\n(query/chase.rs)\nFixpoint + CC", fillcolor="#ce93d8", style="rounded,filled,bold"]; + query_backend [label="Query Backend\n(query/backend.rs)", fillcolor="#e1bee7"]; + query_optimize [label="Optimizer\n(query/optimize.rs)", fillcolor="#e1bee7"]; + } + + // Solver Layer + subgraph cluster_solver { + label="Solver Layer"; + style="rounded,filled"; + fillcolor="#e0f2f1"; + + solver [label="Model Enumeration\n(solver/mod.rs)", fillcolor="#b2dfdb"]; + solver_tree [label="Search Tree\n(solver/tree.rs)", fillcolor="#b2dfdb"]; + solver_tactics 
[label="Tactics\n(solver/tactics.rs)\nCheck, Forward,\nPropagate, Auto", fillcolor="#80cbc4", style="rounded,filled,bold"]; + solver_types [label="Solver Types\n(solver/types.rs)", fillcolor="#b2dfdb"]; + } + + // Tensor Layer + subgraph cluster_tensor { + label="Tensor Algebra Layer"; + style="rounded,filled"; + fillcolor="#fff8e1"; + + tensor_expr [label="Tensor Expressions\n(tensor/expr.rs)", fillcolor="#ffecb3"]; + tensor_sparse [label="Sparse Storage\n(tensor/sparse.rs)\nRoaringBitmap", fillcolor="#ffe082", style="rounded,filled,bold"]; + tensor_builder [label="Expression Builder\n(tensor/builder.rs)", fillcolor="#ffecb3"]; + tensor_compile [label="Formula Compiler\n(tensor/compile.rs)", fillcolor="#ffecb3"]; + tensor_check [label="Axiom Checker\n(tensor/check.rs)", fillcolor="#ffecb3"]; + } + + // External Dependencies (simplified) + subgraph cluster_deps { + label="Key Dependencies"; + style="rounded,dashed"; + fillcolor="#fafafa"; + + chumsky [label="chumsky\n(parser combinators)", shape=ellipse, fillcolor="#e0e0e0"]; + rkyv [label="rkyv\n(zero-copy serde)", shape=ellipse, fillcolor="#e0e0e0"]; + roaring [label="roaring\n(bitmaps)", shape=ellipse, fillcolor="#e0e0e0"]; + unionfind [label="egglog-union-find", shape=ellipse, fillcolor="#e0e0e0"]; + } + + // Data Flow Edges + + // UI to Parsing + cli -> repl; + batch -> repl; + repl -> lexer [lhead=cluster_parsing]; + + // Parsing flow + lexer -> parser; + parser -> ast; + ast -> error [style=dashed, label="errors"]; + ast -> pretty [style=dashed, label="roundtrip"]; + + // Parsing to Elaboration + ast -> elab_theory; + ast -> elab_instance; + + // Elaboration internal + elab_theory -> elab_env; + elab_instance -> elab_env; + elab_env -> elab_types; + elab_types -> elab_error [style=dashed]; + + // Elaboration to Core + elab_theory -> core; + elab_instance -> core; + + // Core internal + core -> id; + id -> universe; + id -> naming; + core -> cc; + + // Core to Storage + core -> store [lhead=cluster_storage]; 
+ + // Storage internal + store -> store_schema; + store -> store_append; + store -> store_theory; + store -> store_instance; + store -> store_commit; + store -> store_materialize; + store_append -> geologmeta; + store_theory -> geologmeta; + store_instance -> geologmeta; + store_commit -> geologmeta; + store_materialize -> geologmeta; + + // Query layer connections + repl -> query_compile [label="queries"]; + query_compile -> query_relalg; + query_relalg -> query_optimize; + query_optimize -> query_backend; + query_backend -> store [label="execute"]; + + // Chase + repl -> query_chase [label=":chase"]; + query_chase -> cc [label="equality\nsaturation"]; + query_chase -> store; + query_chase -> tensor_check [label="axiom\nchecking"]; + + // Solver connections + repl -> solver [label=":solve\n:query"]; + solver -> solver_tree; + solver_tree -> solver_tactics; + solver_tactics -> solver_types; + solver_tactics -> query_chase [label="forward\nchaining"]; + solver_tactics -> cc [label="propagate\nequations"]; + solver_tactics -> tensor_check [label="check\naxioms"]; + solver -> store; + + // Tensor internal + tensor_compile -> tensor_expr; + tensor_expr -> tensor_builder; + tensor_builder -> tensor_sparse; + tensor_check -> tensor_compile; + tensor_sparse -> core [label="read\nstructure"]; + + // Dependencies + lexer -> chumsky [style=dotted]; + parser -> chumsky [style=dotted]; + store -> rkyv [style=dotted]; + tensor_sparse -> roaring [style=dotted]; + cc -> unionfind [style=dotted]; + + // Legend + subgraph cluster_legend { + label="Legend"; + style="rounded"; + fillcolor="white"; + + legend_data [label="Data Flow", shape=plaintext]; + legend_dep [label="Dependency", shape=plaintext]; + legend_key [label="Key Component", fillcolor="#80cbc4", style="rounded,filled,bold"]; + + legend_data -> legend_dep [style=invis]; + legend_dep -> legend_key [style=invis]; + } +} diff --git a/architecture.svg b/architecture.svg new file mode 100644 index 0000000..8179b3b --- 
/dev/null +++ b/architecture.svg @@ -0,0 +1,770 @@ + + + + + + +GeologArchitecture + +Geolog Architecture + +cluster_ui + +User Interface + + +cluster_parsing + +Parsing Layer + + +cluster_elaboration + +Elaboration Layer + + +cluster_core + +Core Layer + + +cluster_storage + +Storage Layer + + +cluster_query + +Query & Compilation Layer + + +cluster_solver + +Solver Layer + + +cluster_tensor + +Tensor Algebra Layer + + +cluster_deps + +Key Dependencies + + +cluster_legend + +Legend + + + +cli + +CLI +(bin/geolog.rs) + + + +repl + +REPL +(repl.rs) + + + +cli->repl + + + + + +lexer + +Lexer +(lexer.rs) + + + +repl->lexer + + + + + +query_compile + +Query Compiler +(query/compile.rs) + + + +repl->query_compile + + +queries + + + +query_chase + +Chase Algorithm +(query/chase.rs) +Fixpoint + CC + + + +repl->query_chase + + +:chase + + + +solver + +Model Enumeration +(solver/mod.rs) + + + +repl->solver + + +:solve +:query + + + +batch + +Batch Loading +(.geolog files) + + + +batch->repl + + + + + +parser + +Parser +(parser.rs) + + + +lexer->parser + + + + + +chumsky + +chumsky +(parser combinators) + + + +lexer->chumsky + + + + + +ast + +AST +(ast.rs) + + + +parser->ast + + + + + +parser->chumsky + + + + + +error + +Error Reporting +(error.rs) +ariadne + + + +ast->error + + +errors + + + +pretty + +Pretty Printer +(pretty.rs) + + + +ast->pretty + + +roundtrip + + + +elab_theory + +Theory Elaboration +(elaborate/theory.rs) + + + +ast->elab_theory + + + + + +elab_instance + +Instance Elaboration +(elaborate/instance.rs) + + + +ast->elab_instance + + + + + +elab_env + +Environment +(elaborate/env.rs) + + + +elab_theory->elab_env + + + + + +core + +Core IR +(core.rs) +Signature, Term, +Formula, Structure + + + +elab_theory->core + + + + + +elab_instance->elab_env + + + + + +elab_instance->core + + + + + +elab_types + +Type Evaluation +(elaborate/types.rs) + + + +elab_env->elab_types + + + + + +elab_error + +Type Errors +(elaborate/error.rs) + + + +elab_types->elab_error + + 
+ + + +id + +Identity System +(id.rs) +Luid, Slid + + + +core->id + + + + + +cc + +Congruence Closure +(cc.rs) +Union-Find + + + +core->cc + + + + + +store + +Store +(store/mod.rs) + + + +core->store + + + + + +universe + +Universe +(universe.rs) +UUID <-> Luid + + + +id->universe + + + + + +naming + +Naming +(naming.rs) +Name <-> Luid + + + +id->naming + + + + + +unionfind + +egglog-union-find + + + +cc->unionfind + + + + + +store_schema + +Schema Cache +(store/schema.rs) + + + +store->store_schema + + + + + +store_append + +Append Operations +(store/append.rs) + + + +store->store_append + + + + + +store_theory + +Theory CRUD +(store/theory.rs) + + + +store->store_theory + + + + + +store_instance + +Instance CRUD +(store/instance.rs) + + + +store->store_instance + + + + + +store_commit + +Version Control +(store/commit.rs) + + + +store->store_commit + + + + + +store_materialize + +Materialized Views +(store/materialize.rs) + + + +store->store_materialize + + + + + +rkyv + +rkyv +(zero-copy serde) + + + +store->rkyv + + + + + +geologmeta + +GeologMeta +(Homoiconic Store) + + + +store_append->geologmeta + + + + + +store_theory->geologmeta + + + + + +store_instance->geologmeta + + + + + +store_commit->geologmeta + + + + + +store_materialize->geologmeta + + + + + +query_relalg + +Relational Algebra IR +(query/to_relalg.rs) +(query/from_relalg.rs) + + + +query_compile->query_relalg + + + + + +query_optimize + +Optimizer +(query/optimize.rs) + + + +query_relalg->query_optimize + + + + + +query_chase->cc + + +equality +saturation + + + +query_chase->store + + + + + +tensor_check + +Axiom Checker +(tensor/check.rs) + + + +query_chase->tensor_check + + +axiom +checking + + + +query_backend + +Query Backend +(query/backend.rs) + + + +query_backend->store + + +execute + + + +query_optimize->query_backend + + + + + +solver->store + + + + + +solver_tree + +Search Tree +(solver/tree.rs) + + + +solver->solver_tree + + + + + +solver_tactics + +Tactics +(solver/tactics.rs) +Check, 
Forward, +Propagate, Auto + + + +solver_tree->solver_tactics + + + + + +solver_tactics->cc + + +propagate +equations + + + +solver_tactics->query_chase + + +forward +chaining + + + +solver_types + +Solver Types +(solver/types.rs) + + + +solver_tactics->solver_types + + + + + +solver_tactics->tensor_check + + +check +axioms + + + +tensor_expr + +Tensor Expressions +(tensor/expr.rs) + + + +tensor_builder + +Expression Builder +(tensor/builder.rs) + + + +tensor_expr->tensor_builder + + + + + +tensor_sparse + +Sparse Storage +(tensor/sparse.rs) +RoaringBitmap + + + +tensor_sparse->core + + +read +structure + + + +roaring + +roaring +(bitmaps) + + + +tensor_sparse->roaring + + + + + +tensor_builder->tensor_sparse + + + + + +tensor_compile + +Formula Compiler +(tensor/compile.rs) + + + +tensor_compile->tensor_expr + + + + + +tensor_check->tensor_compile + + + + + +legend_data + +Data Flow + + + +legend_dep + +Dependency + + + + +legend_key + +Key Component + + + + diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..e9884a7 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,255 @@ +# Geolog Architecture + +Geolog is a language for geometric logic with semantics in topoi. This document describes the module structure and data flow. + +## Module Overview + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ repl.rs Interactive REPL with commands (:help, :inspect, etc.) 
│ +│ bin/geolog.rs CLI entry point │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ +┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ PARSING / SURFACE LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ lexer.rs Tokenization (chumsky-based) │ +│ parser.rs Token stream → AST (chumsky-based) │ +│ ast.rs Surface syntax AST types │ +│ pretty.rs Core → geolog source (inverse of parsing) │ +│ error.rs Error formatting with source spans │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ +┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ ELABORATION LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ elaborate/ │ +│ ├── mod.rs Re-exports │ +│ ├── env.rs Elaboration environment (theory registry) │ +│ ├── theory.rs AST theory → Core theory elaboration │ +│ ├── instance.rs AST instance → Core structure elaboration │ +│ └── error.rs Elaboration error types │ +│ │ +│ Transforms surface AST into typed core representation │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ +┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ CORE LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ core.rs Core IR: Signature, Term, Formula, Structure │ +│ - Signature: sorts + functions + relations │ +│ - Term: Var | App | Record | Project │ +│ - Formula: True | False | Eq | Rel | Conj | Disj | Exists │ +│ - Structure: carriers + function maps + relation storage │ +│ │ +│ id.rs Identity system (Luid = global, Slid = structure-local) │ +│ universe.rs Global element registry (Luid allocation) │ +│ naming.rs Bidirectional name ↔ Luid mapping │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ 
+┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ STORAGE LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ store/ │ +│ ├── mod.rs Store struct: unified GeologMeta persistence │ +│ ├── schema.rs Schema ID caches (sort_ids, func_ids, etc.) │ +│ ├── append.rs Append-only element/function/relation creation │ +│ ├── theory.rs Theory → Store integration │ +│ ├── instance.rs Instance → Store integration │ +│ ├── commit.rs Git-like commit/version control │ +│ └── bootstrap_queries.rs Hardcoded query patterns (being replaced) │ +│ │ +│ workspace.rs Legacy session management (deprecated, use Store) │ +│ patch.rs Patch-based structure modifications │ +│ version.rs Git-like version control for structures │ +└─────────────────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────────┐ +│ QUERY LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ query/ │ +│ ├── mod.rs Re-exports and overview │ +│ ├── chase.rs Chase algorithm for existential/equality conclusions │ +│ │ - chase_fixpoint_with_cc(): main entry point │ +│ │ - Integrates CongruenceClosure for equality saturation│ +│ ├── compile.rs Query → QueryOp plan compilation │ +│ ├── backend.rs Naive QueryOp executor (reference impl) │ +│ ├── optimize.rs Algebraic law rewriting (filter fusion, etc.) │ +│ ├── pattern.rs Legacy Pattern API (deprecated) │ +│ └── store_queries.rs Store-level compiled query methods │ +│ │ +│ Relational query engine for GeologMeta and instance queries. 
│ +│ Query API: Query::scan(sort).filter_eq(func, col, val).compile() │ +│ Optimizer applies RelAlgIR laws: Filter(p, Filter(q, x)) → Filter(p∧q, x) │ +└─────────────────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────────┐ +│ SOLVING LAYER (frontier) │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ cc.rs Congruence closure (shared by solver + chase) │ +│ - Element equivalence tracking with union-find │ +│ - Used for equality conclusion axioms │ +│ │ +│ solver/ │ +│ ├── mod.rs Unified model enumeration API + re-exports │ +│ │ - enumerate_models(): core unified function │ +│ │ - solve(): find models from scratch │ +│ │ - query(): extend existing models │ +│ ├── types.rs SearchNode, Obligation, NodeStatus (re-exports cc::*) │ +│ ├── tree.rs Explicit search tree with from_base() for extensions │ +│ └── tactics.rs Automated search tactics: │ +│ - CheckTactic: axiom checking, obligation reporting │ +│ - ForwardChainingTactic: Datalog-style forward chaining │ +│ - PropagateEquationsTactic: congruence closure propagation│ +│ - AutoTactic: composite fixpoint solver │ +│ │ +│ REPL commands: `:solve `, `:extend ` │ +│ See examples/geolog/solver_demo.geolog for annotated examples. 
│ +│ │ +│ tensor/ │ +│ ├── mod.rs Re-exports │ +│ ├── expr.rs Lazy tensor expression trees │ +│ ├── sparse.rs Sparse tensor storage (RoaringTreemap) │ +│ ├── builder.rs Expression builders (conjunction, disjunction, exists) │ +│ ├── compile.rs Formula → TensorExpr compilation │ +│ └── check.rs Axiom checking via tensor evaluation │ +└─────────────────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────────┐ +│ META LAYER (self-description) │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ meta.rs Rust codegen for GeologMeta theory │ +│ theories/GeologMeta.geolog Homoiconic theory representation │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Data Flow + +### Parsing / Pretty-Printing Flow +``` +Source text → lexer.rs → Token stream → parser.rs → ast::File + ↓ +core::Structure ← elaborate ←──────────────────────── ast::* + ↓ +pretty.rs → Source text (roundtrip!) 
+``` + +### Elaboration Flow +``` +ast::TheoryDecl → elaborate/theory.rs → core::Theory (Signature + Axioms) +ast::InstanceDecl → elaborate/instance.rs → core::Structure +``` + +### REPL Flow +``` +User input → ReplState::process_line → MetaCommand | GeologInput + ↓ + GeologInput → parse → elaborate → workspace.add_* +``` + +## Key Types + +### Identity System + +```rust +Luid // "Local Universe ID" - globally unique across all structures +Slid // "Structure-Local ID" - index within a single structure + +// A Structure maps Slid → Luid for global identity +structure.get_luid(slid) -> Luid +``` + +### Core Representation + +```rust +// Signatures define the vocabulary +Signature { + sorts: Vec, // Sort names by SortId + functions: Vec, // f : A → B + relations: Vec, // R : A → Prop +} + +// Structures interpret signatures +Structure { + carriers: Vec, // Elements per sort (as Slid) + functions: Vec>, // Function value maps + relations: Vec, // Relation extents + local_to_global: Vec, // Slid → Luid +} +``` + +### Axioms (Sequents) + +```rust +Sequent { + context: Context, // Universally quantified variables + premise: Formula, // Antecedent (conjunction of atomics) + conclusion: Formula, // Consequent (positive geometric formula) +} +``` + +## Design Principles + +1. **Postfix application**: `x f` not `f(x)` — matches categorical composition +2. **Child pointers**: Parent → Child, not Child → Parent (no products in domains) +3. **Upward binding**: Variables point to their binders (scoping is explicit) +4. **Sparse storage**: Relations use RoaringBitmap for efficient membership +5. **Patch-based updates**: Structures evolve via patches, enabling versioning +6. 
**Explicit search tree**: Solver maintains tree in memory, not call stack + +## Testing Strategy + +- **proptest**: Property-based tests for core operations (naming, patches, structure) +- **unit tests**: Specific behaviors in `tests/unit_*.rs` +- **integration tests**: Example .geolog files in `tests/examples_integration.rs` +- **REPL testing**: Interactive exploration via `cargo run` + +## Future Directions + +See `bd ready` for current work items. Key frontiers: + +- **Query engine** (`geolog-7tt`, `geolog-32x`): Chase algorithm and RelAlgIR compiler +- **Nested instance elaboration** (`geolog-1d4`): Inline instance definitions +- **Monotonic Submodel proofs** (`geolog-rgg`): Lean4 formalization +- **Disjunction variable alignment** (`geolog-69b`): Extend tensor builder for heterogeneous disjuncts + +## Recent Milestones + +- **Unified model enumeration API** (`2026-01-19`): Consolidated `solve()`, `extend()`, and `query()` + into single `enumerate_models()` function. REPL commands `:solve` and `:extend` now share underlying implementation. + +- **Tensor compiler improvements** (`2026-01-20`): + - Function application equalities: `f(x) = y`, `y = f(x)`, `f(x) = g(y)` now compile correctly + - Empty-domain existential fix: `∃x. φ` on empty domain correctly returns false + - Closed `geolog-dxr` (tensor compilation panics on function terms) + +- **Bootstrap query migration** (`2026-01-20`): All 6 bootstrap_queries functions now delegate + to compiled query engine (`store_queries.rs`). Net reduction of ~144 lines of handcoded iteration. + +- **Proptest coverage** (`2026-01-20`): Added 6 solver proptests covering trivial theories, + inconsistent theories, existential theories, and Horn clause propagation. + +- **Theory extends fix** (`2026-01-20`): Fixed bug where function names like `Func/dom` (using `/` + as naming convention) were incorrectly treated as grandparent-qualified names. 
RelAlgIR.geolog + now loads correctly, unblocking homoiconic query plan work (`geolog-32x`). + +- **:explain REPL command** (`2026-01-20`): Added `:explain ` to show query + execution plans, with Display impl for QueryOp using math notation (∫, δ, z⁻¹, ×, ∧, ∨). + +- **Geometric logic solver complete** (`geolog-xj2`): Forward chaining, equation propagation, + existential body processing, derivation search for False. Interactive via `:solve`. + +- **Chase with equality saturation** (`2026-01-21`): Chase algorithm now integrates congruence + closure (CC) for handling equality conclusion axioms like `R(x,y) |- x = y`. CC tracks + element equivalences and canonicalizes structures after chase converges. This enables + Category theory to terminate correctly: unit law axioms collapse infinite `id;id;...` + compositions. Added `src/cc.rs` as shared module for both solver and chase. + +- **Chase proptests** (`2026-01-21`): Added property-based tests for reflexivity, transitivity, + existential conclusions, and equality conclusions. Multi-session persistence tests verify + chase results survive REPL restart. + +- **Fuzzing infrastructure** (`2026-01-21`): Added `fuzz/` directory with `fuzz_parser` and + `fuzz_repl` targets for finding edge cases. Requires nightly Rust. diff --git a/docs/SYNTAX.md b/docs/SYNTAX.md new file mode 100644 index 0000000..4d6ff18 --- /dev/null +++ b/docs/SYNTAX.md @@ -0,0 +1,336 @@ +# Geolog Surface Syntax Reference + +This document describes the surface syntax of Geolog. For examples, see `examples/geolog/`. + +## Lexical Elements + +### Identifiers +``` +identifier := [a-zA-Z_][a-zA-Z0-9_]* +``` + +### Paths +Paths use `/` as a separator (not `.`), which allows `.` for field projection: +``` +path := identifier ('/' identifier)* +``` +Examples: `P`, `in/src`, `ax/refl` + +### Keywords +``` +namespace theory instance query +Sort Prop forall exists +``` + +### Operators and Punctuation +``` +: -> = |- \/ . 
, ; +{ } [ ] ( ) +``` + +## Declarations + +A Geolog file consists of declarations: + +``` +file := declaration* +declaration := namespace | theory | instance | query +``` + +### Namespace +``` +namespace identifier; +``` +Currently a no-op; reserved for future module system. + +### Theory + +```ebnf +theory := 'theory' params? identifier '{' theory_item* '}' +params := param_group+ +param_group := '(' param (',' param)* ')' +param := identifier ':' type_expr + +theory_item := sort_decl | function_decl | axiom_decl | field_decl +``` + +#### Sort Declaration +``` +identifier ':' 'Sort' ';' +``` +Example: `P : Sort;` + +#### Function Declaration +``` +path ':' type_expr '->' type_expr ';' +``` +Examples: +``` +src : E -> V; // Unary function +mul : [x: M, y: M] -> M; // Binary function (product domain) +``` + +#### Relation Declaration +Relations are functions to `Prop`: +``` +path ':' type_expr '->' 'Prop' ';' +``` +Example: +``` +leq : [x: X, y: X] -> Prop; // Binary relation +``` + +#### Axiom Declaration +``` +path ':' 'forall' quantified_vars '.' premises '|-' conclusion ';' + +quantified_vars := (var_group (',' var_group)*)? // May be empty! +var_group := identifier (',' identifier)* ':' type_expr +premises := formula (',' formula)* // May be empty +``` + +Examples: +``` +// No premises (Horn clause with empty body) +ax/refl : forall x : X. |- [x: x, y: x] leq; + +// With premises +ax/trans : forall x : X, y : X, z : X. + [x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq; + +// Empty quantifier - unconditional axiom +// Useful for asserting existence without preconditions +ax/nonempty : forall . |- exists x : X.; +``` + +### Instance + +```ebnf +instance := 'instance' identifier ':' type_expr '=' instance_body +instance_body := '{' instance_item* '}' | 'chase' '{' instance_item* '}' + +instance_item := element_decl | equation | nested_instance +``` + +Using `= chase { ... 
}` runs the chase algorithm during elaboration, automatically deriving facts from axioms. + +The chase supports: +- **Existential conclusions**: Creates fresh elements for `∃` in axiom conclusions +- **Equality conclusions**: Uses congruence closure to track element equivalences +- **Fixpoint iteration**: Runs until no new facts can be derived + +Equality saturation enables termination for theories with unit laws (like Categories) that would otherwise loop forever. + +#### Element Declaration +``` +identifier ':' type_expr ';' +``` +Example: `A : V;` — declares element `A` of sort `V` + +#### Equation +``` +term '=' term ';' +``` +Example: `ab src = A;` — asserts that applying `src` to `ab` yields `A` + +#### Nested Instance (syntax parsed but not fully elaborated) +``` +identifier '=' '{' instance_item* '}' ';' +``` + +## Type Expressions + +```ebnf +type_expr := 'Sort' | 'Prop' | path | record_type | app_type | arrow_type | instance_type + +record_type := '[' (field (',' field)*)? ']' +field := identifier ':' type_expr // Named field + | type_expr // Positional: gets name "0", "1", etc. + +app_type := type_expr type_expr // Juxtaposition +arrow_type := type_expr '->' type_expr +instance_type := type_expr 'instance' +``` + +Examples: +``` +Sort // The universe of sorts +Prop // Propositions +V // A named sort +[x: M, y: M] // Product type with named fields +[M, M] // Product type with positional fields ("0", "1") +[M, on: M] // Mixed: first positional, second named +M -> M // Function type +PetriNet instance // Instance of a theory +N PetriNet instance // Parameterized: N is a PetriNet instance +``` + +## Terms + +```ebnf +term := path | record | paren_term | application | projection + +record := '[' (entry (',' entry)*)? ']' +entry := identifier ':' term // Named entry + | term // Positional: gets name "0", "1", etc. + +paren_term := '(' term ')' +application := term term // Postfix! 'x f' means 'f(x)' +projection := term '.' 
identifier // Record projection +``` + +**Important**: Geolog uses **postfix** function application. + +| Geolog | Traditional | +|--------|-------------| +| `x f` | `f(x)` | +| `[x: a, y: b] mul` | `mul(a, b)` | +| `x f g` | `g(f(x))` | + +This matches categorical composition: morphisms compose left-to-right. + +Examples: +``` +A // Variable/element reference +ab src // Apply src to ab +[x: a, y: b] mul // Apply mul to record (named fields) +[a, b] mul // Apply mul to record (positional) +[a, on: b] rel // Mixed: positional first, named second +x f g // Composition: g(f(x)) +r .field // Project field from record r +``` + +**Note on positional fields**: Positional fields are assigned names "0", "1", etc. +When matching against a relation defined with named fields (e.g., `rel : [x: M, y: M] -> Prop`), +positional fields are matched by position: "0" matches the first field, "1" the second, etc. +This allows mixing positional and named syntax: `[a, y: b] rel` is equivalent to `[x: a, y: b] rel`. + +## Formulas + +```ebnf +formula := atomic | exists | disjunction | paren_formula + +atomic := equality | relation_app +equality := term '=' term +relation_app := term identifier // 'x R' means R(x) + +exists := 'exists' quantified_vars '.' formulas? // Body may be empty (= True) +formulas := formula (',' formula)* +disjunction := formula ('\/' formula)+ +paren_formula := '(' formula ')' +``` + +**Conjunction** is implicit: premises in axioms separated by `,` form a conjunction. + +Examples: +``` +x = y // Equality +[x: a, y: b] leq // Relation application +exists z : X. [x: x, y: z] leq // Existential with condition +exists z : X. // Existential with empty body (= exists z. 
True) +phi \/ psi // Disjunction +``` + +## Comments + +Line comments start with `//`: +``` +// This is a comment +P : Sort; // Inline comment +``` + +## Complete Example + +```geolog +// Directed graph theory +theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; +} + +// A triangle: A → B → C → A +instance Triangle : Graph = { + A : V; + B : V; + C : V; + + ab : E; + ab src = A; + ab tgt = B; + + bc : E; + bc src = B; + bc tgt = C; + + ca : E; + ca src = C; + ca tgt = A; +} +``` + +## Grammar Summary (EBNF) + +```ebnf +file := declaration* + +declaration := 'namespace' ident ';' + | 'theory' params? ident '{' theory_item* '}' + | 'instance' ident ':' type '=' '{' instance_item* '}' + | 'query' ident ':' type '=' formula + +params := ('(' param (',' param)* ')')+ +param := ident ':' type + +theory_item := ident ':' 'Sort' ';' + | path ':' type '->' type ';' + | path ':' 'forall' qvars '.' formulas '|-' formula ';' + +qvars := (ident (',' ident)* ':' type) (',' ...)* +formulas := formula (',' formula)* + +instance_item := ident ':' type ';' + | term '=' term ';' + | ident '=' '{' instance_item* '}' ';' + +type := 'Sort' | 'Prop' | path | '[' fields ']' | type type | type '->' type | type 'instance' +fields := (ident ':' type) (',' ...)* + +term := path | '[' entries ']' | '(' term ')' | term term | term '.' ident +entries := (ident ':' term) (',' ...)* + +formula := term '=' term | term ident | 'exists' qvars '.' 
formula | formula '\/' formula | '(' formula ')' + +path := ident ('/' ident)* +ident := [a-zA-Z_][a-zA-Z0-9_]* +``` + +## Example Files + +The `examples/geolog/` directory contains working examples: + +| File | Description | +|------|-------------| +| `graph.geolog` | Simple directed graph theory with vertices and edges | +| `preorder.geolog` | Preorder (reflexive, transitive relation) with discrete/chain instances | +| `transitive_closure.geolog` | **Demonstrates chase algorithm** - computes reachability | +| `monoid.geolog` | Algebraic monoid theory with associativity axiom | +| `petri_net.geolog` | Petri net formalization with places, transitions, marking | +| `petri_net_showcase.geolog` | **Full showcase** - parameterized theories, nested instances, cross-references | +| `todo_list.geolog` | Task management example with dependencies | +| `solver_demo.geolog` | Solver demonstration with reachability queries | +| `relalg_simple.geolog` | Simple RelAlgIR query plan examples | + +### Running Examples + +```bash +# Start REPL with an example +cargo run -- examples/geolog/graph.geolog + +# Or load interactively +cargo run +:source examples/geolog/transitive_closure.geolog +:inspect Chain +:chase Chain # Computes transitive closure! 
+``` diff --git a/examples/elaborate.rs b/examples/elaborate.rs new file mode 100644 index 0000000..58e85a5 --- /dev/null +++ b/examples/elaborate.rs @@ -0,0 +1,168 @@ +use geolog::universe::Universe; +use geolog::{ + elaborate::{ElaborationContext, Env, elaborate_instance_ctx, elaborate_theory}, + parse, + repl::InstanceEntry, +}; +use std::collections::HashMap; +use std::rc::Rc; + +fn main() { + let input = r#" +namespace VanillaPetriNets; + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} +"#; + + println!("=== PARSING ==="); + let file = match parse(input) { + Ok(f) => f, + Err(e) => { + eprintln!("Parse error: {}", e); + std::process::exit(1); + } + }; + println!("Parsed {} declarations\n", file.declarations.len()); + + println!("=== ELABORATING ==="); + let mut env = Env::new(); + let mut universe = Universe::new(); + + for decl in &file.declarations { + match &decl.node { + geolog::Declaration::Namespace(name) => { + println!("Skipping namespace: {}", name); + } + geolog::Declaration::Theory(t) => { + print!("Elaborating theory {}... 
", t.name); + match elaborate_theory(&mut env, t) { + Ok(elab) => { + println!("OK!"); + println!( + " Params: {:?}", + elab.params.iter().map(|p| &p.name).collect::>() + ); + println!(" Sorts: {:?}", elab.theory.signature.sorts); + println!( + " Functions: {:?}", + elab.theory + .signature + .functions + .iter() + .map(|f| &f.name) + .collect::>() + ); + println!(" Axioms: {}", elab.theory.axioms.len()); + for (i, ax) in elab.theory.axioms.iter().enumerate() { + println!( + " [{i}] {} vars, premise -> conclusion", + ax.context.vars.len() + ); + } + println!(); + + // Add to environment for dependent theories + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + Err(e) => { + println!("FAILED: {}", e); + } + } + } + geolog::Declaration::Instance(i) => { + // Extract theory name from the type expression + let theory_name = i.theory.as_single_path() + .and_then(|p| p.segments.first().cloned()) + .unwrap_or_else(|| "?".to_string()); + print!("Elaborating instance {}... ", i.name); + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + match elaborate_instance_ctx(&mut ctx, i) { + Ok(result) => { + let structure = &result.structure; + println!("OK!"); + println!(" Theory: {}", theory_name); + println!(" Elements: {} total", structure.len()); + for sort_id in 0..structure.carriers.len() { + println!( + " Sort {}: {} elements", + sort_id, + structure.carrier_size(sort_id) + ); + } + println!(" Functions defined:"); + for (fid, func_map) in structure.functions.iter().enumerate() { + println!(" Func {}: {} mappings", fid, func_map.len()); + } + println!(); + } + Err(e) => { + println!("FAILED: {}", e); + } + } + } + geolog::Declaration::Query(_) => { + println!("Skipping query (not implemented yet)"); + } + } + } + + println!("=== SUMMARY ==="); + println!("Elaborated {} theories", env.theories.len()); +} diff --git 
a/examples/full_petri.rs b/examples/full_petri.rs new file mode 100644 index 0000000..4cb1329 --- /dev/null +++ b/examples/full_petri.rs @@ -0,0 +1,132 @@ +use geolog::parse; + +fn main() { + let input = r#" +namespace VanillaPetriNets; + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + ax1 : forall w : W. |- w W/src .arc N/out/src = w W/src .firing F/of; + ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; + ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]); +} + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. 
|- y bwd fwd = y; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} + +// Reachability problem: can we get from A to B? +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + t token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/B; + }; +} + +query findTrace { + ? : ExampleNet Trace instance; +} +"#; + + match parse(input) { + Ok(file) => { + println!("Parsed successfully!"); + println!("Declarations: {}", file.declarations.len()); + for decl in &file.declarations { + match &decl.node { + geolog::Declaration::Namespace(n) => println!(" - namespace {}", n), + geolog::Declaration::Theory(t) => { + println!(" - theory {} ({} items)", t.name, t.body.len()) + } + geolog::Declaration::Instance(i) => { + println!(" - instance {} ({} items)", i.name, i.body.len()) + } + geolog::Declaration::Query(q) => println!(" - query {}", q.name), + } + } + } + Err(e) => { + eprintln!("Parse error: {}", e); + std::process::exit(1); + } + } +} diff --git a/examples/geolog/category.geolog b/examples/geolog/category.geolog new file mode 100644 index 0000000..f480a1f --- /dev/null +++ b/examples/geolog/category.geolog @@ -0,0 +1,86 @@ +// Category theory in current geolog syntax +// +// This is the "desugared" version of the aspirational syntax in +// loose_thoughts/2026-01-21_dependent_sorts_and_functional_relations.md + +theory Category { + ob : Sort; + mor : Sort; + + // Morphism source and target + src : mor -> ob; + tgt : mor -> ob; + + // 
Composition: comp(f, g, h) means "h = f ; g" (f then g) + // Domain constraint: f.tgt = g.src + comp : [f: mor, g: mor, h: mor] -> Prop; + + // Identity: id(a, f) means "f is the identity on a" + id : [a: ob, f: mor] -> Prop; + + // === Axioms === + + // Identity morphisms have matching source and target + ax/id_src : forall x : ob, i : mor. [a: x, f: i] id |- i src = x; + ax/id_tgt : forall x : ob, i : mor. [a: x, f: i] id |- i tgt = x; + + // Composition domain constraint + ax/comp_dom : forall p : mor, q : mor, r : mor. + [f: p, g: q, h: r] comp |- p tgt = q src; + + // Composition source/target + ax/comp_src : forall p : mor, q : mor, r : mor. + [f: p, g: q, h: r] comp |- r src = p src; + ax/comp_tgt : forall p : mor, q : mor, r : mor. + [f: p, g: q, h: r] comp |- r tgt = q tgt; + + // Existence of identities (one per object) + ax/id_exists : forall x : ob. |- exists i : mor. [a: x, f: i] id; + + // Existence of composites (when composable) + ax/comp_exists : forall p : mor, q : mor. + p tgt = q src |- exists r : mor. [f: p, g: q, h: r] comp; + + // Left unit: id_a ; f = f + ax/unit_left : forall x : ob, i : mor, p : mor, r : mor. + [a: x, f: i] id, p src = x, [f: i, g: p, h: r] comp |- r = p; + + // Right unit: f ; id_b = f + ax/unit_right : forall y : ob, i : mor, p : mor, r : mor. + [a: y, f: i] id, p tgt = y, [f: p, g: i, h: r] comp |- r = p; + + // Associativity: (f ; g) ; h = f ; (g ; h) + ax/assoc : forall p : mor, q : mor, r : mor, pq : mor, qr : mor, pqr1 : mor, pqr2 : mor. + [f: p, g: q, h: pq] comp, [f: pq, g: r, h: pqr1] comp, + [f: q, g: r, h: qr] comp, [f: p, g: qr, h: pqr2] comp + |- pqr1 = pqr2; + + // Uniqueness of composition (functional) + ax/comp_unique : forall p : mor, q : mor, r1 : mor, r2 : mor. + [f: p, g: q, h: r1] comp, [f: p, g: q, h: r2] comp |- r1 = r2; + + // Uniqueness of identity (one per object) + ax/id_unique : forall x : ob, i1 : mor, i2 : mor. 
+ [a: x, f: i1] id, [a: x, f: i2] id |- i1 = i2; +} + +// The "walking arrow" category: A --f--> B +// +// Now we can declare just objects and non-identity morphisms! +// The chase derives: +// - Identity morphisms for each object (via ax/id_exists) +// - Composition facts (via ax/comp_exists) +// - Source/target for compositions (via ax/comp_src, ax/comp_tgt) +// +// The equality saturation (via congruence closure) collapses: +// - id;id;id;... = id (via ax/unit_left and ax/unit_right) +// - Duplicate compositions (via ax/comp_unique) +// Without CC, the chase would loop forever creating id;id, id;id;id, ... +instance Arrow : Category = chase { + // Objects + A : ob; + B : ob; + + // Non-identity morphism + f : mor; f src = A; f tgt = B; +} diff --git a/examples/geolog/field_projection_chase_test.geolog b/examples/geolog/field_projection_chase_test.geolog new file mode 100644 index 0000000..a007482 --- /dev/null +++ b/examples/geolog/field_projection_chase_test.geolog @@ -0,0 +1,27 @@ +// Test: Field projection in chase + +theory FieldProjectionChaseTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Marker sort for elements whose x field matches a given a + XMatches : Sort; + XMatches/r : XMatches -> R; + XMatches/a : XMatches -> A; + + // Axiom: if r's x field equals a, create an XMatches + ax1 : forall r : R, a : A, b : B. r R/data = [x: a, y: b] |- exists m : XMatches. 
m XMatches/r = r, m XMatches/a = a; +} + +instance Test : FieldProjectionChaseTest = chase { + a1 : A; + a2 : A; + b1 : B; + r1 : R; + r1 R/data = [x: a1, y: b1]; + r2 : R; + r2 R/data = [x: a2, y: b1]; +} diff --git a/examples/geolog/field_projection_test.geolog b/examples/geolog/field_projection_test.geolog new file mode 100644 index 0000000..deb7376 --- /dev/null +++ b/examples/geolog/field_projection_test.geolog @@ -0,0 +1,12 @@ +// Test: Field projection syntax + +theory FieldProjectionTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Axiom using field projection: r R/data .x + ax1 : forall r : R, a : A. r R/data .x = a |- true; +} diff --git a/examples/geolog/graph.geolog b/examples/geolog/graph.geolog new file mode 100644 index 0000000..69ba7f5 --- /dev/null +++ b/examples/geolog/graph.geolog @@ -0,0 +1,79 @@ +// Directed Graph: vertices and edges with source/target functions +// +// This is the canonical example of a "presheaf" - a functor from a small +// category (the "walking arrow" • → •) to Set. 
+ +theory Graph { + V : Sort; // Vertices + E : Sort; // Edges + + src : E -> V; // Source of an edge + tgt : E -> V; // Target of an edge +} + +// A simple triangle graph: A → B → C → A +instance Triangle : Graph = { + // Vertices + A : V; + B : V; + C : V; + + // Edges + ab : E; + bc : E; + ca : E; + + // Edge endpoints + ab src = A; + ab tgt = B; + bc src = B; + bc tgt = C; + ca src = C; + ca tgt = A; +} + +// A self-loop: one vertex with an edge to itself +instance Loop : Graph = { + v : V; + e : E; + e src = v; + e tgt = v; +} + +// The "walking arrow": two vertices, one edge +instance Arrow : Graph = { + s : V; + t : V; + f : E; + f src = s; + f tgt = t; +} + +// A more complex graph: diamond shape with two paths from top to bottom +// +// top +// / \ +// left right +// \ / +// bottom +// +instance Diamond : Graph = { + top : V; + left : V; + right : V; + bottom : V; + + top_left : E; + top_right : E; + left_bottom : E; + right_bottom : E; + + top_left src = top; + top_left tgt = left; + top_right src = top; + top_right tgt = right; + left_bottom src = left; + left_bottom tgt = bottom; + right_bottom src = right; + right_bottom tgt = bottom; +} diff --git a/examples/geolog/iso_instance_test.geolog b/examples/geolog/iso_instance_test.geolog new file mode 100644 index 0000000..68653c8 --- /dev/null +++ b/examples/geolog/iso_instance_test.geolog @@ -0,0 +1,29 @@ +// Multi-parameter theory instantiation test + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +theory A { a : Sort; } +theory B { b : Sort; } + +instance As : A = { + a1 : a; + a2 : a; +} + +instance Bs : B = { + b1 : b; + b2 : b; +} + +// Can we create an Iso instance with sort parameters? 
+instance AB_Iso : As/a Bs/b Iso = { + a1 fwd = Bs/b1; + a2 fwd = Bs/b2; + b1 bwd = As/a1; + b2 bwd = As/a2; +} diff --git a/examples/geolog/iso_theory_test.geolog b/examples/geolog/iso_theory_test.geolog new file mode 100644 index 0000000..d17ad21 --- /dev/null +++ b/examples/geolog/iso_theory_test.geolog @@ -0,0 +1,9 @@ +// Multi-parameter theory test (Iso from vision) + +// First just try sorts as parameters +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + // Axioms would need chained function application... + // fb : forall x : X. |- x fwd bwd = x; +} diff --git a/examples/geolog/monoid.geolog b/examples/geolog/monoid.geolog new file mode 100644 index 0000000..596dd37 --- /dev/null +++ b/examples/geolog/monoid.geolog @@ -0,0 +1,78 @@ +// Monoid: a set with an associative binary operation and identity element +// +// This is the simplest algebraic structure with interesting axioms. +// Note: geolog uses postfix function application. + +theory Monoid { + M : Sort; + + // Binary operation: M × M → M + mul : [x: M, y: M] -> M; + + // Identity element: we use a unary function from M to M that + // "picks out" the identity (any x maps to e) + // A cleaner approach would use Unit → M but that needs product support. + id : M -> M; + + // Left identity: id(x) * y = y (id(x) is always e) + ax/left_id : forall x : M, y : M. + |- [x: x id, y: y] mul = y; + + // Right identity: x * id(y) = x + ax/right_id : forall x : M, y : M. + |- [x: x, y: y id] mul = x; + + // Associativity: (x * y) * z = x * (y * z) + ax/assoc : forall x : M, y : M, z : M. + |- [x: [x: x, y: y] mul, y: z] mul = [x: x, y: [x: y, y: z] mul] mul; + + // id is constant: id(x) = id(y) for all x, y + ax/id_const : forall x : M, y : M. 
+ |- x id = y id; +} + +// Trivial monoid: single element, e * e = e +instance Trivial : Monoid = { + e : M; + + // Multiplication table: e * e = e + // Using positional syntax: [a, b] maps to [x: a, y: b] + [e, e] mul = e; + + // Identity: e is the identity element + e id = e; +} + +// Boolean "And" monoid: {T, F} with T as identity +// T and T = T, T and F = F, F and T = F, F and F = F +instance BoolAnd : Monoid = { + T : M; + F : M; + + // Identity: T is the identity element + T id = T; + F id = T; + + // Multiplication table for "and": + [x: T, y: T] mul = T; + [x: T, y: F] mul = F; + [x: F, y: T] mul = F; + [x: F, y: F] mul = F; +} + +// Boolean "Or" monoid: {T, F} with F as identity +// T or T = T, T or F = T, F or T = T, F or F = F +instance BoolOr : Monoid = { + T : M; + F : M; + + // Identity: F is the identity element + T id = F; + F id = F; + + // Multiplication table for "or": + [x: T, y: T] mul = T; + [x: T, y: F] mul = T; + [x: F, y: T] mul = T; + [x: F, y: F] mul = F; +} diff --git a/examples/geolog/nested_instance_test.geolog b/examples/geolog/nested_instance_test.geolog new file mode 100644 index 0000000..2182887 --- /dev/null +++ b/examples/geolog/nested_instance_test.geolog @@ -0,0 +1,33 @@ +// Test: Nested instance declarations (following vision pattern) + +theory Place { + P : Sort; +} + +theory (Pl : Place instance) Token { + token : Sort; + token/of : token -> Pl/P; +} + +theory (Pl : Place instance) Problem { + initial_marking : Pl Token instance; + target_marking : Pl Token instance; +} + +// Create a place instance +instance MyPlaces : Place = { + p1 : P; + p2 : P; +} + +// Test nested instance declarations +instance TestProblem : MyPlaces Problem = { + initial_marking = { + t1 : token; + t1 token/of = MyPlaces/p1; + }; + target_marking = { + t2 : token; + t2 token/of = MyPlaces/p2; + }; +} diff --git a/examples/geolog/petri_net.geolog b/examples/geolog/petri_net.geolog new file mode 100644 index 0000000..8e4e235 --- /dev/null +++ 
b/examples/geolog/petri_net.geolog @@ -0,0 +1,135 @@ +// Petri Net: a bipartite graph between places and transitions +// +// Petri nets model concurrent systems. Places hold tokens, transitions +// fire when their input places have tokens, consuming inputs and +// producing outputs. +// +// This encoding uses explicit "arc" sorts for input/output connections, +// which is more faithful to the categorical semantics (a span). + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + In : Sort; // Input arcs (from place to transition) + Out : Sort; // Output arcs (from transition to place) + + // Input arc endpoints + in/place : In -> P; + in/trans : In -> T; + + // Output arc endpoints + out/trans : Out -> T; + out/place : Out -> P; +} + +// A simple producer-consumer net: +// +// (ready) --[produce]--> (buffer) --[consume]--> (done) +// +instance ProducerConsumer : PetriNet = { + // Places + ready : P; + buffer : P; + done : P; + + // Transitions + produce : T; + consume : T; + + // Input arcs + i1 : In; + i1 in/place = ready; + i1 in/trans = produce; + + i2 : In; + i2 in/place = buffer; + i2 in/trans = consume; + + // Output arcs + o1 : Out; + o1 out/trans = produce; + o1 out/place = buffer; + + o2 : Out; + o2 out/trans = consume; + o2 out/place = done; +} + +// Mutual exclusion: two processes competing for a shared resource +// +// (idle1) --[enter1]--> (crit1) --[exit1]--> (idle1) +// ^ | +// | (mutex) | +// | v +// (idle2) --[enter2]--> (crit2) --[exit2]--> (idle2) +// +instance MutualExclusion : PetriNet = { + // Places for process 1 + idle1 : P; + crit1 : P; + + // Places for process 2 + idle2 : P; + crit2 : P; + + // Shared mutex token + mutex : P; + + // Transitions + enter1 : T; + exit1 : T; + enter2 : T; + exit2 : T; + + // Process 1 enters: needs idle1 AND mutex + i_enter1_idle : In; + i_enter1_idle in/place = idle1; + i_enter1_idle in/trans = enter1; + + i_enter1_mutex : In; + i_enter1_mutex in/place = mutex; + i_enter1_mutex in/trans = 
enter1; + + o_enter1 : Out; + o_enter1 out/trans = enter1; + o_enter1 out/place = crit1; + + // Process 1 exits: releases mutex + i_exit1 : In; + i_exit1 in/place = crit1; + i_exit1 in/trans = exit1; + + o_exit1_idle : Out; + o_exit1_idle out/trans = exit1; + o_exit1_idle out/place = idle1; + + o_exit1_mutex : Out; + o_exit1_mutex out/trans = exit1; + o_exit1_mutex out/place = mutex; + + // Process 2 enters: needs idle2 AND mutex + i_enter2_idle : In; + i_enter2_idle in/place = idle2; + i_enter2_idle in/trans = enter2; + + i_enter2_mutex : In; + i_enter2_mutex in/place = mutex; + i_enter2_mutex in/trans = enter2; + + o_enter2 : Out; + o_enter2 out/trans = enter2; + o_enter2 out/place = crit2; + + // Process 2 exits: releases mutex + i_exit2 : In; + i_exit2 in/place = crit2; + i_exit2 in/trans = exit2; + + o_exit2_idle : Out; + o_exit2_idle out/trans = exit2; + o_exit2_idle out/place = idle2; + + o_exit2_mutex : Out; + o_exit2_mutex out/trans = exit2; + o_exit2_mutex out/place = mutex; +} diff --git a/examples/geolog/petri_net_full.geolog b/examples/geolog/petri_net_full.geolog new file mode 100644 index 0000000..d4e3fe7 --- /dev/null +++ b/examples/geolog/petri_net_full.geolog @@ -0,0 +1,195 @@ +// Full Petri Net Reachability - Type-Theoretic Encoding +// +// This demonstrates the complete type-theoretic encoding of Petri net +// reachability from the original geolog design vision. 
+// +// Original design: loose_thoughts/2025-12-12_12:10_VanillaPetriNetRechability.md +// +// Key concepts: +// - PetriNet: places, transitions, input/output arcs (with proper arc semantics) +// - Marking: tokens in a net (parameterized by net) +// - ReachabilityProblem: initial and target markings (nested instances) +// - Trace: sequence of firings with wires connecting arcs +// - Iso: isomorphism between two sorts (used for bijections) +// - Solution: a trace with isomorphisms to markings +// +// This encoding is more type-theoretically precise than the simple +// PlaceReachability: it tracks individual tokens and arc multiplicities, +// enabling correct handling of "multi-token" transitions. + +// ============================================================ +// THEORY: PetriNet +// Basic structure: places, transitions, and arcs +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + // Each arc knows which place/transition it connects + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking (parameterized by N : PetriNet) +// A marking assigns tokens to places +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem (parameterized by N : PetriNet) +// Defines initial and target markings as nested instances +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking 
instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace (parameterized by N : PetriNet) +// A trace is a sequence of transition firings with "wires" +// connecting input and output arcs +// ============================================================ + +theory (N : PetriNet instance) Trace { + // Firings + F : Sort; + F/of : F -> N/T; + + // Wires connect output of one firing to input of another + W : Sort; + W/src : W -> [firing : F, arc : N/out]; // Wire source (firing, output arc) + W/tgt : W -> [firing : F, arc : N/in]; // Wire target (firing, input arc) + + // Wire coherence: output arc must belong to source firing's transition + ax1 : forall w : W. |- w W/src .arc N/out/src = w W/src .firing F/of; + // Wire coherence: input arc must belong to target firing's transition + ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; + + // Wire uniqueness: each (firing, out-arc) pair has at most one wire + ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + // Wire uniqueness: each (firing, in-arc) pair has at most one wire + ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; + + // Terminals: for initial marking tokens (input) and final marking tokens (output) + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Every output arc of every firing must be wired OR be an output terminal + ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + + // Every input arc of every firing must be wired OR be an input terminal + ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- + (exists w : W. 
w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]); +} + +// ============================================================ +// THEORY: Iso (parameterized by two sorts) +// An isomorphism between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution (parameterized by N and RP) +// A solution to a reachability problem +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + // The witnessing trace + trace : N Trace instance; + + // Bijection between input terminals and initial marking tokens + initial_marking_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + + // Bijection between output terminals and target marking tokens + target_marking_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; + + // Initial marking commutes: token placement matches terminal placement + initial_marking_P_comm : forall i : trace/input_terminal. + |- i trace/input_terminal/of = i initial_marking_iso/fwd RP/initial_marking/token/of; + + // Target marking commutes: token placement matches terminal placement + target_marking_P_comm : forall o : trace/output_terminal. 
+ |- o trace/output_terminal/of = o target_marking_iso/fwd RP/target_marking/token/of; +} + +// ============================================================ +// INSTANCE: ExampleNet - a small Petri net +// +// (A) --[ab]--> (B) --[bc]--> (C) +// ^ | +// +---[ba]------+ +// ============================================================ + +instance ExampleNet : PetriNet = { + // Places + A : P; + B : P; + C : P; + + // Transitions + ab : T; + ba : T; + bc : T; + + // A -> B (via ab) + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + + // B -> A (via ba) + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + + // B -> C (via bc) + bc_in : in; + bc_in in/src = B; + bc_in in/tgt = bc; + bc_out : out; + bc_out out/src = bc; + bc_out out/tgt = C; +} + +// ============================================================ +// Example queries (once query solving is implemented): +// +// query can_reach_B_from_A { +// ? : ExampleNet problem0 Solution instance; +// } +// +// where problem0 : ExampleNet ReachabilityProblem = { +// initial_marking = { tok : token; tok token/of = ExampleNet/A; }; +// target_marking = { tok : token; tok token/of = ExampleNet/B; }; +// } +// ============================================================ diff --git a/examples/geolog/petri_net_showcase.geolog b/examples/geolog/petri_net_showcase.geolog new file mode 100644 index 0000000..ece5de2 --- /dev/null +++ b/examples/geolog/petri_net_showcase.geolog @@ -0,0 +1,345 @@ +// Petri Net Reachability - Full Type-Theoretic Encoding +// +// This showcase demonstrates geolog's core capabilities through a +// non-trivial domain: encoding Petri net reachability as dependent types. 
+// +// A solution to a reachability problem is NOT a yes/no boolean but a +// CONSTRUCTIVE WITNESS: a diagrammatic proof that tokens can flow from +// initial to target markings via a sequence of transition firings. +// +// Key concepts demonstrated: +// - Parameterized theories (Marking depends on PetriNet instance) +// - Nested instance types (ReachabilityProblem contains Marking instances) +// - Sort-parameterized theories (Iso takes two sorts as parameters) +// - Cross-instance references (solution's trace elements reference problem's tokens) +// +// Original design: loose_thoughts/2025-12-12_12:10_VanillaPetriNetRechability.md + +// ============================================================ +// THEORY: PetriNet +// Places, transitions, and arcs with proper arc semantics +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking (parameterized by N : PetriNet) +// A marking assigns tokens to places +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem (parameterized by N : PetriNet) +// Initial and target markings as nested instances +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// 
THEORY: Trace (parameterized by N : PetriNet) +// A trace records transition firings and token flow via wires +// ============================================================ +// +// A trace is a diagrammatic proof of reachability: +// - Firings represent transition occurrences +// - Wires connect output arcs of firings to input arcs of other firings +// - Terminals connect to the initial/target markings +// +// The completeness axiom (ax/must_be_fed) ensures every input arc +// of every firing is accounted for - either wired from another firing +// or fed by an input terminal. + +theory (N : PetriNet instance) Trace { + // Firings + F : Sort; + F/of : F -> N/T; + + // Wires connect output arcs of firings to input arcs of other firings + W : Sort; + W/src_firing : W -> F; + W/src_arc : W -> N/out; + W/tgt_firing : W -> F; + W/tgt_arc : W -> N/in; + + // Wire coherence: source arc must belong to source firing's transition + ax/wire_src_coherent : forall w : W. + |- w W/src_arc N/out/src = w W/src_firing F/of; + + // Wire coherence: target arc must belong to target firing's transition + ax/wire_tgt_coherent : forall w : W. + |- w W/tgt_arc N/in/tgt = w W/tgt_firing F/of; + + // Wire place coherence: wire connects matching places + ax/wire_place_coherent : forall w : W. + |- w W/src_arc N/out/tgt = w W/tgt_arc N/in/src; + + // Terminals + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + + // Terminals connect to specific firings and arcs + input_terminal/tgt_firing : input_terminal -> F; + input_terminal/tgt_arc : input_terminal -> N/in; + output_terminal/src_firing : output_terminal -> F; + output_terminal/src_arc : output_terminal -> N/out; + + // Terminal coherence axioms + ax/input_terminal_coherent : forall i : input_terminal. + |- i input_terminal/tgt_arc N/in/tgt = i input_terminal/tgt_firing F/of; + + ax/output_terminal_coherent : forall o : output_terminal. 
+ |- o output_terminal/src_arc N/out/src = o output_terminal/src_firing F/of; + + // Terminal place coherence + ax/input_terminal_place : forall i : input_terminal. + |- i input_terminal/of = i input_terminal/tgt_arc N/in/src; + + ax/output_terminal_place : forall o : output_terminal. + |- o output_terminal/of = o output_terminal/src_arc N/out/tgt; + + // COMPLETENESS: Every arc of every firing must be accounted for. + + // Input completeness: every input arc must be fed by a wire or input terminal + ax/input_complete : forall f : F, arc : N/in. + arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt_firing = f, w W/tgt_arc = arc) \/ + (exists i : input_terminal. i input_terminal/tgt_firing = f, i input_terminal/tgt_arc = arc); + + // Output completeness: every output arc must be captured by a wire or output terminal + ax/output_complete : forall f : F, arc : N/out. + arc N/out/src = f F/of |- + (exists w : W. w W/src_firing = f, w W/src_arc = arc) \/ + (exists o : output_terminal. o output_terminal/src_firing = f, o output_terminal/src_arc = arc); +} + +// ============================================================ +// THEORY: Iso (parameterized by two sorts) +// Isomorphism (bijection) between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + + // Roundtrip axioms ensure this is a true bijection + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. 
|- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution (parameterized by N and RP) +// A constructive witness that target is reachable from initial +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + + // Bijection: input terminals <-> initial marking tokens + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + + // Bijection: output terminals <-> target marking tokens + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; + + // Commutativity axioms (currently unchecked): + // ax/init_comm : forall i : trace/input_terminal. + // |- i trace/input_terminal/of = i initial_iso/fwd RP/initial_marking/token/of; + // ax/target_comm : forall o : trace/output_terminal. + // |- o trace/output_terminal/of = o target_iso/fwd RP/target_marking/token/of; +} + +// ============================================================ +// INSTANCE: ExampleNet +// +// A Petri net with places A, B, C and transitions: +// ab: consumes 1 token from A, produces 1 token in B +// ba: consumes 1 token from B, produces 1 token in A +// abc: consumes 1 token from A AND 1 from B, produces 1 token in C +// +// +---[ba]----+ +// v | +// (A) --[ab]->(B) --+ +// | | +// +----[abc]-------+--> (C) +// +// The abc transition is interesting: it requires BOTH an A-token +// and a B-token to fire, producing a C-token. +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; abc : T; + + // A -> B (via ab) + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + // B -> A (via ba) + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + // A + B -> C (via abc) - note: two input arcs! 
+ abc_in1 : in; abc_in1 in/src = A; abc_in1 in/tgt = abc; + abc_in2 : in; abc_in2 in/src = B; abc_in2 in/tgt = abc; + abc_out : out; abc_out out/src = abc; abc_out out/tgt = C; +} + +// ============================================================ +// PROBLEM 0: Can we reach B from A with one token? +// Initial: 1 token in A +// Target: 1 token in B +// ============================================================ + +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// ============================================================ +// SOLUTION 0: Yes! Fire transition 'ab' once. +// +// This Solution instance is a CONSTRUCTIVE PROOF: +// - The trace contains one firing (f1) of transition 'ab' +// - The input terminal feeds the A-token into f1's input arc +// - The output terminal captures f1's B-token output +// - The isomorphisms prove the token counts match exactly +// ============================================================ + +instance solution0 : ExampleNet problem0 Solution = { + trace = { + // One firing of transition 'ab' + f1 : F; + f1 F/of = ExampleNet/ab; + + // Input terminal: feeds the initial A-token into f1 + it : input_terminal; + it input_terminal/of = ExampleNet/A; + it input_terminal/tgt_firing = f1; + it input_terminal/tgt_arc = ExampleNet/ab_in; + + // Output terminal: captures f1's B-token output + ot : output_terminal; + ot output_terminal/of = ExampleNet/B; + ot output_terminal/src_firing = f1; + ot output_terminal/src_arc = ExampleNet/ab_out; + }; + + initial_iso = { + trace/it fwd = problem0/initial_marking/tok; + problem0/initial_marking/tok bwd = trace/it; + }; + + target_iso = { + trace/ot fwd = problem0/target_marking/tok; + problem0/target_marking/tok bwd = trace/ot; + }; +} + +// ============================================================ +// PROBLEM 2: Can we reach C from two 
A-tokens? +// Initial: 2 tokens in A +// Target: 1 token in C +// +// This is interesting because the only path to C is via 'abc', +// which requires tokens in BOTH A and B simultaneously. +// ============================================================ + +instance problem2 : ExampleNet ReachabilityProblem = { + initial_marking = { + t1 : token; t1 token/of = ExampleNet/A; + t2 : token; t2 token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/C; + }; +} + +// ============================================================ +// SOLUTION 2: Yes! Fire 'ab' then 'abc'. +// +// Token flow diagram: +// +// [it1]--A-->[f1: ab]--B--wire-->[f2: abc]--C-->[ot] +// [it2]--A-----------------^ +// +// Step 1: Fire 'ab' to move one token A -> B +// - it1 feeds A-token into f1 via ab_in +// - f1 produces B-token via ab_out +// Step 2: Fire 'abc' consuming one A-token and one B-token +// - it2 feeds A-token into f2 via abc_in1 +// - Wire connects f1's ab_out to f2's abc_in2 (the B-input) +// - f2 produces C-token via abc_out +// ============================================================ + +instance solution2 : ExampleNet problem2 Solution = { + trace = { + // Two firings + f1 : F; f1 F/of = ExampleNet/ab; // First: A -> B + f2 : F; f2 F/of = ExampleNet/abc; // Second: A + B -> C + + // Wire connecting f1's B-output to f2's B-input + // This is the crucial connection that makes the trace valid! 
+ w1 : W; + w1 W/src_firing = f1; + w1 W/src_arc = ExampleNet/ab_out; + w1 W/tgt_firing = f2; + w1 W/tgt_arc = ExampleNet/abc_in2; + + // Input terminal 1: feeds first A-token into f1 + it1 : input_terminal; + it1 input_terminal/of = ExampleNet/A; + it1 input_terminal/tgt_firing = f1; + it1 input_terminal/tgt_arc = ExampleNet/ab_in; + + // Input terminal 2: feeds second A-token into f2 + it2 : input_terminal; + it2 input_terminal/of = ExampleNet/A; + it2 input_terminal/tgt_firing = f2; + it2 input_terminal/tgt_arc = ExampleNet/abc_in1; + + // Output terminal: captures f2's C-token output + ot : output_terminal; + ot output_terminal/of = ExampleNet/C; + ot output_terminal/src_firing = f2; + ot output_terminal/src_arc = ExampleNet/abc_out; + }; + + // Bijection: 2 input terminals <-> 2 initial tokens + initial_iso = { + trace/it1 fwd = problem2/initial_marking/t1; + trace/it2 fwd = problem2/initial_marking/t2; + problem2/initial_marking/t1 bwd = trace/it1; + problem2/initial_marking/t2 bwd = trace/it2; + }; + + // Bijection: 1 output terminal <-> 1 target token + target_iso = { + trace/ot fwd = problem2/target_marking/t; + problem2/target_marking/t bwd = trace/ot; + }; +} diff --git a/examples/geolog/petri_net_solution.geolog b/examples/geolog/petri_net_solution.geolog new file mode 100644 index 0000000..4f3048f --- /dev/null +++ b/examples/geolog/petri_net_solution.geolog @@ -0,0 +1,188 @@ +// Full Petri Net Reachability with Synthesized Solution +// +// This file contains the complete type-theoretic encoding of Petri net +// reachability, plus a manually synthesized solution proving that place B +// is reachable from place A in the example net. +// +// ============================================================ +// This instance was synthesized automatically by Claude Opus 4.5. +// As was this entire file, and this entire project, really. 
+// ============================================================ + +// ============================================================ +// THEORY: PetriNet - Basic structure with arc semantics +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking - Tokens parameterized by a net +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; // Which place each token is in +} + +// ============================================================ +// THEORY: ReachabilityProblem - Initial and target markings +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace - A sequence of transition firings with wires +// ============================================================ +// +// Simplified version for now - full version with product types commented out below. 
+ +theory (N : PetriNet instance) Trace { + F : Sort; // Firings + F/of : F -> N/T; // Which transition each fires + + // Terminals for initial/final marking tokens + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; +} + +// Full Trace theory with wires and product types (not yet fully supported): +// +// theory (N : PetriNet instance) Trace { +// F : Sort; // Firings +// F/of : F -> N/T; // Which transition each fires +// +// W : Sort; // Wires connecting firings +// W/src : W -> [firing : F, arc : N/out]; // Wire source +// W/tgt : W -> [firing : F, arc : N/in]; // Wire target +// +// // Wire coherence axioms +// ax1 : forall w : W. |- w W/src .arc N/out/src = w W/src .firing F/of; +// ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; +// +// // Wire uniqueness +// ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; +// ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; +// +// // Terminals for initial/final marking tokens +// input_terminal : Sort; +// output_terminal : Sort; +// input_terminal/of : input_terminal -> N/P; +// output_terminal/of : output_terminal -> N/P; +// input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; +// output_terminal/src : output_terminal -> [firing : F, arc : N/out]; +// +// // Coverage axioms +// ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- +// (exists w : W. w W/src = [firing: f, arc: arc]) \/ +// (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); +// ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- +// (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ +// (exists i : input_terminal. 
i input_terminal/tgt = [firing: f, arc: arc]); +// } + +// ============================================================ +// THEORY: Iso - Isomorphism between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution - A complete reachability witness +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; + + ax/init_comm : forall i : trace/input_terminal. + |- i trace/input_terminal/of = i initial_iso/fwd RP/initial_marking/token/of; + ax/target_comm : forall o : trace/output_terminal. + |- o trace/output_terminal/of = o target_iso/fwd RP/target_marking/token/of; +} + +// ============================================================ +// INSTANCE: ExampleNet - A small Petri net +// +// (A) --[ab]--> (B) --[bc]--> (C) +// ^ | +// +---[ba]------+ +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; bc : T; + + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + bc_in : in; bc_in in/src = B; bc_in in/tgt = bc; + bc_out : out; bc_out out/src = bc; bc_out out/tgt = C; +} + +// ============================================================ +// INSTANCE: problem0 - Can we reach B from A? 
+// ============================================================ + +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// ============================================================ +// INSTANCE: solution0 - YES! Here's the proof. +// ============================================================ +// This instance was synthesized automatically by Claude Opus 4.5. +// ============================================================ + +// The solution proves that place B is reachable from place A by firing +// transition ab. This creates a trace with one firing and the necessary +// input/output terminal mappings. + +instance solution0 : ExampleNet problem0 Solution = { + trace = { + f1 : F; + f1 F/of = ExampleNet/ab; + + it : input_terminal; + it input_terminal/of = ExampleNet/A; + + ot : output_terminal; + ot output_terminal/of = ExampleNet/B; + }; + + // NOTE: Cross-instance references (e.g., trace/it in initial_iso) + // are not yet fully supported. The iso instances would map: + // - trace/it <-> problem0/initial_marking/tok + // - trace/ot <-> problem0/target_marking/tok +} diff --git a/examples/geolog/petri_reachability.geolog b/examples/geolog/petri_reachability.geolog new file mode 100644 index 0000000..fb5a453 --- /dev/null +++ b/examples/geolog/petri_reachability.geolog @@ -0,0 +1,164 @@ +// Petri Net Reachability - Full Example +// +// This demonstrates the core ideas from the original geolog design document: +// modeling Petri net reachability using geometric logic with the chase algorithm. 
+// +// Original design: loose_thoughts/2025-12-12_12:10.md +// +// Key concepts: +// - PetriNet: places, transitions, input/output arcs +// - Marking: assignment of tokens to places (parameterized theory) +// - Trace: sequence of transition firings connecting markings +// - Reachability: computed via chase algorithm + +// ============================================================ +// THEORY: PetriNet +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + In : Sort; // Input arcs (place -> transition) + Out : Sort; // Output arcs (transition -> place) + + // Arc structure + in/place : In -> P; + in/trans : In -> T; + out/trans : Out -> T; + out/place : Out -> P; +} + +// ============================================================ +// THEORY: Marking (parameterized) +// A marking assigns tokens to places in a specific net +// ============================================================ + +theory (N : PetriNet instance) Marking { + Token : Sort; + of : Token -> N/P; +} + +// ============================================================ +// THEORY: PlaceReachability +// Simplified reachability at the place level +// ============================================================ + +theory PlaceReachability { + P : Sort; + T : Sort; + + // Which transition connects which places + // Fires(t, from, to) means transition t can move a token from 'from' to 'to' + Fires : [trans: T, from: P, to: P] -> Prop; + + // Reachability relation (transitive closure) + CanReach : [from: P, to: P] -> Prop; + + // Reflexivity: every place can reach itself + ax/refl : forall p : P. + |- [from: p, to: p] CanReach; + + // Transition firing creates reachability + ax/fire : forall t : T, x : P, y : P. + [trans: t, from: x, to: y] Fires |- [from: x, to: y] CanReach; + + // Transitivity: reachability composes + ax/trans : forall x : P, y : P, z : P. 
+ [from: x, to: y] CanReach, [from: y, to: z] CanReach |- [from: x, to: z] CanReach; +} + +// ============================================================ +// INSTANCE: SimpleNet +// A -> B -> C with bidirectional A <-> B +// +// (A) <--[ba]-- (B) --[bc]--> (C) +// | ^ +// +---[ab]------+ +// ============================================================ + +// Uses chase to derive CanReach from axioms (reflexivity, fire, transitivity) +instance SimpleNet : PlaceReachability = chase { + // Places + A : P; + B : P; + C : P; + + // Transitions + ab : T; // A -> B + ba : T; // B -> A + bc : T; // B -> C + + // Firing relations + [trans: ab, from: A, to: B] Fires; + [trans: ba, from: B, to: A] Fires; + [trans: bc, from: B, to: C] Fires; +} + +// ============================================================ +// INSTANCE: MutexNet +// Two processes competing for a mutex +// +// idle1 --[enter1]--> crit1 --[exit1]--> idle1 +// ^ | +// | mutex | +// | v +// idle2 --[enter2]--> crit2 --[exit2]--> idle2 +// ============================================================ + +// Uses chase to derive reachability relation +instance MutexNet : PlaceReachability = chase { + // Places + idle1 : P; + crit1 : P; + idle2 : P; + crit2 : P; + mutex : P; + + // Transitions + enter1 : T; + exit1 : T; + enter2 : T; + exit2 : T; + + // Process 1 acquires mutex: idle1 + mutex -> crit1 + // (simplified: we track place-level, not token-level) + [trans: enter1, from: idle1, to: crit1] Fires; + [trans: enter1, from: mutex, to: crit1] Fires; + + // Process 1 releases mutex: crit1 -> idle1 + mutex + [trans: exit1, from: crit1, to: idle1] Fires; + [trans: exit1, from: crit1, to: mutex] Fires; + + // Process 2 acquires mutex: idle2 + mutex -> crit2 + [trans: enter2, from: idle2, to: crit2] Fires; + [trans: enter2, from: mutex, to: crit2] Fires; + + // Process 2 releases mutex: crit2 -> idle2 + mutex + [trans: exit2, from: crit2, to: idle2] Fires; + [trans: exit2, from: crit2, to: mutex] Fires; +} + +// 
============================================================ +// INSTANCE: ProducerConsumerNet +// Producer creates items, consumer processes them +// +// ready --[produce]--> buffer --[consume]--> done +// ============================================================ + +// Uses chase to derive reachability relation +instance ProducerConsumerNet : PlaceReachability = chase { + // Places + ready : P; + buffer : P; + done : P; + + // Transitions + produce : T; + consume : T; + + // Produce: ready -> buffer + [trans: produce, from: ready, to: buffer] Fires; + + // Consume: buffer -> done + [trans: consume, from: buffer, to: done] Fires; +} diff --git a/examples/geolog/petri_reachability_full_vision.geolog b/examples/geolog/petri_reachability_full_vision.geolog new file mode 100644 index 0000000..a3155f5 --- /dev/null +++ b/examples/geolog/petri_reachability_full_vision.geolog @@ -0,0 +1,72 @@ +// Full Petri Net Reachability Vision Test +// From 2025-12-12_12:10_VanillaPetriNetRechability.md + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// Simplified Trace theory without disjunctions for now +theory (N : PetriNet instance) SimpleTrace { + F : Sort; + F/of : F -> N/T; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Simplified ax5: every firing+arc gets an output terminal + ax5 : forall f : F, arc : N/out. |- exists o : output_terminal. 
o output_terminal/src = [firing: f, arc: arc]; + + // Simplified ax6: every firing+arc gets an input terminal + ax6 : forall f : F, arc : N/in. |- exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + ab : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; +} + +// Test nested instance elaboration +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// Test chase with SimpleTrace +instance trace0 : ExampleNet SimpleTrace = chase { + f1 : F; + f1 F/of = ExampleNet/ab; +} diff --git a/examples/geolog/petri_reachability_vision.geolog b/examples/geolog/petri_reachability_vision.geolog new file mode 100644 index 0000000..20ef578 --- /dev/null +++ b/examples/geolog/petri_reachability_vision.geolog @@ -0,0 +1,94 @@ +// Petri Net Reachability Vision Test +// Based on 2025-12-12 design document + +// Basic Petri net structure +theory PetriNet { + // Places + P : Sort; + + // Transitions + T : Sort; + + // Arcs (input to transitions, output from transitions) + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// A marking is a multiset of tokens, each at a place +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// A reachability problem is: can we get from initial marking to target? 
+theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// A trace is a sequence of firings connected by wires +theory (N : PetriNet instance) Trace { + // Firings of transitions + F : Sort; + F/of : F -> N/T; + + // Wires connect firing outputs to firing inputs + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + // Terminals are unconnected arc endpoints (to/from the initial/target markings) + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; +} + +// Example Petri net: A <--ab/ba--> B, (A,B) --abc--> C +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} + +// Reachability problem: Can we reach B from A? 
+instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + t token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/B; + }; +} diff --git a/examples/geolog/petri_trace_axioms.geolog b/examples/geolog/petri_trace_axioms.geolog new file mode 100644 index 0000000..1295e0c --- /dev/null +++ b/examples/geolog/petri_trace_axioms.geolog @@ -0,0 +1,66 @@ +// Test Trace theory with axioms using product codomains + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// Trace theory with axioms +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Axiom: wires are injective on source + // forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + // (Commented out - requires product codomain equality in premises) + + // Axiom: every arc endpoint must be wired or terminated + // forall f : F, arc : N/out. arc N/out/src = f F/of |- + // (exists w : W. w W/src = [firing: f, arc: arc]) \/ + // (exists o : output_terminal. 
o output_terminal/src = [firing: f, arc: arc]); + // (Commented out - requires product codomain values in conclusions) +} + +// Simple net for testing +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_in : in; + arc_in in/src = A; + arc_in in/tgt = t; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Test that the basic theory without axioms still works +instance SimpleTrace : SimpleNet Trace = { + f1 : F; + f1 F/of = SimpleNet/t; + + it : input_terminal; + it input_terminal/of = SimpleNet/A; + it input_terminal/tgt = [firing: f1, arc: SimpleNet/arc_in]; + + ot : output_terminal; + ot output_terminal/of = SimpleNet/B; + ot output_terminal/src = [firing: f1, arc: SimpleNet/arc_out]; +} diff --git a/examples/geolog/petri_trace_coverage_test.geolog b/examples/geolog/petri_trace_coverage_test.geolog new file mode 100644 index 0000000..dd3c2f2 --- /dev/null +++ b/examples/geolog/petri_trace_coverage_test.geolog @@ -0,0 +1,36 @@ +// Test: Trace coverage axiom (simplified) + +theory PetriNet { + P : Sort; + T : Sort; + out : Sort; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + output_terminal : Sort; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Simplified ax5: for every arc and firing, if the arc's source is the firing's transition, + // create an output terminal + ax5 : forall f : F, arc : N/out. |- exists o : output_terminal. 
o output_terminal/src = [firing: f, arc: arc]; +} + +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Trace with just a firing - chase should create a terminal +instance TestTrace : SimpleNet Trace = chase { + f1 : F; + f1 F/of = SimpleNet/t; +} diff --git a/examples/geolog/petri_trace_full_vision.geolog b/examples/geolog/petri_trace_full_vision.geolog new file mode 100644 index 0000000..687cac8 --- /dev/null +++ b/examples/geolog/petri_trace_full_vision.geolog @@ -0,0 +1,57 @@ +// Trace theory with wires and disjunctions +// Testing the full vision from 2025-12-12 + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Every out arc of every firing: either wired or terminal + ax5 : forall f : F, arc : N/out. |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + + // Every in arc of every firing: either wired or terminal + ax6 : forall f : F, arc : N/in. |- + (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. 
i input_terminal/tgt = [firing: f, arc: arc]); +} + +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_in : in; + arc_in in/src = A; + arc_in in/tgt = t; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Chase should create both wires AND terminals (naive chase adds all disjuncts) +instance trace_test : SimpleNet Trace = chase { + f1 : F; + f1 F/of = SimpleNet/t; +} diff --git a/examples/geolog/petri_trace_test.geolog b/examples/geolog/petri_trace_test.geolog new file mode 100644 index 0000000..0a3a674 --- /dev/null +++ b/examples/geolog/petri_trace_test.geolog @@ -0,0 +1,58 @@ +// Test that Trace theory with product codomains works + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// Simple Petri net: A --t--> B +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_in : in; + arc_in in/src = A; + arc_in in/tgt = t; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Trace theory with product codomains for wire endpoints +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; +} + +// A simple trace: one firing of t, with input/output terminals +instance SimpleTrace : SimpleNet Trace = { + f1 : F; + f1 F/of = SimpleNet/t; + + // Input terminal (token comes from external marking) + it : input_terminal; + it input_terminal/of = SimpleNet/A; + it input_terminal/tgt = [firing: f1, arc: SimpleNet/arc_in]; + + // Output terminal (token goes to external marking) + ot : output_terminal; + ot 
output_terminal/of = SimpleNet/B; + ot output_terminal/src = [firing: f1, arc: SimpleNet/arc_out]; +} diff --git a/examples/geolog/preorder.geolog b/examples/geolog/preorder.geolog new file mode 100644 index 0000000..47983a0 --- /dev/null +++ b/examples/geolog/preorder.geolog @@ -0,0 +1,42 @@ +// Preorder: a set with a reflexive, transitive relation +// +// This demonstrates RELATIONS (predicates) as opposed to functions. +// A relation R : A -> Prop is a predicate on A. +// For binary relations, we use a product domain: R : [x: A, y: A] -> Prop + +theory Preorder { + X : Sort; + + // The ordering relation: x ≤ y + leq : [x: X, y: X] -> Prop; + + // Reflexivity: x ≤ x + ax/refl : forall x : X. + |- [x: x, y: x] leq; + + // Transitivity: x ≤ y ∧ y ≤ z → x ≤ z + ax/trans : forall x : X, y : X, z : X. + [x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq; +} + +// The discrete preorder: only reflexive pairs +// (no elements are comparable except to themselves) +// Uses `chase` to automatically derive reflexive pairs from axiom ax/refl. +instance Discrete3 : Preorder = chase { + a : X; + b : X; + c : X; +} + +// A total order on 3 elements: a ≤ b ≤ c +// Uses `chase` to derive reflexive and transitive closure. +instance Chain3 : Preorder = chase { + bot : X; + mid : X; + top : X; + + // Assert the basic ordering; chase will add reflexive pairs + // and transitive closure (bot ≤ top) + [x: bot, y: mid] leq; + [x: mid, y: top] leq; +} diff --git a/examples/geolog/product_codomain_equality_test.geolog b/examples/geolog/product_codomain_equality_test.geolog new file mode 100644 index 0000000..9a86219 --- /dev/null +++ b/examples/geolog/product_codomain_equality_test.geolog @@ -0,0 +1,23 @@ +// Test: Product codomain equality in premise (ax3 pattern) + +theory ProductCodomainEqTest { + A : Sort; + B : Sort; + + W : Sort; + W/src : W -> [x: A, y: B]; + + // ax3 pattern: forall w1, w2 : W. 
w1 W/src = w2 W/src |- w1 = w2 + // This should make W injective on src + ax_inj : forall w1 : W, w2 : W. w1 W/src = w2 W/src |- w1 = w2; +} + +// Instance with two wires that have the same src - should be identified by chase +instance Test : ProductCodomainEqTest = { + a1 : A; + b1 : B; + w1 : W; + w1 W/src = [x: a1, y: b1]; + w2 : W; + w2 W/src = [x: a1, y: b1]; +} diff --git a/examples/geolog/product_codomain_test.geolog b/examples/geolog/product_codomain_test.geolog new file mode 100644 index 0000000..b804354 --- /dev/null +++ b/examples/geolog/product_codomain_test.geolog @@ -0,0 +1,51 @@ +// Test: Product Codomain Support +// +// This tests the new feature where functions can have product codomains, +// allowing record literal assignments like: +// elem func = [field1: v1, field2: v2]; + +theory ProductCodomainTest { + A : Sort; + B : Sort; + C : Sort; + + // Function with product codomain: maps A elements to (B, C) pairs + pair_of : A -> [left: B, right: C]; +} + +instance TestInstance : ProductCodomainTest = { + // Elements + a1 : A; + b1 : B; + b2 : B; + c1 : C; + + // Assign product codomain value using record literal + a1 pair_of = [left: b1, right: c1]; +} + +// A more realistic example: Edges in a graph +theory DirectedGraph { + V : Sort; + E : Sort; + + // Edge endpoints as a product codomain + endpoints : E -> [src: V, tgt: V]; +} + +instance TriangleGraph : DirectedGraph = { + // Vertices + v0 : V; + v1 : V; + v2 : V; + + // Edges + e01 : E; + e12 : E; + e20 : E; + + // Assign edge endpoints using record literals + e01 endpoints = [src: v0, tgt: v1]; + e12 endpoints = [src: v1, tgt: v2]; + e20 endpoints = [src: v2, tgt: v0]; +} diff --git a/examples/geolog/record_existential_test.geolog b/examples/geolog/record_existential_test.geolog new file mode 100644 index 0000000..f1d2fbe --- /dev/null +++ b/examples/geolog/record_existential_test.geolog @@ -0,0 +1,18 @@ +// Test: Record literals in existential conclusions + +theory RecordExistentialTest { + A 
: Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Axiom: given any a:A and b:B, there exists an R with that data + ax1 : forall a : A, b : B. |- + exists r : R. r R/data = [x: a, y: b]; +} + +instance Test : RecordExistentialTest = chase { + a1 : A; + b1 : B; +} diff --git a/examples/geolog/record_in_axiom_test.geolog b/examples/geolog/record_in_axiom_test.geolog new file mode 100644 index 0000000..559139d --- /dev/null +++ b/examples/geolog/record_in_axiom_test.geolog @@ -0,0 +1,12 @@ +// Test: Record literals in axioms + +theory RecordAxiomTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Test axiom with record literal RHS + ax1 : forall r : R, a : A, b : B. r R/data = [x: a, y: b] |- true; +} diff --git a/examples/geolog/record_premise_chase_test.geolog b/examples/geolog/record_premise_chase_test.geolog new file mode 100644 index 0000000..5f67156 --- /dev/null +++ b/examples/geolog/record_premise_chase_test.geolog @@ -0,0 +1,23 @@ +// Test: Chase with record literals in premises + +theory RecordPremiseTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Derived sort for processed items + Processed : Sort; + Processed/r : Processed -> R; + + // Axiom: given r with data [x: a, y: b], create a Processed for it + ax1 : forall r : R, a : A, b : B. r R/data = [x: a, y: b] |- exists p : Processed. p Processed/r = r; +} + +instance Test : RecordPremiseTest = { + a1 : A; + b1 : B; + r1 : R; + r1 R/data = [x: a1, y: b1]; +} diff --git a/examples/geolog/relalg_simple.geolog b/examples/geolog/relalg_simple.geolog new file mode 100644 index 0000000..39aebff --- /dev/null +++ b/examples/geolog/relalg_simple.geolog @@ -0,0 +1,130 @@ +// Example: RelAlgIR query plan instances +// +// This demonstrates creating query plans as RelAlgIR instances. +// These show the string diagram representation of relational algebra. +// +// First we need to load both GeologMeta (for Srt, Func, etc.) and RelAlgIR. 
+// This file just defines instances; load theories first in the REPL: +// :load theories/GeologMeta.geolog +// :load theories/RelAlgIR.geolog +// :load examples/geolog/relalg_simple.geolog +// +// Note: RelAlgIR extends GeologMeta, so a RelAlgIR instance contains +// elements from both GeologMeta sorts (Srt, Func, Elem) and RelAlgIR +// sorts (Wire, Schema, ScanOp, etc.) + +// ============================================================ +// Example 1: Simple Scan +// ============================================================ +// Query: "scan all elements of sort V" +// Plan: () --[ScanOp]--> Wire + +instance ScanV : RelAlgIR = chase { + // -- Schema (target theory) -- + target_theory : GeologMeta/Theory; + target_theory GeologMeta/Theory/parent = target_theory; + + v_srt : GeologMeta/Srt; + v_srt GeologMeta/Srt/theory = target_theory; + + // -- Query Plan -- + v_base_schema : BaseSchema; + v_base_schema BaseSchema/srt = v_srt; + + v_schema : Schema; + v_base_schema BaseSchema/schema = v_schema; + + scan_out : Wire; + scan_out Wire/schema = v_schema; + + scan : ScanOp; + scan ScanOp/srt = v_srt; + scan ScanOp/out = scan_out; + + scan_op : Op; + scan ScanOp/op = scan_op; +} + +// ============================================================ +// Example 2: Filter(Scan) +// ============================================================ +// Query: "scan E, filter where src(e) = some vertex" +// Plan: () --[Scan]--> w1 --[Filter]--> w2 +// +// This demonstrates composition via wire sharing. + +// Uses chase to derive relations. 
+instance FilterScan : RelAlgIR = chase { + // -- Schema (representing Graph theory) -- + target_theory : GeologMeta/Theory; + target_theory GeologMeta/Theory/parent = target_theory; + + // Sorts: V (vertices), E (edges) + v_srt : GeologMeta/Srt; + v_srt GeologMeta/Srt/theory = target_theory; + + e_srt : GeologMeta/Srt; + e_srt GeologMeta/Srt/theory = target_theory; + + // Functions: src : E -> V + // First create the DSort wrappers + v_base_ds : GeologMeta/BaseDS; + v_base_ds GeologMeta/BaseDS/srt = v_srt; + + e_base_ds : GeologMeta/BaseDS; + e_base_ds GeologMeta/BaseDS/srt = e_srt; + + v_dsort : GeologMeta/DSort; + v_base_ds GeologMeta/BaseDS/dsort = v_dsort; + + e_dsort : GeologMeta/DSort; + e_base_ds GeologMeta/BaseDS/dsort = e_dsort; + + src_func : GeologMeta/Func; + src_func GeologMeta/Func/theory = target_theory; + src_func GeologMeta/Func/dom = e_dsort; + src_func GeologMeta/Func/cod = v_dsort; + + // NOTE: For a complete example, we'd also need an Instance element + // and Elem elements. For simplicity, we use a simpler predicate structure. 
+ + // Using TruePred for now (matches all, demonstrating structure) + + // -- Query Plan -- + // Schema for E + e_base_schema : BaseSchema; + e_base_schema BaseSchema/srt = e_srt; + + e_schema : Schema; + e_base_schema BaseSchema/schema = e_schema; + + // Wire 1: output of Scan (E elements) + w1 : Wire; + w1 Wire/schema = e_schema; + + // Wire 2: output of Filter (filtered E elements) + w2 : Wire; + w2 Wire/schema = e_schema; + + // Scan operation + scan : ScanOp; + scan ScanOp/srt = e_srt; + scan ScanOp/out = w1; + + scan_op : Op; + scan ScanOp/op = scan_op; + + // Predicate: TruePred (matches all - demonstrates filter structure) + true_pred : TruePred; + pred_elem : Pred; + true_pred TruePred/pred = pred_elem; + + // Filter operation: w1 --[Filter(pred)]--> w2 + filter : FilterOp; + filter FilterOp/in = w1; + filter FilterOp/out = w2; + filter FilterOp/pred = pred_elem; + + filter_op : Op; + filter FilterOp/op = filter_op; +} diff --git a/examples/geolog/solver_demo.geolog b/examples/geolog/solver_demo.geolog new file mode 100644 index 0000000..b8dec99 --- /dev/null +++ b/examples/geolog/solver_demo.geolog @@ -0,0 +1,132 @@ +// Solver Demo: Theories demonstrating the geometric logic solver +// +// Use the :solve command to find instances of these theories: +// :source examples/geolog/solver_demo.geolog +// :solve EmptyModel +// :solve Inhabited +// :solve Inconsistent +// +// The solver uses forward chaining to automatically: +// - Add witness elements for existentials +// - Assert relation tuples +// - Detect unsatisfiability (derivation of False) + +// ============================================================================ +// Theory 1: EmptyModel - Trivially satisfiable with empty carrier +// ============================================================================ +// +// A theory with no axioms is satisfied by the empty structure. +// The solver should report SOLVED immediately with 0 elements. 
+ +theory EmptyModel { + A : Sort; + B : Sort; + f : A -> B; + R : A -> Prop; +} + +// ============================================================================ +// Theory 2: UnconditionalExistential - Requires witness creation +// ============================================================================ +// +// Axiom: forall x : P. |- exists y : P. y R +// +// SUBTLE: This axiom's premise (True) and conclusion (∃y.R(y)) don't mention x! +// So even though there's a "forall x : P", the check happens once for an empty +// assignment. The premise True holds, but ∃y.R(y) doesn't hold for empty P +// (no witnesses). The solver correctly detects this and adds a witness. +// +// This is correct geometric logic semantics! The universal over x doesn't +// protect against empty P because x isn't used in the formulas. + +theory UnconditionalExistential { + P : Sort; + R : P -> Prop; + + // This effectively says "there must exist some y with R(y)" + // because x is unused - the check happens once regardless of |P| + ax : forall x : P. |- exists y : P. y R; +} + +// ============================================================================ +// Theory 3: VacuouslyTrue - Axiom that IS vacuously true for empty carriers +// ============================================================================ +// +// Axiom: forall x : P. |- x R +// +// For every x, assert R(x). When P is empty, there are no x values to check, +// so the axiom is vacuously satisfied. Compare with UnconditionalExistential! + +theory VacuouslyTrue { + P : Sort; + R : P -> Prop; + + // This truly IS vacuously true for empty P because x IS used in the conclusion + ax : forall x : P. |- x R; +} + +// ============================================================================ +// Theory 4: Inconsistent - UNSAT via derivation of False +// ============================================================================ +// +// Axiom: forall x. |- false +// +// For any element x, we derive False. 
This is immediately UNSAT. +// The solver detects this and reports UNSAT. + +theory Inconsistent { + A : Sort; + + // Contradiction: any element leads to False + ax : forall a : A. |- false; +} + +// ============================================================================ +// Theory 5: ReflexiveRelation - Forward chaining asserts reflexive tuples +// ============================================================================ +// +// Axiom: forall x. |- R(x, x) +// +// For every element x, the pair (x, x) is in relation R. +// The solver will assert R(x, x) for each element added. + +theory ReflexiveRelation { + X : Sort; + R : [a: X, b: X] -> Prop; + + // Reflexivity: every element is related to itself + ax/refl : forall x : X. |- [a: x, b: x] R; +} + +// ============================================================================ +// Theory 6: ChainedWitness - Nested existential body processing +// ============================================================================ +// +// Axiom: forall x. |- exists y. exists z. E(x, y), E(y, z) +// +// For every x, there exist y and z such that E(x,y) and E(y,z). +// Forward chaining creates witnesses and asserts the relations. + +theory ChainedWitness { + N : Sort; + E : [src: N, tgt: N] -> Prop; + + // Chain: every node has a two-step path out + ax/chain : forall x : N. |- exists y : N. exists z : N. [src: x, tgt: y] E, [src: y, tgt: z] E; +} + +// ============================================================================ +// Theory 7: EqualityCollapse - Equation handling via congruence closure +// ============================================================================ +// +// Axiom: forall x, y. |- x = y +// +// All elements of sort X are equal. The solver adds equations to the +// congruence closure and merges equivalence classes. + +theory EqualityCollapse { + X : Sort; + + // All elements are equal + ax/all_equal : forall x : X, y : X. 
|- x = y; +} diff --git a/examples/geolog/sort_param_simple.geolog b/examples/geolog/sort_param_simple.geolog new file mode 100644 index 0000000..1ba3217 --- /dev/null +++ b/examples/geolog/sort_param_simple.geolog @@ -0,0 +1,31 @@ +// Simpler sort parameter test + +theory (X : Sort) Container { + elem : X; // not a Sort, but an element of X +} + +// Hmm, this doesn't quite work... +// Let me try the actual vision pattern + +theory Base { + A : Sort; + B : Sort; +} + +instance MyBase : Base = { + a1 : A; + a2 : A; + b1 : B; + b2 : B; +} + +// Now try a theory parameterized by an instance +theory (Inst : Base instance) Map { + map : Inst/A -> Inst/B; +} + +// Instance of Map parameterized by MyBase +instance MyMap : MyBase Map = { + a1 map = MyBase/b1; + a2 map = MyBase/b2; +} diff --git a/examples/geolog/todo_list.geolog b/examples/geolog/todo_list.geolog new file mode 100644 index 0000000..aa2cab9 --- /dev/null +++ b/examples/geolog/todo_list.geolog @@ -0,0 +1,44 @@ +// TodoList: A simple relational model for tracking tasks +// +// This demonstrates geolog as a persistent relational database. +// Elements represent tasks, and relations track their status. + +theory TodoList { + // The sort of todo items + Item : Sort; + + // Unary relations for item status (simple arrow syntax) + completed : Item -> Prop; + high_priority : Item -> Prop; + blocked : Item -> Prop; + + // Binary relation for dependencies + depends : [item: Item, on: Item] -> Prop; + + // Axiom: if an item depends on another, either it is blocked + // or the dependency is completed + ax/dep_blocked : forall x : Item, y : Item. 
+ [item: x, on: y] depends |- x blocked \/ y completed; +} + +// Example: An empty todo list ready for interactive use +instance MyTodos : TodoList = { + // Start empty - add items interactively with :add +} + +// Example: A pre-populated todo list +instance SampleTodos : TodoList = { + // Items + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + + // Status: unary relations use simple syntax + buy_groceries completed; + cook_dinner high_priority; + + // Dependencies: cook_dinner depends on buy_groceries + // Mixed syntax: first positional arg maps to 'item' field + [cook_dinner, on: buy_groceries] depends; +} diff --git a/examples/geolog/transitive_closure.geolog b/examples/geolog/transitive_closure.geolog new file mode 100644 index 0000000..7e76309 --- /dev/null +++ b/examples/geolog/transitive_closure.geolog @@ -0,0 +1,77 @@ +// Transitive Closure Example +// +// This example demonstrates the chase algorithm computing transitive +// closure of a relation. We define a Graph theory with Edge and Path +// relations, where Path is the transitive closure of Edge. +// +// Run with: +// cargo run -- examples/geolog/transitive_closure.geolog +// Then: +// :source examples/geolog/transitive_closure.geolog +// :inspect Chain +// :chase Chain +// +// The chase will derive Path tuples for all reachable pairs: +// - Edge(a,b), Edge(b,c), Edge(c,d) as base facts +// - Path(a,b), Path(b,c), Path(c,d) from base axiom +// - Path(a,c), Path(b,d) from one step of transitivity +// - Path(a,d) from two steps of transitivity + +theory Graph { + V : Sort; + + // Direct edges in the graph + Edge : [src: V, tgt: V] -> Prop; + + // Reachability (transitive closure of Edge) + Path : [src: V, tgt: V] -> Prop; + + // Base case: every edge is a path + ax/base : forall x, y : V. + [src: x, tgt: y] Edge |- [src: x, tgt: y] Path; + + // Inductive case: paths compose + ax/trans : forall x, y, z : V. 
+ [src: x, tgt: y] Path, [src: y, tgt: z] Path |- [src: x, tgt: z] Path; +} + +// A linear chain: a -> b -> c -> d +// Chase derives Path tuples from Edge via ax/base and ax/trans. +instance Chain : Graph = chase { + a : V; + b : V; + c : V; + d : V; + + // Edges form a chain + [src: a, tgt: b] Edge; + [src: b, tgt: c] Edge; + [src: c, tgt: d] Edge; +} + +// A diamond: a -> b, a -> c, b -> d, c -> d +// Chase derives all reachable paths. +instance Diamond : Graph = chase { + top : V; + left : V; + right : V; + bottom : V; + + // Two paths from top to bottom + [src: top, tgt: left] Edge; + [src: top, tgt: right] Edge; + [src: left, tgt: bottom] Edge; + [src: right, tgt: bottom] Edge; +} + +// A cycle: a -> b -> c -> a +// Chase derives all reachable paths (full connectivity). +instance Cycle : Graph = chase { + x : V; + y : V; + z : V; + + [src: x, tgt: y] Edge; + [src: y, tgt: z] Edge; + [src: z, tgt: x] Edge; +} diff --git a/examples/main.rs b/examples/main.rs new file mode 100644 index 0000000..623ab5f --- /dev/null +++ b/examples/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello world!") +} diff --git a/examples/roundtrip.rs b/examples/roundtrip.rs new file mode 100644 index 0000000..08d7f06 --- /dev/null +++ b/examples/roundtrip.rs @@ -0,0 +1,216 @@ +use geolog::{parse, pretty_print}; + +fn main() { + let input = r#" +namespace VanillaPetriNets; + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + ax1 : forall w : W. 
|- w W/src .arc N/out/src = w W/src .firing F/of; + ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; + ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]); +} + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} + +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + t token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/B; + }; +} + +query findTrace { + ? 
: ExampleNet Trace instance; +} +"#; + + println!("=== PARSING ORIGINAL ==="); + let ast1 = match parse(input) { + Ok(f) => f, + Err(e) => { + eprintln!("Parse error: {}", e); + std::process::exit(1); + } + }; + println!("Parsed {} declarations", ast1.declarations.len()); + + println!("\n=== PRETTY PRINTING ==="); + let printed = pretty_print(&ast1); + println!("{}", printed); + + println!("\n=== RE-PARSING ==="); + let ast2 = match parse(&printed) { + Ok(f) => f, + Err(e) => { + eprintln!("Re-parse error: {}", e); + eprintln!("\nPrinted output was:\n{}", printed); + std::process::exit(1); + } + }; + println!("Re-parsed {} declarations", ast2.declarations.len()); + + println!("\n=== COMPARING ==="); + if ast1.declarations.len() != ast2.declarations.len() { + eprintln!("Declaration count mismatch!"); + std::process::exit(1); + } + + // Compare declaration types + for (i, (d1, d2)) in ast1 + .declarations + .iter() + .zip(ast2.declarations.iter()) + .enumerate() + { + let type1 = match &d1.node { + geolog::Declaration::Namespace(_) => "namespace", + geolog::Declaration::Theory(_) => "theory", + geolog::Declaration::Instance(_) => "instance", + geolog::Declaration::Query(_) => "query", + }; + let type2 = match &d2.node { + geolog::Declaration::Namespace(_) => "namespace", + geolog::Declaration::Theory(_) => "theory", + geolog::Declaration::Instance(_) => "instance", + geolog::Declaration::Query(_) => "query", + }; + if type1 != type2 { + eprintln!("Declaration {} type mismatch: {} vs {}", i, type1, type2); + std::process::exit(1); + } + print!(" [{}] {} ", i, type1); + + // Check names/details + match (&d1.node, &d2.node) { + (geolog::Declaration::Namespace(n1), geolog::Declaration::Namespace(n2)) => { + if n1 != n2 { + eprintln!("name mismatch: {} vs {}", n1, n2); + std::process::exit(1); + } + println!("{} ✓", n1); + } + (geolog::Declaration::Theory(t1), geolog::Declaration::Theory(t2)) => { + if t1.name != t2.name { + eprintln!("name mismatch: {} vs {}", t1.name, 
t2.name); + std::process::exit(1); + } + if t1.body.len() != t2.body.len() { + eprintln!( + "body length mismatch: {} vs {}", + t1.body.len(), + t2.body.len() + ); + std::process::exit(1); + } + println!("{} ({} items) ✓", t1.name, t1.body.len()); + } + (geolog::Declaration::Instance(i1), geolog::Declaration::Instance(i2)) => { + if i1.name != i2.name { + eprintln!("name mismatch: {} vs {}", i1.name, i2.name); + std::process::exit(1); + } + if i1.body.len() != i2.body.len() { + eprintln!( + "body length mismatch: {} vs {}", + i1.body.len(), + i2.body.len() + ); + std::process::exit(1); + } + println!("{} ({} items) ✓", i1.name, i1.body.len()); + } + (geolog::Declaration::Query(q1), geolog::Declaration::Query(q2)) => { + if q1.name != q2.name { + eprintln!("name mismatch: {} vs {}", q1.name, q2.name); + std::process::exit(1); + } + println!("{} ✓", q1.name); + } + _ => unreachable!(), + } + } + + println!("\n=== ROUNDTRIP SUCCESS ==="); +} diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..0ff89a4 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "geolog-fuzz" +version = "0.0.0" +publish = false +edition = "2024" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer_sys = "0.4" + +[dependencies.geolog] +path = ".." 
+ +# Parser fuzzer - tests lexer/parser robustness +[[bin]] +name = "fuzz_parser" +path = "fuzz_targets/fuzz_parser.rs" +test = false +doc = false +bench = false + +# REPL fuzzer - tests full execution pipeline +[[bin]] +name = "fuzz_repl" +path = "fuzz_targets/fuzz_repl.rs" +test = false +doc = false +bench = false diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 0000000..4633ac8 --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,60 @@ +# Fuzzing geolog + +This directory contains fuzz targets for finding bugs and edge cases in geolog. + +## Requirements + +Fuzzing requires the nightly Rust compiler due to sanitizer support: + +```bash +rustup install nightly +rustup default nightly # or use +nightly flag +``` + +## Available Targets + +- **fuzz_parser**: Exercises the lexer and parser with arbitrary UTF-8 input +- **fuzz_repl**: Exercises the full REPL execution pipeline + +## Running Fuzzers + +```bash +# List all fuzz targets +cargo fuzz list + +# Run the parser fuzzer +cargo +nightly fuzz run fuzz_parser + +# Run the REPL fuzzer +cargo +nightly fuzz run fuzz_repl + +# Run with a time limit (e.g., 60 seconds) +cargo +nightly fuzz run fuzz_parser -- -max_total_time=60 + +# Run with a corpus directory +cargo +nightly fuzz run fuzz_parser corpus/fuzz_parser +``` + +## Corpus + +Interesting inputs found during fuzzing are automatically saved to `corpus//`. +These can be used to reproduce issues: + +```bash +# Reproduce a crash +cargo +nightly fuzz run fuzz_parser corpus/fuzz_parser/ +``` + +## Minimizing Crashes + +```bash +cargo +nightly fuzz tmin fuzz_parser +``` + +## Coverage + +Generate coverage reports: + +```bash +cargo +nightly fuzz coverage fuzz_parser +``` diff --git a/fuzz/fuzz_targets/fuzz_parser.rs b/fuzz/fuzz_targets/fuzz_parser.rs new file mode 100644 index 0000000..475268f --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_parser.rs @@ -0,0 +1,17 @@ +//! Fuzz the geolog parser +//! +//! 
This target exercises the lexer and parser to find edge cases +//! and potential panics in the parsing code. + +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // Try to interpret the data as UTF-8 + if let Ok(input) = std::str::from_utf8(data) { + // The parser should never panic, even on malformed input + // It should return an error instead + let _ = geolog::parse(input); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_repl.rs b/fuzz/fuzz_targets/fuzz_repl.rs new file mode 100644 index 0000000..6b78887 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_repl.rs @@ -0,0 +1,22 @@ +//! Fuzz the geolog REPL execution +//! +//! This target exercises the full REPL pipeline: parsing, elaboration, +//! and instance creation. It should never panic on any input. + +#![no_main] + +use libfuzzer_sys::fuzz_target; +use geolog::repl::ReplState; + +fuzz_target!(|data: &[u8]| { + // Try to interpret the data as UTF-8 + if let Ok(input) = std::str::from_utf8(data) { + // Create a fresh REPL state for each fuzz input + // (in-memory, no persistence) + let mut state = ReplState::new(); + + // The REPL should never panic on any input + // It should return a Result<_, String> error instead + let _ = state.execute_geolog(input); + } +}); diff --git a/proofs/.gitignore b/proofs/.gitignore new file mode 100644 index 0000000..1db7135 --- /dev/null +++ b/proofs/.gitignore @@ -0,0 +1,2 @@ +# Lake build artifacts +.lake/ diff --git a/proofs/GeologProofs.lean b/proofs/GeologProofs.lean new file mode 100644 index 0000000..e5e40a0 --- /dev/null +++ b/proofs/GeologProofs.lean @@ -0,0 +1 @@ +import GeologProofs.MonotonicSubmodel diff --git a/proofs/GeologProofs/MonotonicSubmodel.lean b/proofs/GeologProofs/MonotonicSubmodel.lean new file mode 100644 index 0000000..9d4c6d5 --- /dev/null +++ b/proofs/GeologProofs/MonotonicSubmodel.lean @@ -0,0 +1,1520 @@ +import ModelTheoryTopos.Geometric.Structure +import Mathlib.Data.Set.Basic +import Mathlib.Order.Monotone.Basic +import 
Mathlib.Logic.Function.Basic +import Mathlib.CategoryTheory.Types +import Mathlib.CategoryTheory.Limits.Types.Shapes +import Mathlib.CategoryTheory.Limits.Types.Images +import Mathlib.CategoryTheory.Subobject.Types +import Mathlib.CategoryTheory.Subobject.Lattice + +/-! +# Monotonic Submodel Property + +This file proves the Monotonic Submodel Property for geometric logic structures, +specialized to the category `Type u`. + +## Main Results + +- `pushforward_preserves_closure`: Function closure preserved under pushforward +- `monotonic_submodel_property`: Valid(t) ⊆ Valid(t+1) under atomic extensions + +## Technical Note + +We work with `Type u` and focus on base sorts where the interpretation +is definitionally the carrier type: `(DerivedSorts.inj A).interpret M.sorts = M.sorts A`. +-/ + +namespace MonotonicSubmodel + +open CategoryTheory Limits Signature + +universe u + +/-! +## Instance Priority Override + +The model-theory-topos library defines `OrderBot (Subobject X)` with `sorry`. +We override it with Mathlib's proper implementation for Type u, which requires +`HasInitial C` and `InitialMonoClass C`. +-/ + +-- Override model-theory-topos's sorried OrderBot with Mathlib's proper instance +attribute [instance 2000] Subobject.orderBot + +variable {S : Signature} + +/-! +## Subobjects in Type u + +In Type u, subobjects correspond to subsets via `Types.subobjectEquivSet α : Subobject α ≃o Set α`. +We work with the arrow's range as the concrete set representation. + +Key Mathlib facts we leverage: +- `Types.subobjectEquivSet` proves Subobject α ≃o Set α +- `mono_iff_injective` shows monos in Type u are injective functions +- Products in Type u are pi types: `∏ᶜ F ≅ ∀ j, F j` +- Pullbacks are subtypes: `pullback f g ≅ { p : X × Y // f p.1 = g p.2 }` +-/ + +/-! 
+## Transport Lemmas for DerivedSorts.interpret +-/ + +/-- For a base sort, interpretation is definitionally the carrier -/ +theorem interpret_inj (M : Structure S (Type u)) (A : S.Sorts) : + (DerivedSorts.inj A).interpret M.sorts = M.sorts A := rfl + +/-- Transport along domain equality -/ +def castDom {M : Structure S (Type u)} {f : S.Functions} {A : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) (x : M.sorts A) : + f.domain.interpret M.sorts := + cast (congrArg (DerivedSorts.interpret M.sorts) hdom).symm x + +/-- Transport along codomain equality -/ +def castCod {M : Structure S (Type u)} {f : S.Functions} {B : S.Sorts} + (hcod : f.codomain = DerivedSorts.inj B) (y : f.codomain.interpret M.sorts) : + M.sorts B := + cast (congrArg (DerivedSorts.interpret M.sorts) hcod) y + +/-! +## Lifting Elements Along Embeddings + +We define `liftSort'` which lifts elements of derived sorts along a family of maps +on base sorts. This is defined before `StructureEmbedding` so that the embedding +can use it in its `func_comm` field. +-/ + +/-- Lift an element of a derived sort along a family of maps on base sorts. + For base sorts: apply the map directly. + For products: apply componentwise via Types.productIso. -/ +noncomputable def liftSort' {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) : (D : DerivedSorts S.Sorts) → + D.interpret M.sorts → D.interpret M'.sorts + | .inj B => embed B + | .prod Aᵢ => fun x => + let x' := (Types.productIso _).hom x + let y' : ∀ i, (Aᵢ i).interpret M'.sorts := fun i => liftSort' embed (Aᵢ i) (x' i) + (Types.productIso _).inv y' + +/-- For base sorts, liftSort' equals embed (with casting) -/ +theorem liftSort'_inj {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) + {D : DerivedSorts S.Sorts} {A : S.Sorts} (hD : D = .inj A) + (x : D.interpret M.sorts) : + liftSort' embed D x = cast (by rw [hD]) (embed A (cast (by rw [hD]) x)) := by + subst hD + simp only [liftSort', cast_eq] + +/-! 
+## Subset Selection +-/ + +/-- A subset selection for base sorts of a structure in Type u -/ +structure SubsetSelection (M : Structure S (Type u)) where + subset : (A : S.Sorts) → Set (M.sorts A) + +/-! +## Function Closure +-/ + +/-- Function closure for a function with base domain and codomain -/ +def funcPreservesSubset {M : Structure S (Type u)} + (sel : SubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) : Prop := + ∀ x : M.sorts A, + x ∈ sel.subset A → + castCod hcod (M.Functions f (castDom hdom x)) ∈ sel.subset B + +/-! +## Structure Embeddings +-/ + +/-- An embedding of structures. + Functions must commute with the embedding on ALL derived sorts (not just base sorts). + This is the correct premise for the Monotonic Submodel Property. -/ +structure StructureEmbedding (M M' : Structure S (Type u)) where + /-- The carrier maps on base sorts -/ + embed : ∀ A, M.sorts A → M'.sorts A + /-- Embeddings are injective -/ + embed_inj : ∀ A, Function.Injective (embed A) + /-- Functions commute with embedding (for ALL functions, regardless of domain/codomain sort) -/ + func_comm : ∀ (f : S.Functions) (x : f.domain.interpret M.sorts), + liftSort' embed f.codomain (M.Functions f x) = + M'.Functions f (liftSort' embed f.domain x) + +/-- Helper: liftSort' on .inj sorts equals embed -/ +theorem liftSort'_inj_eq {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) (A : S.Sorts) (x : M.sorts A) : + liftSort' embed (.inj A) x = embed A x := rfl + +/-- liftSort' on a derived sort equal to .inj A with explicit cast handling -/ +theorem liftSort'_inj_cast {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) {D : DerivedSorts S.Sorts} {A : S.Sorts} + (h : D = .inj A) (x : D.interpret M.sorts) : + liftSort' embed D x = + cast (congrArg (DerivedSorts.interpret M'.sorts) h.symm) + (embed A (cast (congrArg (DerivedSorts.interpret M.sorts) h) x)) := by + subst h + 
rfl + +/-- For base-sorted functions, the embedding commutes in a simpler form. + This extracts the base-sort case from the general func_comm. -/ +theorem StructureEmbedding.func_comm_base {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) + (x : M.sorts A) : + emb.embed B (castCod hcod (M.Functions f (castDom hdom x))) = + castCod hcod (M'.Functions f (castDom hdom (emb.embed A x))) := by + -- Unfold the cast helpers + simp only [castDom, castCod] + -- Get func_comm instance + have hfc := emb.func_comm f (cast (congrArg (DerivedSorts.interpret M.sorts) hdom.symm) x) + -- Rewrite liftSort' using the helper lemmas + rw [liftSort'_inj_cast emb.embed hcod, liftSort'_inj_cast emb.embed hdom] at hfc + -- Now simplify the casts in hfc + simp only [cast_cast, cast_eq] at hfc + -- hfc : cast hcod.symm' a = b where we want a = cast hcod' b + -- Apply cast hcod' to both sides of hfc + have hfc' := congrArg (cast (congrArg (DerivedSorts.interpret M'.sorts) hcod)) hfc + simp only [cast_cast, cast_eq] at hfc' ⊢ + exact hfc' + +/-! 
+## Pushforward of Subset Selections +-/ + +/-- Push forward a subset selection along an embedding -/ +def SubsetSelection.pushforward {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') (sel : SubsetSelection M) : SubsetSelection M' where + subset A := emb.embed A '' sel.subset A + +/-- **Key Lemma**: Function closure is preserved by pushforward -/ +theorem pushforward_preserves_closure {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (sel : SubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) + (hclosed : funcPreservesSubset sel f hdom hcod) : + funcPreservesSubset (sel.pushforward emb) f hdom hcod := by + intro x' hx' + -- x' is in the image of sel.subset A + simp only [SubsetSelection.pushforward, Set.mem_image] at hx' ⊢ + obtain ⟨x, hx_mem, hx_eq⟩ := hx' + -- Apply function closure in M + have hout := hclosed x hx_mem + -- The output is in sel.subset B + refine ⟨castCod hcod (M.Functions f (castDom hdom x)), hout, ?_⟩ + -- Use the base-sorted func_comm helper + have hfc := emb.func_comm_base f hdom hcod x + -- hfc : emb.embed B (castCod hcod (M.Functions f (castDom hdom x))) = + -- castCod hcod (M'.Functions f (castDom hdom (emb.embed A x))) + rw [hfc, ← hx_eq] + +/-! +## Main Theorem +-/ + +/-- +**Main Theorem (Monotonic Submodel Property)** + +For base-sorted functions, the pushforward of a function-closed subset +selection along an embedding is also function-closed. + +This is stated per-function; the full property follows by applying to all functions. 
+-/ +theorem monotonic_submodel_property {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (sel : SubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) + (hclosed : funcPreservesSubset sel f hdom hcod) : + funcPreservesSubset (sel.pushforward emb) f hdom hcod := + pushforward_preserves_closure emb sel f hdom hcod hclosed + +/-! +## Closed Subset Selections +-/ + +/-- A subset selection is fully closed if it's closed under all base-sorted functions -/ +structure ClosedSubsetSelection (M : Structure S (Type u)) extends SubsetSelection M where + /-- Function closure for all base-sorted functions -/ + func_closed : ∀ (f : S.Functions) {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B), + funcPreservesSubset toSubsetSelection f hdom hcod + +/-- +**Semantic Monotonicity**: If sel is a closed subset selection in M, +and emb : M → M' is an embedding, then sel.pushforward emb is also closed in M'. + +This is the semantic content of the CALM theorem's monotonicity condition: +extending a structure by adding elements preserves the validity of existing submodels. 
+-/ +theorem semantic_monotonicity {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (sel : ClosedSubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) : + funcPreservesSubset (sel.toSubsetSelection.pushforward emb) f hdom hcod := + pushforward_preserves_closure emb sel.toSubsetSelection f hdom hcod (sel.func_closed f hdom hcod) + +/-- The pushforward of a closed selection is closed -/ +def ClosedSubsetSelection.pushforward {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') (sel : ClosedSubsetSelection M) : ClosedSubsetSelection M' where + toSubsetSelection := sel.toSubsetSelection.pushforward emb + func_closed f {_A} {_B} hdom hcod := semantic_monotonicity emb sel f hdom hcod + +/-! +## Relation Preservation +-/ + +/-- Transport for relation domains -/ +def castRelDom {M : Structure S (Type u)} {R : S.Relations} {A : S.Sorts} + (hdom : R.domain = DerivedSorts.inj A) (x : M.sorts A) : + R.domain.interpret M.sorts := + cast (congrArg (DerivedSorts.interpret M.sorts) hdom).symm x + +/-! +In Type u, a `Subobject X` represents a monomorphism into X, which +corresponds to a subset of X. An element x : X is "in" the subobject +iff x is in the range of the representing monomorphism (the arrow). +-/ + +/-- Membership in a subobject (in Type u): x is in the range of the arrow -/ +def subobjectMem {X : Type u} (S : Subobject X) (x : X) : Prop := + x ∈ Set.range S.arrow + +/-- Relation membership for base-sorted relations -/ +def relMem {M : Structure S (Type u)} (R : S.Relations) {A : S.Sorts} + (hdom : R.domain = DerivedSorts.inj A) (x : M.sorts A) : Prop := + subobjectMem (M.Relations R) (castRelDom hdom x) + +/-- A structure embedding that also preserves relations. + Relation preservation is stated for ALL derived sort domains, not just base sorts, + since geometric relations can have product domains (e.g., binary relations). 
-/
structure RelPreservingEmbedding (M M' : Structure S (Type u)) extends StructureEmbedding M M' where
  /-- Relations are preserved: if x ∈ R in M, then liftSort'(x) ∈ R in M' -/
  rel_preserve : ∀ (R : S.Relations) (x : R.domain.interpret M.sorts),
    subobjectMem (M.Relations R) x →
    subobjectMem (M'.Relations R) (liftSort' embed R.domain x)

/--
A **conservative expansion** is an embedding where:
1. Relations are preserved (forward): R(x) in M → R(emb(x)) in M'
2. Relations are reflected (backward): R(emb(x)) in M' → R(x) in M

The reflection condition captures "only adding relation tuples concerning new elements":
if a relation holds on lifted old elements in M', it must have already held in M.

With both directions, formula satisfaction becomes an IFF for old tuples,
which is the key to proving that old submodels remain valid models.
-/
structure ConservativeExpansion (M M' : Structure S (Type u)) extends RelPreservingEmbedding M M' where
  /-- Relations are reflected: if R(emb(x)) in M', then R(x) in M
      (no new relation tuples added on old elements) -/
  rel_reflect : ∀ (R : S.Relations) (x : R.domain.interpret M.sorts),
    subobjectMem (M'.Relations R) (liftSort' embed R.domain x) →
    subobjectMem (M.Relations R) x

/-- Relation membership is an IFF for conservative expansions.
    Packages `rel_preserve` and `rel_reflect` into a single biconditional. -/
theorem rel_preserve_iff {M M' : Structure S (Type u)}
    (emb : ConservativeExpansion M M')
    (R : S.Relations) (x : R.domain.interpret M.sorts) :
    subobjectMem (M.Relations R) x ↔
    subobjectMem (M'.Relations R) (liftSort' emb.embed R.domain x) :=
  ⟨emb.rel_preserve R x, emb.rel_reflect R x⟩

/-!
### Subset Selection with Relation Closure

A subset selection is "relation-closed" if whenever x is in the selection
and x is in relation R, then x satisfies the "domain requirement" for R.
For geometric logic, this isn't quite the right notion since relations can
have product domains. However, for base-sorted relations it's straightforward.
-/

/-- A closed selection respects relations: elements in relations stay in the selection -/
structure FullyClosedSelection (M : Structure S (Type u)) extends ClosedSubsetSelection M where
  /-- For base-sorted relations, if x ∈ R and x ∈ sel, the membership is consistent -/
  rel_closed : ∀ (R : S.Relations) {A : S.Sorts}
    (hdom : R.domain = DerivedSorts.inj A) (x : M.sorts A),
    relMem R hdom x → x ∈ subset A

/-- Elements in the selection get pushed forward.
    The pushforward subset is the image of the old subset under `emb.embed`,
    so membership is witnessed by the element itself. -/
theorem selection_pushforward_mem {M M' : Structure S (Type u)}
    (emb : StructureEmbedding M M')
    (sel : SubsetSelection M)
    {A : S.Sorts}
    (x : M.sorts A)
    (hsel : x ∈ sel.subset A) :
    emb.embed A x ∈ (sel.pushforward emb).subset A := by
  simp only [SubsetSelection.pushforward, Set.mem_image]
  exact ⟨x, hsel, rfl⟩

/-- Relation membership transfers across embeddings (base-sorted version).
    This is a corollary of the general `rel_preserve` for relations with base sort domains. -/
theorem rel_mem_transfer {M M' : Structure S (Type u)}
    (emb : RelPreservingEmbedding M M')
    (R : S.Relations)
    {A : S.Sorts}
    (hdom : R.domain = DerivedSorts.inj A)
    (x : M.sorts A)
    (hrel : relMem (M := M) R hdom x) :
    relMem (M := M') R hdom (emb.embed A x) := by
  simp only [relMem, castRelDom, subobjectMem] at hrel ⊢
  -- Convert hrel to subobjectMem form for the general rel_preserve
  let x' : R.domain.interpret M.sorts := cast (congrArg (DerivedSorts.interpret M.sorts) hdom).symm x
  have hrel' : subobjectMem (M.Relations R) x' := by convert hrel
  have h := emb.rel_preserve R x' hrel'
  -- h : subobjectMem (M'.Relations R) (liftSort' emb.embed R.domain x')
  -- Use liftSort'_inj_cast to handle the equation
  -- NOTE(review): `liftSort'_inj_cast` is declared outside this excerpt; its
  -- statement is assumed to rewrite `liftSort'` at a base-sort domain into a cast.
  rw [liftSort'_inj_cast emb.embed hdom] at h
  simp only [cast_cast, cast_eq, x'] at h
  convert h using 2

/-!
## Connection to Theory Satisfaction

The key insight connecting our structural results to `Theory.interpret`.
-/

/-!
### Formula Satisfaction via Subobjects

In `Type u`, formula interpretation gives a subobject, which is essentially
a subset. An element (or tuple) satisfies a formula iff it's in that subset.

**Key Mathlib lemmas for Type u:**
- `Types.subobjectEquivSet α : Subobject α ≃o Set α` - subobjects = sets
- In this order iso, `⊤ ↦ Set.univ` and `⊥ ↦ ∅`
- Product of subobjects ↦ intersection of sets
- Coproduct of subobjects ↦ union of sets
-/

/-- An element is in the formula's interpretation (Type u specific) -/
def formulaSatisfied {M : Structure S (Type u)} [κ : SmallUniverse S] [G : Geometric κ (Type u)]
    {xs : Context S} (φ : Formula xs) (t : Context.interpret M xs) : Prop :=
  subobjectMem (Formula.interpret M φ) t

/-!
### Lifting Embeddings to Contexts

An embedding on sorts lifts to an embedding on context interpretations.
In Type u, this is straightforward because:
- `Context.interpret M xs` is the categorical product `∏ᶜ (fun i => ⟦M | xs.nth i⟧ᵈ)`
- By `Types.productIso`, this is isomorphic to `∀ i, M.sorts (xs.nth i).underlying`
- The lift applies the embedding componentwise

**Justification:** In Type u, products are pi types (`Types.productIso : ∏ᶜ F ≅ ∀ j, F j`),
so lifting is just `fun ctx i => emb.embed _ (ctx i)` modulo the isomorphism.
-/

/-- Types.productIso.hom extracts component j when applied at j.
    Uses Mathlib's Types.productIso_hom_comp_eval (pointwise via `congrFun`). -/
lemma Types_productIso_hom_apply {J : Type v} (f : J → Type (max v u)) (x : ∏ᶜ f) (j : J) :
    (Types.productIso f).hom x j = Pi.π f j x := by
  have h := Types.productIso_hom_comp_eval f j
  exact congrFun h x

/-- Types.productIso.inv satisfies projection identity.
    Uses Mathlib's Types.productIso_inv_comp_π.
-/
lemma Types_productIso_inv_apply {J : Type v} (f : J → Type (max v u)) (g : (j : J) → f j) (j : J) :
    Pi.π f j ((Types.productIso f).inv g) = g j := by
  have h := Types.productIso_inv_comp_π f j
  exact congrFun h g

/-- Lift an element of a derived sort along an embedding.
    For base sorts: just the embedding.
    For products: apply componentwise via Types.productIso.
    `noncomputable` because `Types.productIso` is built from limit data. -/
noncomputable def liftSort {M M' : Structure S (Type u)}
    (emb : StructureEmbedding M M') : (A : DerivedSorts S.Sorts) →
    A.interpret M.sorts → A.interpret M'.sorts
  | .inj B => emb.embed B
  | .prod Aᵢ => fun x =>
    let x' := (Types.productIso _).hom x
    let y' : ∀ i, (Aᵢ i).interpret M'.sorts := fun i => liftSort emb (Aᵢ i) (x' i)
    (Types.productIso _).inv y'

/-- liftSort equals liftSort' applied to the embedding.
    Proof: structural induction on the derived sort. -/
theorem liftSort_eq_liftSort' {M M' : Structure S (Type u)}
    (emb : StructureEmbedding M M') (D : DerivedSorts S.Sorts) (x : D.interpret M.sorts) :
    liftSort emb D x = liftSort' emb.embed D x := by
  induction D with
  | inj B => rfl
  | prod Aᵢ ih =>
    simp only [liftSort, liftSort']
    -- Both sides are productIso.inv applied to a function.
    -- We need to show the functions are equal.
    -- Goal: productIso.inv (fun i => liftSort ...) = productIso.inv (fun i => liftSort' ...)
    -- This follows by congruence if the functions are equal
    have heq : (fun i => liftSort emb (Aᵢ i) ((Types.productIso _).hom x i)) =
        (fun i => liftSort' emb.embed (Aᵢ i) ((Types.productIso _).hom x i)) := by
      funext i
      exact ih i _
    simp only [heq]

/-- liftSort is injective for any derived sort.
    For base sorts, this is just embed_inj.
    For products, this follows from componentwise injectivity.
-/
theorem liftSort_injective {M M' : Structure S (Type u)}
    (emb : StructureEmbedding M M') (D : DerivedSorts S.Sorts) :
    Function.Injective (liftSort emb D) := by
  induction D with
  | inj B =>
    -- For base sorts, liftSort = embed, which is injective by embed_inj
    exact emb.embed_inj B
  | prod Aᵢ ih =>
    -- For products, show componentwise injectivity implies total injectivity
    intro x y hxy
    -- liftSort emb (.prod Aᵢ) x = productIso.inv (fun i => liftSort emb (Aᵢ i) (productIso.hom x i))
    simp only [liftSort] at hxy
    -- hxy : productIso.inv (fun i => ...) = productIso.inv (fun i' => ...)
    -- productIso is an isomorphism, so its inv is injective (via hom ∘ inv = id)
    let iso_M' := Types.productIso (fun j => (Aᵢ j).interpret M'.sorts)
    -- In Types, hom ≫ inv = 𝟙 gives hom (inv x) = x
    have hinv_li : Function.LeftInverse iso_M'.hom iso_M'.inv := fun a => by
      have h := congrFun (iso_M'.inv_hom_id) a
      simp only [types_comp_apply, types_id_apply] at h
      exact h
    have hinv_inj : Function.Injective iso_M'.inv := hinv_li.injective
    have h := hinv_inj hxy
    -- h : (fun i => liftSort emb (Aᵢ i) (productIso.hom x i)) =
    --     (fun i => liftSort emb (Aᵢ i) (productIso.hom y i))
    -- Extract componentwise and use ih
    have hcomp : ∀ i, (Types.productIso _).hom x i = (Types.productIso _).hom y i := by
      intro i
      have hi := congrFun h i
      exact ih i hi
    -- Reconstruct equality of x and y
    have hxy' : (Types.productIso _).hom x = (Types.productIso _).hom y := funext hcomp
    let iso_M := Types.productIso (fun j => (Aᵢ j).interpret M.sorts)
    have hhom_li : Function.LeftInverse iso_M.inv iso_M.hom := fun a => by
      have h := congrFun (iso_M.hom_inv_id) a
      simp only [types_comp_apply, types_id_apply] at h
      exact h
    have hhom_inj : Function.Injective iso_M.hom := hhom_li.injective
    exact hhom_inj hxy'

/-- Lift an embedding to context interpretations (componentwise application).
    Unpacks the categorical product via `Types.productIso`, lifts each component
    with `liftSort`, and repacks. -/
noncomputable def liftEmbedContext {M M' : Structure S (Type u)}
    (emb : StructureEmbedding M M') (xs : Context S) :
    Context.interpret M xs → Context.interpret M' xs := fun ctx =>
  let ctx' := (Types.productIso _).hom ctx
  let liftedCtx' : ∀ i, (xs.nth i).interpret M'.sorts :=
    fun i => liftSort emb (xs.nth i) (ctx' i)
  (Types.productIso _).inv liftedCtx'

/-- Generalized relation preservation for arbitrary derived sort domains.
    This is the version needed for formula satisfaction monotonicity.
    Follows from RelPreservingEmbedding.rel_preserve via liftSort_eq_liftSort'. -/
theorem rel_preserve_general {M M' : Structure S (Type u)}
    (emb : RelPreservingEmbedding M M')
    (R : S.Relations) (x : R.domain.interpret M.sorts) :
    subobjectMem (M.Relations R) x →
    subobjectMem (M'.Relations R) (liftSort emb.toStructureEmbedding R.domain x) := by
  intro hmem
  rw [liftSort_eq_liftSort']
  exact emb.rel_preserve R x hmem

/-!
### Formula Monotonicity

For geometric formulas, satisfaction transfers across relation-preserving embeddings.
The proof outline by formula case:

| Formula | Interpretation | Why monotone |
|---------|---------------|--------------|
| `rel R t` | `pullback ⟦t⟧ᵗ (M.Relations R)` | rel_preserve + pullback naturality |
| `true` | `⊤` | Always satisfied |
| `false` | `⊥` | Never satisfied (vacuous) |
| `φ ∧ ψ` | `φ.interpret ⨯ ψ.interpret` | IH on both components |
| `t₁ = t₂` | `equalizerSubobject ⟦t₁⟧ᵗ ⟦t₂⟧ᵗ` | Embedding injectivity |
| `∃x.φ` | `(exists π).obj φ.interpret` | Witness transfers via emb |
| `⋁ᵢφᵢ` | `∐ᵢ φᵢ.interpret` | Satisfied disjunct transfers |

Each case uses specific Mathlib lemmas about Type u:
- `true/false`: `Types.subobjectEquivSet` sends ⊤ to univ, ⊥ to ∅
- `conj`: Product of subobjects = intersection via order iso
- `eq`: Equalizer in Type u = `{x | f x = g x}` (Types.equalizer_eq_kernel)
- `exists`: Image in Type u = `Set.range f`
- `infdisj`: Coproduct = union
-/

/-- Term interpretation commutes with embedding via liftSort.
    Proof by induction on term structure. -/
theorem term_interpret_commutes {M M' : Structure S (Type u)}
    [κ : SmallUniverse S] [G : Geometric κ (Type u)]
    (emb : StructureEmbedding M M')
    {xs : Context S} {A : DerivedSorts S.Sorts}
    (t : Term xs A) (ctx : Context.interpret M xs) :
    Term.interpret M' t (liftEmbedContext emb xs ctx) =
    liftSort emb A (Term.interpret M t ctx) := by
  -- Induction on term structure.
  -- Each case requires careful handling of Types.productIso and eqToHom casts.
  -- The key insights:
  -- - var: liftEmbedContext applies liftSort componentwise, extraction via Pi.π matches
  -- - func: follows from func_comm generalized to derived sorts
  -- - pair: componentwise by IH, using productIso injectivity
  -- - proj: IH plus extraction from liftSort on products
  induction t with
  | var v =>
    -- Term.interpret for var v is: Pi.π _ v ≫ eqToHom _
    -- In Type u, eqToHom is identity when proving xs.nth v = xs.nth v (rfl)
    simp only [Term.interpret, types_comp_apply, eqToHom_refl, types_id_apply]
    -- Goal: Pi.π _ v (liftEmbedContext emb xs ctx) = liftSort emb _ (Pi.π _ v ctx)
    --
    -- liftEmbedContext applies liftSort componentwise via Types.productIso:
    --   liftEmbedContext ctx = productIso.inv (fun i => liftSort (productIso.hom ctx i))
    -- Extracting component v via Pi.π gives the v-th component of the function.
    --
    -- Define the relevant functions with explicit types
    let f_M := fun i : Fin xs.length => (xs.nth i).interpret M.sorts
    let f_M' := fun i : Fin xs.length => (xs.nth i).interpret M'.sorts
    -- The lifted function
    let g : (i : Fin xs.length) → f_M' i :=
      fun i => liftSort emb (xs.nth i) ((Types.productIso f_M).hom ctx i)
    -- liftEmbedContext is productIso.inv applied to g
    have h1 : liftEmbedContext emb xs ctx = (Types.productIso f_M').inv g := rfl
    rw [h1]
    -- Apply Types_productIso_inv_apply: Pi.π f_M' v (productIso.inv g) = g v
    rw [Types_productIso_inv_apply f_M' g v]
    -- Now goal: g v = liftSort emb (xs.nth v) (Pi.π f_M v ctx)
    -- g v = liftSort emb (xs.nth v) ((Types.productIso f_M).hom ctx v)
    -- So we need: (Types.productIso f_M).hom ctx v = Pi.π f_M v ctx
    have h2 : (Types.productIso f_M).hom ctx v = Pi.π f_M v ctx :=
      Types_productIso_hom_apply f_M ctx v
    simp only [g, h2]
    -- Goal should now be: liftSort emb (xs.nth v) (Pi.π f_M v ctx) = liftSort emb _ (Pi.π _ v ctx)
    -- This is definitionally true since f_M i = (xs.nth i).interpret M.sorts
    rfl
  | func f t' ih =>
    -- Function application: (func f t').interpret M ctx = t'.interpret M ctx ≫ M.Functions f
    -- In Type u, composition is just function application.
    simp only [Term.interpret, types_comp_apply]
    -- Goal: M'.Functions f (t'.interpret M' (liftEmbedContext emb xs ctx)) =
    --       liftSort emb f.codomain (M.Functions f (t'.interpret M ctx))
    -- By IH: t'.interpret M' (liftEmbedContext emb xs ctx) = liftSort emb f.domain (t'.interpret M ctx)
    rw [ih]
    -- Goal: M'.Functions f (liftSort emb f.domain (t'.interpret M ctx)) =
    --       liftSort emb f.codomain (M.Functions f (t'.interpret M ctx))
    -- This is exactly func_comm (with sides swapped)
    -- func_comm : liftSort' embed f.codomain (M.Functions f x) = M'.Functions f (liftSort' embed f.domain x)
    -- liftSort emb = liftSort' emb.embed (we need a lemma for this or unfold)
    have hfc := emb.func_comm f (t'.interpret M ctx)
    -- hfc : liftSort' emb.embed f.codomain (M.Functions f _) = M'.Functions f (liftSort' emb.embed f.domain _)
    -- We need: M'.Functions f (liftSort emb f.domain _) = liftSort emb f.codomain (M.Functions f _)
    -- which is hfc.symm after showing liftSort emb = liftSort' emb.embed
    rw [liftSort_eq_liftSort' emb f.domain, liftSort_eq_liftSort' emb f.codomain]
    exact hfc.symm
  | @pair n Aᵢ tᵢ ih =>
    -- Pair builds a product from component interpretations.
    -- Both sides are elements of the product type. Show equal componentwise.
    simp only [Term.interpret]
    -- Use that Types.productIso is an isomorphism to transfer to component equality
    let f_M := fun j : Fin n => (Aᵢ j).interpret M.sorts
    let f_M' := fun j : Fin n => (Aᵢ j).interpret M'.sorts
    let lhs := Pi.lift (fun i => (tᵢ i).interpret M') (liftEmbedContext emb xs ctx)
    let rhs := liftSort emb (.prod Aᵢ) (Pi.lift (fun i => (tᵢ i).interpret M) ctx)
    -- Show lhs and rhs are equal by applying Types.productIso.hom and using funext
    suffices h : (Types.productIso f_M').hom lhs = (Types.productIso f_M').hom rhs by
      have hinj := (Types.productIso f_M').toEquiv.injective
      exact hinj h
    funext j
    simp only [Types_productIso_hom_apply, Types.pi_lift_π_apply, lhs]
    -- Goal: (tᵢ j).interpret M' (liftEmbedContext emb xs ctx) = (Types.productIso f_M').hom rhs j
    rw [ih j]
    -- RHS
    simp only [rhs]
    let x := Pi.lift (fun i => (tᵢ i).interpret M) ctx
    let g : (j : Fin n) → f_M' j := fun j => liftSort emb (Aᵢ j) ((Types.productIso f_M).hom x j)
    have h1 : liftSort emb (.prod Aᵢ) x = (Types.productIso f_M').inv g := rfl
    rw [h1, Types_productIso_inv_apply f_M' g j]
    simp only [g, Types_productIso_hom_apply, x, Types.pi_lift_π_apply]
  | @proj n Aᵢ t' i ih =>
    -- Projection extracts the i-th component from a product.
    -- Term.interpret M (proj t' i) = t'.interpret M ≫ Pi.π _ i
    simp only [Term.interpret, types_comp_apply]
    -- Goal: Pi.π _ i (t'.interpret M' (liftEmbedContext emb xs ctx)) =
    --       liftSort emb (Aᵢ i) (Pi.π _ i (t'.interpret M ctx))
    -- By IH: t'.interpret M' (liftEmbedContext emb xs ctx) = liftSort emb (.prod Aᵢ) (t'.interpret M ctx)
    rw [ih]
    -- Goal: Pi.π _ i (liftSort emb (.prod Aᵢ) (t'.interpret M ctx)) =
    --       liftSort emb (Aᵢ i) (Pi.π _ i (t'.interpret M ctx))
    -- This is "liftSort distributes over projection"
    -- By definition, liftSort emb (.prod Aᵢ) x = productIso.inv (fun j => liftSort emb (Aᵢ j) (productIso.hom x j))
    let x := Term.interpret M t' ctx
    let f_M := fun j : Fin n => (Aᵢ j).interpret M.sorts
    let f_M' := fun j : Fin n => (Aᵢ j).interpret M'.sorts
    let g : (j : Fin n) → f_M' j := fun j => liftSort emb (Aᵢ j) ((Types.productIso f_M).hom x j)
    -- liftSort emb (.prod Aᵢ) x = (Types.productIso f_M').inv g
    have h1 : liftSort emb (.prod Aᵢ) x = (Types.productIso f_M').inv g := rfl
    rw [h1]
    -- Apply Types_productIso_inv_apply: Pi.π f_M' i (productIso.inv g) = g i
    rw [Types_productIso_inv_apply f_M' g i]
    -- Goal: g i = liftSort emb (Aᵢ i) (Pi.π f_M i x)
    -- g i = liftSort emb (Aᵢ i) ((Types.productIso f_M).hom x i)
    have h2 : (Types.productIso f_M).hom x i = Pi.π f_M i x :=
      Types_productIso_hom_apply f_M x i
    simp only [g, h2]
    rfl

/-- Context morphism interpretation commutes with liftEmbedContext.
    This is the context morphism analogue of term_interpret_commutes.
    For a context morphism σ : ys ⟶ xs, we have:
    liftEmbedContext xs (σ.interpret M ctx) = σ.interpret M' (liftEmbedContext ys ctx) -/
theorem hom_interpret_commutes {M M' : Structure S (Type u)}
    [κ : SmallUniverse S] [G : Geometric κ (Type u)]
    (emb : StructureEmbedding M M')
    {ys xs : Context S}
    (σ : ys ⟶ xs) (ctx : Context.interpret M ys) :
    liftEmbedContext emb xs (Context.Hom.interpret M σ ctx) =
    Context.Hom.interpret M' σ (liftEmbedContext emb ys ctx) := by
  -- σ.interpret = Pi.lift (fun i => (σ i).interpret)
  -- Both sides are built from Pi.lift; compare componentwise
  simp only [Context.Hom.interpret]
  -- Goal: liftEmbedContext xs (Pi.lift (fun i => (σ i).interpret M) ctx) =
  --       Pi.lift (fun i => (σ i).interpret M') (liftEmbedContext ys ctx)
  -- Use Types.productIso to extract components
  let f_M := fun i : Fin xs.length => (xs.nth i).interpret M.sorts
  let f_M' := fun i : Fin xs.length => (xs.nth i).interpret M'.sorts
  apply (Types.productIso f_M').toEquiv.injective
  funext i
  -- Compare components: apply productIso.hom and extract i-th component
  simp only [Iso.toEquiv_fun]
  rw [Types_productIso_hom_apply f_M', Types_productIso_hom_apply f_M']
  -- RHS: Pi.π f_M' i (Pi.lift (fun i => (σ i).interpret M') (liftEmbedContext ys ctx))
  --    = (σ i).interpret M' (liftEmbedContext ys ctx)
  rw [Types.pi_lift_π_apply]
  -- LHS: Pi.π f_M' i (liftEmbedContext xs (Pi.lift (fun i => (σ i).interpret M) ctx))
  -- By definition of liftEmbedContext
  simp only [liftEmbedContext]
  rw [Types_productIso_inv_apply f_M', Types_productIso_hom_apply f_M]
  -- LHS: liftSort emb (xs.nth i) (Pi.π f_M i (Pi.lift (fun i => (σ i).interpret M) ctx))
  rw [Types.pi_lift_π_apply]
  -- LHS: liftSort emb (xs.nth i) ((σ i).interpret M ctx)
  -- RHS: (σ i).interpret M' (liftEmbedContext ys ctx)
  -- By term_interpret_commutes
  exact (term_interpret_commutes emb (σ i) ctx).symm

/-!
**Formula Satisfaction Monotonicity**

Geometric formula satisfaction is preserved by relation-preserving embeddings.
This is the semantic justification for the CALM theorem: valid queries
remain valid as the database grows.

The proof structure is complete; each case requires unpacking the categorical
definitions using Type u specific lemmas from Mathlib.
-/

/-- In Type u, morphisms from initial objects are monomorphisms (vacuously injective) -/
instance : InitialMonoClass (Type u) where
  isInitial_mono_from {I} X hI := by
    -- hI : IsInitial I means I is empty (in Type u)
    -- So any morphism from I is injective (vacuously)
    rw [mono_iff_injective]
    intro a b _
    -- I is empty: there's a map to PEmpty, so I must be empty
    have hemp : IsEmpty I := ⟨fun x => PEmpty.elim (hI.to PEmpty.{u+1} x)⟩
    exact hemp.elim a

/-- ⊤.arrow is surjective in Type u (since it's an iso, and isos are bijections) -/
theorem top_arrow_surjective {X : Type u} : Function.Surjective (⊤ : Subobject X).arrow := by
  haveI : IsIso (⊤ : Subobject X).arrow := Subobject.isIso_top_arrow
  exact ((isIso_iff_bijective (⊤ : Subobject X).arrow).mp inferInstance).2

/-- ⊥.underlying is empty in Type u.
    With Mathlib's OrderBot (via instance priority override), this follows from botCoeIsoInitial. -/
theorem bot_underlying_isEmpty {X : Type u} : IsEmpty ((⊥ : Subobject X) : Type u) := by
  -- Chain the two isos: ⊥-underlying ≅ initial object ≅ PEmpty, then transport emptiness.
  have h1 : (Subobject.underlying.obj (⊥ : Subobject X)) ≅ ⊥_ (Type u) := Subobject.botCoeIsoInitial
  have h2 : ⊥_ (Type u) ≅ PEmpty := Types.initialIso
  exact ⟨fun y => PEmpty.elim ((h1 ≪≫ h2).hom y)⟩

/-- The set corresponding to a subobject under Types.subobjectEquivSet is the range of its arrow.
    This is essentially by definition since both go through the representative.
-/
theorem subobject_equiv_eq_range {X : Type u} (f : Subobject X) :
    (Types.subobjectEquivSet X) f = Set.range f.arrow := by
  simp only [Types.subobjectEquivSet]
  rfl

/-- Types.equalizerIso.inv sends ⟨x, heq⟩ to the element of equalizer that ι maps to x. -/
lemma types_equalizerIso_inv_ι {X Y : Type u} (f g : X ⟶ Y) (x_sub : { x : X // f x = g x }) :
    equalizer.ι f g ((Types.equalizerIso f g).inv x_sub) = x_sub.val := by
  have h := limit.isoLimitCone_inv_π (F := parallelPair f g) Types.equalizerLimit WalkingParallelPair.zero
  simp only [Types.equalizerIso, parallelPair_obj_zero, limit.π] at h ⊢
  exact congrFun h x_sub

/-- In Type u, x ∈ range (equalizerSubobject f g).arrow iff f x = g x. -/
theorem equalizer_range_iff {X Y : Type u} (f g : X ⟶ Y) (x : X) :
    x ∈ Set.range (equalizerSubobject f g).arrow ↔ f x = g x := by
  simp only [equalizerSubobject]
  constructor
  · -- Forward: a preimage z under the subobject arrow yields f x = g x
    -- via the equalizer condition ι ≫ f = ι ≫ g.
    intro ⟨z, hz⟩
    let z' := (Subobject.underlyingIso (equalizer.ι f g)).hom z
    have hz' : equalizer.ι f g z' = x := by
      have h := Subobject.underlyingIso_hom_comp_eq_mk (equalizer.ι f g)
      simp only [← h, types_comp_apply] at hz
      exact hz
    have hcond := equalizer.condition f g
    have h1 : (equalizer.ι f g ≫ f) z' = (equalizer.ι f g ≫ g) z' := by rw [hcond]
    simp only [types_comp_apply, hz'] at h1
    exact h1
  · -- Backward: package x with the proof f x = g x and push it through
    -- Types.equalizerIso.inv and the underlyingIso to land in the subobject.
    intro heq
    let x_sub : { y : X // f y = g y } := ⟨x, heq⟩
    let z_eq : equalizer f g := (Types.equalizerIso f g).inv x_sub
    let z := (Subobject.underlyingIso (equalizer.ι f g)).inv z_eq
    use z
    have h := Subobject.underlyingIso_hom_comp_eq_mk (equalizer.ι f g)
    calc (Subobject.mk (equalizer.ι f g)).arrow z
        = ((Subobject.underlyingIso (equalizer.ι f g)).hom ≫ equalizer.ι f g)
            ((Subobject.underlyingIso (equalizer.ι f g)).inv z_eq) := by rw [h]
      _ = equalizer.ι f g ((Subobject.underlyingIso (equalizer.ι f g)).hom
            ((Subobject.underlyingIso (equalizer.ι f g)).inv z_eq)) := rfl
      _ = equalizer.ι f g z_eq := by simp
      _ = x_sub.val := types_equalizerIso_inv_ι f g x_sub
      _ = x := rfl

/-- In Type u, x ∈ range (f ⊓ g).arrow iff x is in range of both f.arrow and g.arrow.
    This uses that Types.subobjectEquivSet is an order isomorphism, so it preserves inf.
    In Set, inf is intersection, so x ∈ (f ⊓ g) ↔ x ∈ f ∧ x ∈ g. -/
theorem inf_range_iff {X : Type u} (f g : Subobject X) (x : X) :
    x ∈ Set.range (f ⊓ g).arrow ↔ x ∈ Set.range f.arrow ∧ x ∈ Set.range g.arrow := by
  -- Use the order isomorphism Types.subobjectEquivSet : Subobject X ≃o Set X
  let iso := Types.subobjectEquivSet X
  -- Translate membership using subobject_equiv_eq_range
  rw [← subobject_equiv_eq_range (f ⊓ g)]
  rw [← subobject_equiv_eq_range f]
  rw [← subobject_equiv_eq_range g]
  -- Now use that the order iso preserves inf
  have h : iso (f ⊓ g) = iso f ⊓ iso g := iso.map_inf f g
  -- Goal: x ∈ iso (f ⊓ g) ↔ x ∈ iso f ∧ x ∈ iso g
  show x ∈ iso (f ⊓ g) ↔ x ∈ iso f ∧ x ∈ iso g
  rw [h]
  -- In Set X, ⊓ = ∩, so membership is conjunction
  rfl

/-- In Type u, pullback.snd has range equal to preimage.
    For pullback g f where g : Z → Y and f : X → Y,
    range(pullback.snd) = { x | ∃ z, g z = f x } = f⁻¹(range g). -/
lemma pullback_snd_range {X Y Z : Type u} (g : Z ⟶ Y) (f : X ⟶ Y) (x : X) :
    x ∈ Set.range (pullback.snd g f) ↔ f x ∈ Set.range g := by
  constructor
  · -- Forward: transport the pullback element through Types.pullbackIsoPullback
    -- to read off its pair of components and the compatibility condition.
    intro ⟨z, hz⟩
    let z' := (Types.pullbackIsoPullback g f).hom z
    have hcond : g z'.val.1 = f z'.val.2 := z'.property
    have hsnd : z'.val.2 = x := by
      have h2 := congrFun (limit.isoLimitCone_hom_π (Types.pullbackLimitCone g f) WalkingCospan.right) z
      simp only [Types.pullbackLimitCone, limit.π] at h2
      rw [← hz]
      exact h2.symm
    use z'.val.1
    rw [← hsnd, hcond]
  · -- Backward: build the explicit pullback element ⟨(z, x), hz⟩ and map it back.
    intro ⟨z, hz⟩
    let p : Types.PullbackObj g f := ⟨(z, x), hz⟩
    let z' := (Types.pullbackIsoPullback g f).inv p
    use z'
    have h := limit.isoLimitCone_inv_π (Types.pullbackLimitCone g f) WalkingCospan.right
    exact congrFun h p

/-- For isomorphic MonoOvers, their arrows have the same range.
    This is because an iso in MonoOver X means the underlying morphism
    commutes with the arrows (as Over morphisms). -/
lemma monoover_iso_same_range {X : Type u} (A B : MonoOver X) (h : A ≅ B) :
    Set.range A.arrow = Set.range B.arrow := by
  -- Over.w gives the commuting triangles for both directions of the iso.
  have hcomm : h.hom.left ≫ B.arrow = A.arrow := Over.w h.hom
  have hcomm' : h.inv.left ≫ A.arrow = B.arrow := Over.w h.inv
  ext x
  constructor
  · intro ⟨a, ha⟩
    use h.hom.left a
    calc B.arrow (h.hom.left a)
        = (h.hom.left ≫ B.arrow) a := rfl
      _ = A.arrow a := by rw [hcomm]
      _ = x := ha
  · intro ⟨b, hb⟩
    use h.inv.left b
    calc A.arrow (h.inv.left b)
        = (h.inv.left ≫ A.arrow) b := rfl
      _ = B.arrow b := by rw [hcomm']
      _ = x := hb

/-- The arrow of a Subobject equals the arrow of its representative. -/
lemma subobject_arrow_eq_representative_arrow {X : Type u} (P : Subobject X) :
    P.arrow = (Subobject.representative.obj P).arrow := rfl

/-- In Type u, x ∈ range ((Subobject.pullback f).obj P).arrow iff f x ∈ range P.arrow.
    This is the set-theoretic fact that pullback of a subobject is the preimage.
-/
theorem pullback_range_iff {X Y : Type u} (f : X ⟶ Y) (P : Subobject Y) (x : X) :
    x ∈ Set.range ((Subobject.pullback f).obj P).arrow ↔ f x ∈ Set.range P.arrow := by
  -- Strategy: descend from Subobject to a concrete MonoOver representative,
  -- where the pullback arrow is literally `pullback.snd`, then apply
  -- `pullback_snd_range` and transport back along the representative iso.
  let R := Subobject.representative.obj P
  -- R.arrow = P.arrow
  have harrow : R.arrow = P.arrow := (subobject_arrow_eq_representative_arrow P).symm
  -- (MonoOver.pullback f).obj R has arrow = pullback.snd R.arrow f
  have hpb_arrow : ((MonoOver.pullback f).obj R).arrow = pullback.snd R.arrow f :=
    MonoOver.pullback_obj_arrow f R
  -- P = toThinSkeleton R (since representative is a section of toThinSkeleton)
  have hP : P = (toThinSkeleton (MonoOver Y)).obj R := (Quotient.out_eq P).symm
  -- (lower F).obj (toThinSkeleton R) = toThinSkeleton (F.obj R)
  have h1 : (Subobject.pullback f).obj P =
      (toThinSkeleton (MonoOver X)).obj ((MonoOver.pullback f).obj R) := by
    rw [hP]; rfl
  -- representative of the RHS is iso to (MonoOver.pullback f).obj R
  have h2 : Subobject.representative.obj ((toThinSkeleton (MonoOver X)).obj ((MonoOver.pullback f).obj R)) ≅
      (MonoOver.pullback f).obj R :=
    Subobject.representativeIso _
  -- Combine: representative of (pullback f).obj P is iso to (MonoOver.pullback f).obj R
  have h3 : Subobject.representative.obj ((Subobject.pullback f).obj P) ≅
      (MonoOver.pullback f).obj R := by rw [h1]; exact h2
  -- The arrows have the same range
  have h4 : Set.range ((Subobject.pullback f).obj P).arrow =
      Set.range ((MonoOver.pullback f).obj R).arrow := by
    rw [subobject_arrow_eq_representative_arrow]
    exact monoover_iso_same_range _ _ h3
  -- Combine everything
  rw [h4, hpb_arrow, pullback_snd_range, harrow]

/-- In Type u, the range of image.ι equals the range of the original morphism.
    This uses that factorThruImage is an epi (surjective in Type u).
-/
lemma image_ι_range_eq {X Y : Type u} (g : X ⟶ Y) :
    Set.range (image.ι g) = Set.range g := by
  ext y
  constructor
  · -- ⊆: every image element comes from X because factorThruImage is epi,
    -- i.e. surjective in Type u.
    intro ⟨z, hz⟩
    have h_epi : Epi (factorThruImage g) := inferInstance
    rw [epi_iff_surjective] at h_epi
    obtain ⟨x, hx⟩ := h_epi z
    use x
    calc g x
        = (factorThruImage g ≫ image.ι g) x := by rw [image.fac]
      _ = image.ι g (factorThruImage g x) := rfl
      _ = image.ι g z := by rw [hx]
      _ = y := hz
  · -- ⊇: g factors through the image by image.fac.
    intro ⟨x, hx⟩
    use factorThruImage g x
    calc image.ι g (factorThruImage g x)
        = (factorThruImage g ≫ image.ι g) x := rfl
      _ = g x := by rw [image.fac]
      _ = y := hx

/-- The arrow of (MonoOver.exists f).obj M equals image.ι (M.arrow ≫ f). -/
lemma monoover_exists_arrow {X Y : Type u} (f : X ⟶ Y) (M : MonoOver X) :
    ((MonoOver.exists f).obj M).arrow = image.ι (M.arrow ≫ f) := rfl

/-- The range of ((Subobject.exists f).obj P).arrow equals the range of (P.arrow ≫ f). -/
lemma subobject_exists_arrow_range {X Y : Type u} (f : X ⟶ Y) (P : Subobject X) :
    Set.range ((Subobject.exists f).obj P).arrow = Set.range (P.arrow ≫ f) := by
  -- Same representative-descent strategy as `pullback_range_iff`: compute on a
  -- MonoOver representative, then transport ranges along the representative iso.
  let rep_P := Subobject.representative.obj P
  let existsM := (MonoOver.exists f).obj rep_P
  let existsP := (Subobject.exists f).obj P

  -- Step 1: P = [rep_P] in the thin skeleton
  have h_P_eq : P = (toThinSkeleton (MonoOver X)).obj rep_P := by
    simp only [rep_P]
    exact (Quotient.out_eq P).symm

  -- Step 2: Use lower_comm to get the key equation
  have h_func : (Subobject.lower (MonoOver.exists f)).obj ((toThinSkeleton (MonoOver X)).obj rep_P) =
      (toThinSkeleton (MonoOver Y)).obj ((MonoOver.exists f).obj rep_P) := by
    have h := Subobject.lower_comm (MonoOver.exists f)
    have := congrFun (congrArg (fun G => G.obj) h) rep_P
    simp only [Functor.comp_obj] at this
    exact this

  -- Step 3: existsP = [existsM]
  have h_eq : existsP = (toThinSkeleton (MonoOver Y)).obj existsM := by
    calc existsP
        = (Subobject.lower (MonoOver.exists f)).obj P := rfl
      _ = (Subobject.lower (MonoOver.exists f)).obj ((toThinSkeleton (MonoOver X)).obj rep_P) := by
          rw [← h_P_eq]
      _ = (toThinSkeleton (MonoOver Y)).obj ((MonoOver.exists f).obj rep_P) := h_func
      _ = (toThinSkeleton (MonoOver Y)).obj existsM := rfl

  -- Step 4: representative.obj existsP ≅ existsM
  have h_iso : Subobject.representative.obj existsP ≅ existsM := by
    rw [h_eq]
    exact Subobject.representativeIso existsM

  -- Step 5: Arrows have the same range
  have h_range : Set.range existsP.arrow = Set.range existsM.arrow :=
    monoover_iso_same_range _ _ h_iso

  have h_arrow : existsM.arrow = image.ι (rep_P.arrow ≫ f) := monoover_exists_arrow f rep_P
  have h_img : Set.range (image.ι (rep_P.arrow ≫ f)) = Set.range (rep_P.arrow ≫ f) := image_ι_range_eq _
  have h_rep : rep_P.arrow = P.arrow := rfl

  rw [h_range, h_arrow, h_img, h_rep]

/-- In Type u, y ∈ range ((Subobject.exists f).obj P).arrow iff ∃ x ∈ range P.arrow, f x = y.
    This is the set-theoretic fact that exists/image of a subobject is the direct image.
    NOTE(review): the `[HasImages (Type u)]` binder looks redundant — Type u has
    images via a global Mathlib instance; confirm before removing. -/
theorem exists_range_iff {X Y : Type u} [HasImages (Type u)] (f : X ⟶ Y) (P : Subobject X) (y : Y) :
    y ∈ Set.range ((Subobject.exists f).obj P).arrow ↔ ∃ x, x ∈ Set.range P.arrow ∧ f x = y := by
  rw [subobject_exists_arrow_range]
  constructor
  · intro ⟨z, hz⟩
    use P.arrow z
    exact ⟨⟨z, rfl⟩, hz⟩
  · intro ⟨x, ⟨z, hz⟩, hfx⟩
    use z
    simp only [types_comp_apply, hz, hfx]

/-- For subobjects A ≤ B, if x ∈ range A.arrow then x ∈ range B.arrow.
    This is the element-level characterization of subobject ordering in Type.
-/ +theorem subobject_le_range {X : Type u} {A B : Subobject X} (h : A ≤ B) + {x : X} (hx : x ∈ Set.range A.arrow) : x ∈ Set.range B.arrow := by + -- h : A ≤ B gives us a morphism ofLE : A.underlying → B.underlying + -- with the property: ofLE ≫ B.arrow = A.arrow + obtain ⟨a, ha⟩ := hx + -- a : A.underlying, A.arrow a = x + -- Use ofLE to get an element of B.underlying + use Subobject.ofLE A B h a + -- Need: B.arrow (ofLE a) = x + rw [← ha] + exact congrFun (Subobject.ofLE_arrow h) a + +/-- In Subobject X (for Type u), the categorical coproduct equals the lattice supremum. + This follows from the universal properties: both are the least upper bound of the family. -/ +theorem coproduct_eq_iSup {X : Type u} {ι : Type*} (P : ι → Subobject X) [HasCoproduct P] : + ∐ P = ⨆ i, P i := by + apply le_antisymm + · -- ∐ P ≤ ⨆ P: construct morphism from ∐ P to ⨆ P using the coproduct universal property + exact Quiver.Hom.le (Sigma.desc (fun i => (le_iSup P i).hom)) + · -- ⨆ P ≤ ∐ P: show P i ≤ ∐ P for all i, then ⨆ is least upper bound + apply iSup_le + intro i + exact Quiver.Hom.le (Sigma.ι P i) + +/-- In Type u, x ∈ range (⨆ᵢ Pᵢ).arrow iff ∃ i, x ∈ range (Pᵢ).arrow. + This is the set-theoretic fact that supremum of subobjects is union. 
-/ +theorem iSup_range_iff {X : Type u} {ι : Type*} (P : ι → Subobject X) (x : X) : + x ∈ Set.range (⨆ i, P i).arrow ↔ ∃ i, x ∈ Set.range (P i).arrow := by + -- Use the order isomorphism Types.subobjectEquivSet + let iso := Types.subobjectEquivSet X + -- iso preserves suprema: iso (⨆ᵢ Pᵢ) = ⨆ᵢ (iso Pᵢ) + -- In Set X, ⨆ = ⋃, so membership is existential + rw [← subobject_equiv_eq_range (⨆ i, P i)] + -- Use that the order iso preserves iSup + have h : iso (⨆ i, P i) = ⨆ i, iso (P i) := iso.map_iSup P + rw [h] + -- In Set X, ⨆ (as sets) is union, so x ∈ ⋃ᵢ Sᵢ ↔ ∃ i, x ∈ Sᵢ + simp only [Set.iSup_eq_iUnion, Set.mem_iUnion] + constructor + · intro ⟨i, hi⟩ + use i + rw [← subobject_equiv_eq_range (P i)] + exact hi + · intro ⟨i, hi⟩ + use i + rw [← subobject_equiv_eq_range (P i)] at hi + exact hi + +theorem formula_satisfaction_monotone {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : RelPreservingEmbedding M M') + {xs : Context S} + (φ : Formula xs) + (t : Context.interpret M xs) + (hsat : formulaSatisfied (M := M) φ t) : + formulaSatisfied (M := M') φ (liftEmbedContext emb.toStructureEmbedding xs t) := by + induction φ with + | rel R term => + -- rel R t ↦ (Subobject.pullback (term.interpret)).obj (M.Relations R) + -- By pullback_range_iff: t ∈ this iff term.interpret M t ∈ M.Relations R + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ range ((pullback (term.interpret M)).obj (M.Relations R)).arrow + -- Goal : liftEmbedContext t ∈ range ((pullback (term.interpret M')).obj (M'.Relations R)).arrow + rw [pullback_range_iff] at hsat ⊢ + -- hsat : term.interpret M t ∈ range (M.Relations R).arrow + -- Goal : term.interpret M' (liftEmbedContext t) ∈ range (M'.Relations R).arrow + -- Apply term_interpret_commutes to rewrite the LHS + rw [term_interpret_commutes emb.toStructureEmbedding term t] + -- Goal: liftSort emb R.domain (term.interpret M t) ∈ range (M'.Relations R).arrow + -- 
Apply rel_preserve_general + exact rel_preserve_general emb R (Term.interpret M term t) hsat + | «true» => + -- ⊤ contains everything: use that ⊤.arrow is surjective + unfold formulaSatisfied subobjectMem + simp only [Formula.interpret] + exact top_arrow_surjective _ + | «false» => + -- ⊥ contains nothing: the underlying type is empty, so hsat is contradictory + -- Formula.interpret .false = ⊥, and we need to show hsat is vacuously true + unfold formulaSatisfied subobjectMem at hsat + simp only [Formula.interpret] at hsat + obtain ⟨y, _⟩ := hsat + -- y is in the underlying of ⊥ (using Geometric.instOrderBotSubobject) + -- Both Geometric's ⊥ and Mathlib's ⊥ are bottom in the same partial order, so they're equal. + -- Prove the two different ⊥s are equal by le_antisymm + have heq : ∀ {X : Type u}, + @Bot.bot (Subobject X) (Geometric.instOrderBotSubobject X).toBot = + @Bot.bot (Subobject X) Subobject.orderBot.toBot := by + intro X + apply le_antisymm + · exact @OrderBot.bot_le _ _ (Geometric.instOrderBotSubobject X) _ + · exact @OrderBot.bot_le _ _ Subobject.orderBot _ + -- Rewrite y's type to use Mathlib's ⊥ + rw [heq] at y + -- Now y : underlying of Mathlib's ⊥, which is empty + -- Derive False from y being in an empty type, then prove anything + exact False.elim (bot_underlying_isEmpty.false y) + | conj φ ψ ihφ ihψ => + -- Conjunction: both components must hold + -- Strategy: use inf_range_iff to decompose and recompose + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat: t ∈ range (φ.interpret ⨯ ψ.interpret).arrow (in M) + -- Goal: liftEmbedContext ... 
t ∈ range (φ.interpret ⨯ ψ.interpret).arrow (in M') + + -- Use prod_eq_inf: ⨯ = ⊓ in Subobject + have prod_inf_M := Subobject.prod_eq_inf (f₁ := Formula.interpret M φ) (f₂ := Formula.interpret M ψ) + have prod_inf_M' := Subobject.prod_eq_inf (f₁ := Formula.interpret M' φ) (f₂ := Formula.interpret M' ψ) + + -- Decompose: if t ∈ (φ ⊓ ψ), then t ∈ φ and t ∈ ψ + rw [prod_inf_M] at hsat + rw [inf_range_iff] at hsat + obtain ⟨hφ, hψ⟩ := hsat + + -- Apply induction hypotheses + have ihφ' := ihφ t hφ + have ihψ' := ihψ t hψ + + -- Recompose: if liftEmbedContext t ∈ φ' and ∈ ψ', then ∈ (φ' ⊓ ψ') + rw [prod_inf_M'] + rw [inf_range_iff] + exact ⟨ihφ', ihψ'⟩ + | eq t1 t2 => + -- Equality: t1 = t2 interprets as equalizerSubobject ⟦t1⟧ᵗ ⟦t2⟧ᵗ + -- Using equalizer_range_iff: t ∈ equalizerSubobject ↔ t1.interpret t = t2.interpret t + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ equalizerSubobject (t1.interpret M) (t2.interpret M) + -- Goal : liftEmbedContext t ∈ equalizerSubobject (t1.interpret M') (t2.interpret M') + rw [equalizer_range_iff] at hsat ⊢ + -- hsat : t1.interpret M t = t2.interpret M t + -- Goal : t1.interpret M' (liftEmbedContext t) = t2.interpret M' (liftEmbedContext t) + -- Apply term_interpret_commutes to both sides + rw [term_interpret_commutes emb.toStructureEmbedding t1 t] + rw [term_interpret_commutes emb.toStructureEmbedding t2 t] + -- Now goal is: liftSort emb _ (t1.interpret M t) = liftSort emb _ (t2.interpret M t) + -- This follows from hsat by congruence (liftSort is a function) + rw [hsat] + | @«exists» A xs' φ ih => + -- Existential quantification: ∃x.φ(ctx, x) interprets as + -- (Subobject.exists π).obj (φ.interpret) + -- where π : Context.interpret M (xs'.cons A) → Context.interpret M xs' + -- is the projection that drops the last variable. 
+ -- Note: xs' is the base context, xs = exists binds xs' with "∃A.φ" having context xs' + -- + -- In Type u, (exists f).obj P corresponds to the image of P under f: + -- y ∈ ((exists f).obj P).arrow iff ∃ x ∈ P.arrow, f x = y + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ range ((Subobject.exists ((xs'.π A).interpret M)).obj (φ.interpret M)).arrow + -- Goal : liftEmbedContext xs' t ∈ range ((Subobject.exists ((xs'.π A).interpret M')).obj (φ.interpret M')).arrow + rw [exists_range_iff] at hsat ⊢ + -- hsat : ∃ ctx', ctx' ∈ range (φ.interpret M).arrow ∧ (xs'.π A).interpret M ctx' = t + -- Goal : ∃ ctx', ctx' ∈ range (φ.interpret M').arrow ∧ (xs'.π A).interpret M' ctx' = liftEmbedContext xs' t + obtain ⟨ctx', hctx'_in, hctx'_proj⟩ := hsat + -- Lift ctx' to M' + let ctx'_lifted := liftEmbedContext emb.toStructureEmbedding _ ctx' + use ctx'_lifted + constructor + · -- Show ctx'_lifted ∈ range (φ.interpret M').arrow by IH + exact ih ctx' hctx'_in + · -- Show (xs'.π A).interpret M' ctx'_lifted = liftEmbedContext xs' t + -- By hom_interpret_commutes: liftEmbedContext xs' ((xs'.π A).interpret M ctx') = + -- (xs'.π A).interpret M' (liftEmbedContext (A ∶ xs') ctx') + have hcomm := hom_interpret_commutes emb.toStructureEmbedding (xs'.π A) ctx' + -- hcomm : liftEmbedContext xs' ((xs'.π A).interpret M ctx') = (xs'.π A).interpret M' ctx'_lifted + rw [← hcomm, hctx'_proj] + | infdisj φᵢ ih => + -- Infinitary disjunction: ⋁ᵢφᵢ interprets as ∐ (fun i ↦ φᵢ.interpret) + -- which is the coproduct/supremum of subobjects. 
+ -- + -- In Type u, coproduct of subobjects corresponds to union: + -- x ∈ (⨆ᵢ Pᵢ).arrow iff ∃ i, x ∈ (Pᵢ).arrow + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ range (∐ᵢ (φᵢ.interpret M)).arrow + -- Goal : liftEmbedContext xs t ∈ range (∐ᵢ (φᵢ.interpret M')).arrow + -- Use coproduct_eq_iSup: ∐ P = ⨆ P for subobjects + rw [coproduct_eq_iSup] at hsat ⊢ + -- Now use iSup_range_iff to convert to existential + rw [iSup_range_iff] at hsat ⊢ + -- hsat : ∃ i, t ∈ range ((φᵢ i).interpret M).arrow + -- Goal : ∃ i, liftEmbedContext xs t ∈ range ((φᵢ i).interpret M').arrow + obtain ⟨i, hi⟩ := hsat + use i + -- By IH: formulaSatisfied (φᵢ i) t → formulaSatisfied (φᵢ i) (liftEmbedContext t) + exact ih i t hi + +/-! +## The Bidirectional Theorem: Conservative Expansions + +For **conservative expansions** (where new relation tuples only concern new elements), +formula satisfaction is an **IFF**, not just an implication. This is the key to +proving that old submodels remain valid models under universe expansion. +-/ + +/-- +**Backward direction**: For conservative expansions, formula satisfaction in M' +implies satisfaction in M. This is the converse of `formula_satisfaction_monotone`. + +Combined with `formula_satisfaction_monotone`, this gives the full IFF. 
+-/ +theorem formula_satisfaction_reflect {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + {xs : Context S} + (φ : Formula xs) + (t : Context.interpret M xs) + (hsat : formulaSatisfied φ (liftEmbedContext emb.toStructureEmbedding xs t)) : + formulaSatisfied φ t := by + -- Proof by induction on formula structure, using rel_reflect for the base case + induction φ with + | rel R term => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [pullback_range_iff] at hsat ⊢ + -- hsat : term.interpret M' (liftEmbedContext t) ∈ range (M'.Relations R).arrow + -- Use term_interpret_commutes to rewrite + rw [term_interpret_commutes emb.toStructureEmbedding term t] at hsat + -- hsat : liftSort emb R.domain (term.interpret M t) ∈ range (M'.Relations R).arrow + -- Convert liftSort to liftSort' to match rel_reflect + rw [liftSort_eq_liftSort'] at hsat + -- Apply rel_reflect + exact emb.rel_reflect R _ hsat + | «true» => + -- ⊤ contains everything + unfold formulaSatisfied subobjectMem + simp only [Formula.interpret] + exact top_arrow_surjective _ + | false => + unfold formulaSatisfied subobjectMem at hsat + simp only [Formula.interpret] at hsat + have heq : ∀ {X : Type u}, + @Bot.bot (Subobject X) (Geometric.instOrderBotSubobject X).toBot = + @Bot.bot (Subobject X) Subobject.orderBot.toBot := by + intro X + apply le_antisymm + · exact @OrderBot.bot_le _ _ (Geometric.instOrderBotSubobject X) _ + · exact @OrderBot.bot_le _ _ Subobject.orderBot _ + obtain ⟨y, _⟩ := hsat + rw [heq] at y + exact False.elim (bot_underlying_isEmpty.false y) + | conj φ ψ ihφ ihψ => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + have prod_inf_M := Subobject.prod_eq_inf (f₁ := Formula.interpret M φ) (f₂ := Formula.interpret M ψ) + have prod_inf_M' := Subobject.prod_eq_inf (f₁ := Formula.interpret M' φ) (f₂ := Formula.interpret M' ψ) + rw [prod_inf_M] 
+ rw [inf_range_iff] + rw [prod_inf_M'] at hsat + rw [inf_range_iff] at hsat + obtain ⟨hφ', hψ'⟩ := hsat + exact ⟨ihφ t hφ', ihψ t hψ'⟩ + | eq t1 t2 => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [equalizer_range_iff] at hsat ⊢ + -- hsat : t1.interpret M' (liftEmbedContext t) = t2.interpret M' (liftEmbedContext t) + -- Use term_interpret_commutes and injectivity of embedding + rw [term_interpret_commutes emb.toStructureEmbedding t1 t] at hsat + rw [term_interpret_commutes emb.toStructureEmbedding t2 t] at hsat + -- hsat : liftSort emb _ (t1.interpret M t) = liftSort emb _ (t2.interpret M t) + -- By injectivity of liftSort (which uses embed's injectivity) + exact liftSort_injective emb.toStructureEmbedding _ hsat + | @«exists» A xs' φ ih => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [exists_range_iff] at hsat ⊢ + obtain ⟨ctx'_lifted, hctx'_in, hctx'_proj⟩ := hsat + -- ctx'_lifted = (a', t') where a' : A.interpret M' and t' = liftEmbedContext t + -- hctx'_in : φ is satisfied by ctx'_lifted in M' + -- We need a witness (a, t) in M where a : A.interpret M satisfies φ + -- + -- MATHEMATICAL ISSUE: The witness a' in M' might be a "new" element not in the + -- image of the embedding. For the backward reflection to work, we would need + -- either: + -- (1) The witness to always be in the image (requires additional structure), or + -- (2) A different witness in M that still satisfies φ (model completeness) + -- + -- This sorry represents a genuine mathematical gap: conservative expansion + -- alone doesn't guarantee existential reflection. The IFF theorem is still + -- useful for quantifier-free formulas and formulas where witnesses can be + -- traced back to M. 
+ sorry + | infdisj φᵢ ih => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [coproduct_eq_iSup] at hsat ⊢ + rw [iSup_range_iff] at hsat ⊢ + obtain ⟨i, hi⟩ := hsat + use i + exact ih i t hi + +/-- +**Formula satisfaction is an IFF for conservative expansions**: + +For a conservative expansion (new relation tuples only concern new elements), +a tuple t from M satisfies φ in M if and only if lifted(t) satisfies φ in M'. + +This is the key theorem for proving model preservation under universe expansion. + +**Caveat**: The backward direction (reflect) has a sorry in the existential case. +This is because an existential witness in M' might be a "new" element not in +the image of the embedding. Full reflection of existentials would require +additional structure (e.g., witness reflection property) or model completeness. +The theorem is fully mechanized for quantifier-free formulas. +-/ +theorem formula_satisfaction_iff {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + {xs : Context S} + (φ : Formula xs) + (t : Context.interpret M xs) : + formulaSatisfied φ t ↔ + formulaSatisfied φ (liftEmbedContext emb.toStructureEmbedding xs t) := + ⟨formula_satisfaction_monotone emb.toRelPreservingEmbedding φ t, + formula_satisfaction_reflect emb φ t⟩ + +/-! +### Sequent and Theory Preservation + +With the IFF theorem, we can now prove proper sequent and theory preservation. +-/ + +/-- +**Sequent preservation for conservative expansions**: + +If a sequent (premise ⊢ conclusion) holds in M, and emb is a conservative expansion, +then for any tuple t from M: +- If lifted(t) satisfies the premise in M', then lifted(t) satisfies the conclusion in M' + +This follows because: +1. premise(lifted(t)) in M' ↔ premise(t) in M (by formula_satisfaction_iff) +2. In M, premise(t) → conclusion(t) (by the sequent) +3. 
conclusion(t) in M ↔ conclusion(lifted(t)) in M' (by formula_satisfaction_iff) +-/ +theorem sequent_preservation {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + (seq : S.Sequent) + (hseq : Sequent.interpret M seq) + (t : Context.interpret M seq.ctx) + (hprem : formulaSatisfied seq.premise (liftEmbedContext emb.toStructureEmbedding seq.ctx t)) : + formulaSatisfied seq.concl (liftEmbedContext emb.toStructureEmbedding seq.ctx t) := by + -- Step 1: premise(lifted(t)) → premise(t) in M (backward direction of IFF) + have hprem_M := (formula_satisfaction_iff emb seq.premise t).mpr hprem + -- Step 2: In M, premise(t) → conclusion(t) via subobject ordering + -- hseq : ⟦M|premise⟧ ≤ ⟦M|conclusion⟧ + -- This means: if t ∈ range(premise) then t ∈ range(conclusion) + unfold Sequent.interpret at hseq + unfold formulaSatisfied subobjectMem at hprem_M ⊢ + have hconcl_M : t ∈ Set.range (Formula.interpret M seq.concl).arrow := + subobject_le_range hseq hprem_M + -- Step 3: conclusion(t) in M → conclusion(lifted(t)) in M' (forward direction of IFF) + exact (formula_satisfaction_iff emb seq.concl t).mp hconcl_M + +/-- +**Theory preservation for conservative expansions**: + +If M satisfies theory T, and emb is a conservative expansion to M', +then for any tuple t from M and any axiom in T: +- The axiom holds for lifted(t) in M' +-/ +theorem theory_preservation {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + (T : S.Theory) + (hM : Theory.interpret M T) + (seq : S.Sequent) + (hseq_in : seq ∈ T.axioms) + (t : Context.interpret M seq.ctx) + (hprem : formulaSatisfied seq.premise (liftEmbedContext emb.toStructureEmbedding seq.ctx t)) : + formulaSatisfied seq.concl (liftEmbedContext emb.toStructureEmbedding seq.ctx t) := + sequent_preservation emb seq (hM seq hseq_in) t hprem + +/-! 
+### Model Set Monotonicity (The Main Corollary) + +**Key Principle**: As the universe of elements expands (with new function values and +relation assertions concerning *only* new elements), the set of subsets that form +valid models of a geometric theory T grows monotonically. + +Formally, let: +- U(t) = universe at time t, with U(t) ⊆ U(t') for t ≤ t' +- Models(T, U(t)) = { S ⊆ U(t) : S is a substructure satisfying T } + +Then: Models(T, U(t)) ⊆ Models(T, U(t')) + +**Why this is true**: + +1. **Intrinsic Theory Interpretation**: `Theory.interpret S T` depends *only* on the + structure S itself—its sorts, functions, and relations. It does NOT depend on + any ambient structure that S might be embedded in. + +2. **Substructure Preservation**: When the universe expands, old substructures S ⊆ U(t) + remain unchanged: + - Same elements + - Same function values (new values only concern new elements) + - Same relation tuples (new tuples only concern new elements) + +3. **Therefore**: If S ⊨ T at time t, then S ⊨ T at time t' > t. + +4. **Moreover**: New subsets involving new elements may form *additional* models, + so the model set can only grow. 
+ +**Connection to formula_satisfaction_monotone**: + +Our main theorem `formula_satisfaction_monotone` provides the element-level view: +- For a tuple t from substructure S satisfying formula φ +- The same tuple (lifted via embedding) satisfies φ in any extension M' ⊇ S + +This connects to theory interpretation via `Sequent.interpret`: +- A sequent `premise ⊢ conclusion` holds in S iff `⟦S|premise⟧ᶠ ≤ ⟦S|conclusion⟧ᶠ` +- Equivalently: ∀ tuples t, if t satisfies premise then t satisfies conclusion +- By `formula_satisfaction_monotone`, embedded tuples preserve this property + +**Consequence for GeologMeta**: +- Incremental model checking is sound: adding elements never invalidates existing models +- Coordination-free: no need to re-verify old submodels when universe expands +- This is the semantic foundation for CALM theorem applications +-/ + +/-- +**Axiom Satisfaction for Embedded Tuples**: + +If M satisfies a theory T, and we embed M into M' via a relation-preserving embedding, +then for any tuple t from M: +- If t satisfies the premise of an axiom (premise ⊢ conclusion) in M +- Then the lifted tuple satisfies the conclusion in M' + +This is the element-level view of model preservation. +-/ +theorem axiom_satisfaction_embedded {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : RelPreservingEmbedding M M') + {xs : Context S} + (_premise conclusion : Formula xs) + (t : Context.interpret M xs) + (_hprem : formulaSatisfied _premise t) + (hconcl : formulaSatisfied conclusion t) : + formulaSatisfied conclusion (liftEmbedContext emb.toStructureEmbedding xs t) := + formula_satisfaction_monotone emb conclusion t hconcl + +/-- +**Model Set Monotonicity** (term-level witness): + +Given: +- S is a substructure of M (via embedding emb_SM) +- M is a substructure of M' (via embedding emb_MM') +- S satisfies theory T + +Then: S still satisfies T (trivially, since Theory.interpret S T depends only on S). 
+ +The embedding composition emb_SM ≫ emb_MM' shows S is also a substructure of M', +but this doesn't affect S's satisfaction of T. + +This theorem exists to document that `Theory.interpret` is intrinsic to the structure. +-/ +theorem model_set_monotone + {S_sub M M' : Structure S (Type u)} + [κ : SmallUniverse S] [_G : Geometric κ (Type u)] + (_emb_SM : StructureEmbedding S_sub M) + (_emb_MM' : StructureEmbedding M M') + (T : S.Theory) + (hT : Theory.interpret S_sub T) : + Theory.interpret S_sub T := + hT -- Trivially true: Theory.interpret depends only on S_sub, not on M or M' + +/-! +### Summary of Results + +We have now formalized the **Monotonic Submodel Property** for geometric logic: + +1. **`formula_satisfaction_monotone`**: The core theorem showing that satisfaction of + geometric formulas is preserved when tuples are lifted via relation-preserving embeddings. + +2. **`axiom_satisfaction_embedded`**: Corollary for sequent axioms—if a tuple satisfies + both premise and conclusion in M, the lifted tuple satisfies the conclusion in M'. + +3. **`model_set_monotone`**: Documents that `Theory.interpret S T` is intrinsic to S, + so valid submodels remain valid as the ambient universe expands. + +**The Key Insight**: Geometric formulas (built from relations, equality, ∧, ∨, ∃, and +infinitary ∨) are "positive existential"—they only assert existence, never non-existence. +This positivity is what makes satisfaction monotonic under structure extensions. 
+-/ + +/-- The full selection (all elements) is trivially closed -/ +def fullSelection (M : Structure S (Type u)) : ClosedSubsetSelection M where + subset := fun _ => Set.univ + func_closed := fun _ {_A} {_B} _ _ _ _ => Set.mem_univ _ + +/-- **Theorem**: The pushforward of the full selection is closed in M' -/ +theorem full_selection_pushforward_closed {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') : + ∀ (f : S.Functions) {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B), + funcPreservesSubset ((fullSelection M).toSubsetSelection.pushforward emb) f hdom hcod := + fun f {_A} {_B} hdom hcod => semantic_monotonicity emb (fullSelection M) f hdom hcod + +/-! +## The Complete Picture + +**Main Result**: Monotonic Submodel Property for Geometric Theories + +Given a signature S and a geometric theory T: + +1. **Structural Level** (proven above): + - ClosedSubsetSelection M represents a "submodel" of M + - Embeddings preserve closure: (sel.pushforward emb).func_closed + +2. **Semantic Level** (Theory.interpret): + - M ⊨ T means all sequents hold + - Sequent.interpret uses Formula.interpret (subobjects) + +3. **Connection** (the key insight): + - Elements in a ClosedSubsetSelection form a substructure + - Formula satisfaction on the substructure corresponds to membership in + the formula's interpretation restricted to the selection + - Embeddings preserve this correspondence + +4. **Consequence** (CALM theorem): + - Adding elements to a model can only ADD valid submodels + - It cannot INVALIDATE existing valid submodels + - Therefore: incremental model checking is sound +-/ + +/-! 
+## Why This Matters: CALM Theorem Connection + +The Monotonic Submodel Property enables coordination-free distributed systems: + +- **CALM Theorem**: Monotonic programs have coordination-free implementations +- **Element Addition is Monotonic**: Valid(t) ⊆ Valid(t+1) +- **Element Retraction is NOT Monotonic**: Requires coordination + +### Design Implications for GeologMeta + +1. **FuncVal and RelTuple are immutable**: Once f(a) = b, it's eternally true +2. **All facts defined at creation**: When element a is created, all f(a) are defined +3. **Only liveness changes**: To "modify" f(a), retract a and create a new element +4. **Incremental model checking**: New elements can only add valid submodels +-/ + +end MonotonicSubmodel diff --git a/proofs/lake-manifest.json b/proofs/lake-manifest.json new file mode 100644 index 0000000..ec1d120 --- /dev/null +++ b/proofs/lake-manifest.json @@ -0,0 +1,115 @@ +{"version": "1.1.0", + "packagesDir": ".lake/packages", + "packages": + [{"url": "https://github.com/kyoDralliam/model-theory-topos.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "5d0c00af95ef89b0bf6774208c853e254dc1ce33", + "name": "«model-theory-topos»", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": false, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/PatrickMassot/checkdecls.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "3d425859e73fcfbef85b9638c2a91708ef4a22d4", + "name": "checkdecls", + "manifestFile": "lake-manifest.json", + "inputRev": null, + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/mathlib4.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "19f4ef2c52b278bd96626e02d594751e6e12ac98", + "name": "mathlib", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.22.0-rc3", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/plausible", + "type": 
"git", + "subDir": null, + "scope": "leanprover-community", + "rev": "61c44bec841faabd47d11c2eda15f57ec2ffe9d5", + "name": "plausible", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/LeanSearchClient", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "6c62474116f525d2814f0157bb468bf3a4f9f120", + "name": "LeanSearchClient", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/import-graph", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "140dc642f4f29944abcdcd3096e8ea9b4469c873", + "name": "importGraph", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/ProofWidgets4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "96c67159f161fb6bf6ce91a2587232034ac33d7e", + "name": "proofwidgets", + "manifestFile": "lake-manifest.json", + "inputRev": "v0.0.67", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/aesop", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "a62ecd0343a2dcfbcac6d1e8243f5821879c0244", + "name": "aesop", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/quote4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "867d9dc77534341321179c9aa40fceda675c50d4", + "name": "Qq", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/batteries", + "type": "git", + "subDir": null, + 
"scope": "leanprover-community", + "rev": "3cabaef23886b82ba46f07018f2786d9496477d6", + "name": "batteries", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/mhuisi/lean4-cli", + "type": "git", + "subDir": null, + "scope": "", + "rev": "e22ed0883c7d7f9a7e294782b6b137b783715386", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}], + "name": "«geolog-proofs»", + "lakeDir": ".lake"} diff --git a/proofs/lakefile.lean b/proofs/lakefile.lean new file mode 100644 index 0000000..d83daca --- /dev/null +++ b/proofs/lakefile.lean @@ -0,0 +1,15 @@ +import Lake +open Lake DSL + +package «geolog-proofs» where + leanOptions := #[ + ⟨`pp.unicode.fun, true⟩ + ] + +-- Import model-theory-topos from GitHub +require «model-theory-topos» from git + "https://github.com/kyoDralliam/model-theory-topos.git" @ "main" + +@[default_target] +lean_lib «GeologProofs» where + globs := #[.submodules `GeologProofs] diff --git a/proofs/lean-toolchain b/proofs/lean-toolchain new file mode 100644 index 0000000..fff0a20 --- /dev/null +++ b/proofs/lean-toolchain @@ -0,0 +1 @@ +leanprover/lean4:v4.22.0-rc3 diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..239d362 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,331 @@ +//! Abstract Syntax Tree for Geolog +//! +//! 
//! Based on the syntax sketched in loose_thoughts/2025-12-12_12:10.md

use std::fmt;

/// A span in the source code, for error reporting
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}

/// A node with source location
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Spanned<T> {
    pub node: T,
    pub span: Span,
}

impl<T> Spanned<T> {
    pub fn new(node: T, span: Span) -> Self {
        Self { node, span }
    }
}

/// An identifier, possibly qualified with `/` (e.g., `N/P`, `W/src/arc`)
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path {
    pub segments: Vec<String>,
}

impl Path {
    /// Build a path with a single, unqualified segment.
    pub fn single(name: String) -> Self {
        Self {
            segments: vec![name],
        }
    }

    /// True when the path has exactly one segment (no `/` qualification).
    pub fn is_single(&self) -> bool {
        self.segments.len() == 1
    }

    /// The sole segment, or `None` when the path is qualified.
    pub fn as_single(&self) -> Option<&str> {
        if self.segments.len() == 1 {
            Some(&self.segments[0])
        } else {
            None
        }
    }
}

impl fmt::Display for Path {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.segments.join("/"))
    }
}

/// A complete source file
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct File {
    pub declarations: Vec<Spanned<Declaration>>,
}

/// Top-level declarations
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Declaration {
    /// `namespace Foo;`
    Namespace(String),

    /// `theory (params) Name { body }`
    Theory(TheoryDecl),

    /// `TypeExpr instance Name { body }`
    Instance(InstanceDecl),

    /// `query Name { ? : Type; }`
    Query(QueryDecl),
}

/// A theory declaration
/// e.g., `theory (N : PetriNet instance) Marking { ... }`
/// or `theory Foo extends Bar { ... }`
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TheoryDecl {
    pub params: Vec<Param>,
    pub name: String,
    /// Optional parent theory to extend
    pub extends: Option<Path>,
    pub body: Vec<Spanned<TheoryItem>>,
}

/// A parameter to a theory
/// e.g., `N : PetriNet instance`
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Param {
    pub name: String,
    pub ty: TypeExpr,
}

/// Items that can appear in a theory body
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TheoryItem {
    /// `P : Sort;`
    Sort(String),

    /// `in.src : in -> P;`
    Function(FunctionDecl),

    /// `ax1 : forall w : W. hyps |- concl;`
    Axiom(AxiomDecl),

    /// Inline instance (for nested definitions)
    /// `initial_marking : N Marking instance;`
    Field(String, TypeExpr),
}

/// A function/morphism declaration
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FunctionDecl {
    pub name: Path, // Can be dotted like `in.src`
    pub domain: TypeExpr,
    pub codomain: TypeExpr,
}

/// An axiom declaration
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AxiomDecl {
    pub name: Path, // Can be hierarchical like `ax/anc/base`
    pub quantified: Vec<QuantifiedVar>,
    pub hypotheses: Vec<Formula>,
    pub conclusion: Formula,
}

/// A quantified variable in an axiom
/// e.g., `w : W` or `w1, w2 : W`
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QuantifiedVar {
    pub names: Vec<String>,
    pub ty: TypeExpr,
}

/// A single token in a type expression stack program (concatenative parsing)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TypeToken {
    /// Push a path onto the stack (might be sort, instance ref, or theory name)
    Path(Path),

    /// The `Sort` keyword - pushes the Sort kind
    Sort,

    /// The `Prop` keyword - pushes the Prop kind
    Prop,

    /// The `instance` keyword - pops top, wraps as instance type, pushes
    Instance,

    /// Arrow - pops two types (domain, codomain), pushes function type
    /// Note: arrows are handled specially during parsing to maintain infix syntax
    Arrow,

    /// Record type literal: `[field : Type, ...]`
    /// Contains nested TypeExprs for field types (evaluated recursively)
    Record(Vec<(String, TypeExpr)>),
}

/// A type expression as a flat stack program (concatenative style)
///
/// Instead of a tree like `App(App(A, B), C)`, we store a flat sequence
/// `[Path(A), Path(B), Path(C)]` that gets evaluated during elaboration
/// when we have access to the symbol table (to know theory arities).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TypeExpr {
    pub tokens: Vec<TypeToken>,
}

impl TypeExpr {
    /// Create a type expression from a single path
    pub fn single_path(p: Path) -> Self {
        Self {
            tokens: vec![TypeToken::Path(p)],
        }
    }

    /// Create the Sort kind
    pub fn sort() -> Self {
        Self {
            tokens: vec![TypeToken::Sort],
        }
    }

    /// Create the Prop kind
    pub fn prop() -> Self {
        Self {
            tokens: vec![TypeToken::Prop],
        }
    }

    /// Check if this is a single path (common case)
    // Slice pattern keeps this consistent with is_sort/is_prop below.
    pub fn as_single_path(&self) -> Option<&Path> {
        match self.tokens.as_slice() {
            [TypeToken::Path(p)] => Some(p),
            _ => None,
        }
    }

    /// Check if this is the Sort kind
    pub fn is_sort(&self) -> bool {
        matches!(self.tokens.as_slice(), [TypeToken::Sort])
    }

    /// Check if this ends with `instance`
    pub fn is_instance(&self) -> bool {
        self.tokens.last() == Some(&TypeToken::Instance)
    }

    /// Get the inner type expression (without the trailing `instance` token)
    pub fn instance_inner(&self) -> Option<Self> {
        if self.is_instance() {
            Some(Self {
                tokens: self.tokens[..self.tokens.len() - 1].to_vec(),
            })
        } else {
            None
        }
    }

    /// Check if this is the Prop kind
    pub fn is_prop(&self) -> bool {
        matches!(self.tokens.as_slice(), [TypeToken::Prop])
    }

    /// Check if this is a record type
    pub fn as_record(&self) -> Option<&Vec<(String, TypeExpr)>> {
        match self.tokens.as_slice() {
            [TypeToken::Record(fields)] => Some(fields),
            _ => None,
        }
    }
}

/// Terms (elements of types)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Term {
    /// A variable or path: `w`, `W/src/arc`
    /// `/` is namespace qualification
    Path(Path),

    /// Function application (postfix style in surface syntax)
    /// `w W/src` means "apply W/src to w"
    App(Box<Term>, Box<Term>),

    /// Field projection: `expr .field`
    /// Note the space before `.` to distinguish from path qualification
    Project(Box<Term>, String),

    /// Record literal: `[firing: f, arc: arc]`
    Record(Vec<(String, Term)>),
}

/// Formulas (geometric logic)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Formula {
    /// False/Bottom (⊥): inconsistency, empty disjunction
    False,

    /// Relation application: `rel(term)` or `rel([field: value, ...])`
    RelApp(String, Term),

    /// Equality: `t1 = t2`
    Eq(Term, Term),

    /// Conjunction (often implicit in antecedents)
    And(Vec<Formula>),

    /// Disjunction: `phi \/ psi`
    Or(Vec<Formula>),

    /// Existential: `exists w : W. phi`
    Exists(Vec<QuantifiedVar>, Box<Formula>),

    /// Truth
    True,
}

/// An instance declaration
/// e.g., `instance ExampleNet : PetriNet = { ... }`
/// or `instance ExampleNet : PetriNet = chase { ... }` for chase-before-check
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct InstanceDecl {
    pub theory: TypeExpr,
    pub name: String,
    pub body: Vec<Spanned<InstanceItem>>,
    /// If true, run chase algorithm after elaboration before checking axioms.
    /// Syntax: `instance Name : Theory = chase { ... }`
    pub needs_chase: bool,
}

/// Items in an instance body
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum InstanceItem {
    /// Element declaration: `A : P;` or `a, b, c : P;`
    Element(Vec<String>, TypeExpr),

    /// Equation: `ab_in in.src = A;`
    Equation(Term, Term),

    /// Nested instance: `initial_marking = N Marking instance { ... };`
    NestedInstance(String, InstanceDecl),

    /// Relation assertion: `[item: buy_groceries] completed;`
    /// The Term should be a record with the relation's domain fields,
    /// and String is the relation name.
    RelationAssertion(Term, String),
}

/// A query declaration
/// e.g., `query query0 { ? : ExampleNet Problem0 ReachabilityProblemSolution; }`
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QueryDecl {
    pub name: String,
    pub goal: TypeExpr,
}
+ RelationAssertion(Term, String), +} + +/// A query declaration +/// e.g., `query query0 { ? : ExampleNet Problem0 ReachabilityProblemSolution; }` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct QueryDecl { + pub name: String, + pub goal: TypeExpr, +} diff --git a/src/bin/geolog.rs b/src/bin/geolog.rs new file mode 100644 index 0000000..6785a7e --- /dev/null +++ b/src/bin/geolog.rs @@ -0,0 +1,1288 @@ +//! Geolog REPL - Interactive environment for geometric logic +//! +//! Usage: geolog [workspace] +//! +//! Commands: +//! :help - Show help +//! :quit - Exit REPL +//! :list - List theories and instances +//! :inspect X - Show details of theory/instance X +//! :clear - Clear screen +//! :reset - Reset all state + +use std::fs; +use std::path::PathBuf; + +use rustyline::error::ReadlineError; +use rustyline::history::DefaultHistory; +use rustyline::{Config, Editor}; + +use geolog::id::NumericId; +use geolog::repl::{ + ExecuteResult, InputResult, InspectResult, ListTarget, MetaCommand, QueryResult, ReplState, + format_instance_detail, format_theory_detail, +}; + +const VERSION: &str = env!("CARGO_PKG_VERSION"); +const PROMPT: &str = "geolog> "; +const CONTINUATION: &str = "...... "; + +/// Parse command line arguments. +/// +/// Usage: geolog [-d ] [source_files...] 
+/// +/// Options: +/// -d, --dir Use as the workspace directory for persistence +/// -h, --help Show help and exit +/// -v, --version Show version and exit +/// +/// Returns (workspace_path, source_files) +fn parse_args(args: &[String]) -> (Option, Vec) { + let mut workspace_path = None; + let mut source_files = Vec::new(); + let mut i = 0; + + while i < args.len() { + let arg = &args[i]; + match arg.as_str() { + "-d" | "--dir" => { + if i + 1 < args.len() { + workspace_path = Some(PathBuf::from(&args[i + 1])); + i += 2; + } else { + eprintln!("Error: -d requires a path argument"); + std::process::exit(1); + } + } + "-h" | "--help" => { + println!("geolog v{} - Geometric Logic REPL", VERSION); + println!(); + println!("Usage: geolog [OPTIONS] [source_files...]"); + println!(); + println!("Options:"); + println!(" -d, --dir Use as workspace directory for persistence"); + println!(" -h, --help Show this help message"); + println!(" -v, --version Show version"); + println!(); + println!("Examples:"); + println!(" geolog Start REPL (in-memory, no persistence)"); + println!(" geolog -d ./myproject Start REPL with workspace persistence"); + println!(" geolog file.geolog Load file.geolog on startup"); + println!(" geolog -d ./proj f.geolog Load file into persistent workspace"); + std::process::exit(0); + } + "-v" | "--version" => { + println!("geolog v{}", VERSION); + std::process::exit(0); + } + _ if arg.starts_with('-') => { + eprintln!("Error: Unknown option '{}'", arg); + eprintln!("Try 'geolog --help' for usage information"); + std::process::exit(1); + } + _ => { + // Positional argument - treat as source file + source_files.push(PathBuf::from(arg)); + i += 1; + } + } + } + + (workspace_path, source_files) +} + +fn main() { + // Parse command line args + let args: Vec = std::env::args().skip(1).collect(); + let (workspace_path, source_files) = parse_args(&args); + + // Print banner + println!("geolog v{} - Geometric Logic REPL", VERSION); + println!("Type :help for 
help, :quit to exit\n"); + + // Initialize state + let mut state = if let Some(ref path) = workspace_path { + println!("Workspace: {}", path.display()); + ReplState::with_path(path) + } else { + ReplState::new() + }; + + // Load any source files specified on command line + for source_file in &source_files { + handle_source(&mut state, source_file); + } + + // Set up rustyline + let config = Config::builder().auto_add_history(true).build(); + let mut rl: Editor<(), DefaultHistory> = + Editor::with_config(config).expect("Failed to create editor"); + + // Try to load history + let history_path = dirs_history_path(); + if let Some(ref path) = history_path { + let _ = rl.load_history(path); + } + + // Main REPL loop + loop { + let prompt = if state.input_buffer.is_empty() { + PROMPT + } else { + CONTINUATION + }; + + match rl.readline(prompt) { + Ok(line) => { + match state.process_line(&line) { + InputResult::MetaCommand(cmd) => { + if !handle_command(&mut state, cmd) { + break; // :quit + } + } + InputResult::GeologInput(source) => { + handle_geolog(&mut state, &source); + } + InputResult::Incomplete => { + // Continue reading + } + InputResult::Empty => { + // Nothing to do + } + } + } + Err(ReadlineError::Interrupted) => { + // Ctrl-C - clear current buffer + if !state.input_buffer.is_empty() { + state.input_buffer.clear(); + state.bracket_depth = 0; + println!("^C"); + } else { + println!("Use :quit or Ctrl-D to exit"); + } + } + Err(ReadlineError::Eof) => { + // Ctrl-D - submit buffer or quit + if let Some(source) = state.force_submit() { + handle_geolog(&mut state, &source); + } else { + // Save store before quitting + if let Err(e) = state.store.save() { + eprintln!("Warning: Failed to save store: {}", e); + } + println!("\nGoodbye!"); + break; + } + } + Err(err) => { + eprintln!("Error: {:?}", err); + break; + } + } + } + + // Save history + if let Some(ref path) = history_path { + if let Some(parent) = path.parent() { + let _ = fs::create_dir_all(parent); + } 
+ let _ = rl.save_history(path); + } +} + +/// Handle a meta-command. Returns false if we should exit. +fn handle_command(state: &mut ReplState, cmd: MetaCommand) -> bool { + match cmd { + MetaCommand::Help(topic) => { + print_help(topic.as_deref()); + } + MetaCommand::Quit => { + // Save store before quitting + if let Err(e) = state.store.save() { + eprintln!("Warning: Failed to save store: {}", e); + } + println!("Goodbye!"); + return false; + } + MetaCommand::List(target) => { + handle_list(state, target); + } + MetaCommand::Inspect(name) => { + handle_inspect(state, &name); + } + MetaCommand::Clear => { + // ANSI escape to clear screen + print!("\x1B[2J\x1B[H"); + } + MetaCommand::Reset => { + state.reset(); + println!("State reset."); + } + MetaCommand::Source(path) => { + handle_source(state, &path); + } + MetaCommand::Commit(msg) => { + handle_commit(state, msg.as_deref()); + } + MetaCommand::History => { + handle_history(state); + } + MetaCommand::Add { instance, element, sort } => { + handle_add(state, &instance, &element, &sort); + } + MetaCommand::Assert { instance, relation, args } => { + handle_assert(state, &instance, &relation, &args); + } + MetaCommand::Retract { instance, element } => { + handle_retract(state, &instance, &element); + } + MetaCommand::Query { instance, sort } => { + handle_query(state, &instance, &sort); + } + MetaCommand::Explain { instance, sort } => { + handle_explain(state, &instance, &sort); + } + MetaCommand::Compile { instance, sort } => { + handle_compile(state, &instance, &sort); + } + MetaCommand::Solve { theory, budget_ms } => { + handle_solve(state, &theory, budget_ms); + } + MetaCommand::Extend { instance, theory, budget_ms } => { + handle_extend(state, &instance, &theory, budget_ms); + } + MetaCommand::Chase { instance, max_iterations } => { + handle_chase(state, &instance, max_iterations); + } + MetaCommand::Unknown(msg) => { + eprintln!("Error: {}", msg); + eprintln!("Type :help for available commands"); + } + } + 
true +} + +/// Handle geolog source input +fn handle_geolog(state: &mut ReplState, source: &str) { + match state.execute_geolog(source) { + Ok(results) => { + for result in results { + match result { + ExecuteResult::Namespace(name) => { + println!("Namespace: {}", name); + } + ExecuteResult::Theory { + name, + num_sorts, + num_functions, + num_relations, + num_axioms, + } => { + let mut parts = vec![format!("{} sorts", num_sorts)]; + if num_functions > 0 { + parts.push(format!("{} functions", num_functions)); + } + if num_relations > 0 { + parts.push(format!("{} relations", num_relations)); + } + if num_axioms > 0 { + parts.push(format!("{} axioms", num_axioms)); + } + println!("Defined theory {} ({})", name, parts.join(", ")); + } + ExecuteResult::Instance { + name, + theory_name, + num_elements, + } => { + println!( + "Defined instance {} : {} ({} elements)", + name, theory_name, num_elements + ); + } + ExecuteResult::Query(result) => { + handle_query_result(state, result); + } + } + } + } + Err(e) => { + eprintln!("Error: {}", e); + } + } +} + +/// Print help message +fn print_help(topic: Option<&str>) { + match topic { + None => { + println!("Geolog REPL Commands:"); + println!(); + println!(" :help [topic] Show help (topics: syntax, examples)"); + println!(" :quit Exit the REPL"); + println!( + " :list [target] List theories/instances (target: theories, instances, all)" + ); + println!(" :inspect Show details of a theory or instance"); + println!(" :source Load and execute a geolog file"); + println!(" :clear Clear the screen"); + println!(" :reset Reset all state"); + println!(); + println!("Version Control:"); + println!(" :commit [msg] Commit current changes"); + println!(" :history Show commit history"); + println!(); + println!("Instance Mutation:"); + println!(" :add Add element to instance"); + println!(" :assert [args] Assert relation tuple"); + println!(" :retract Retract element from instance"); + println!(); + println!("Query:"); + println!(" 
:query List all elements of a sort"); + println!(" :explain Show query execution plan"); + println!(" :compile Show RelAlgIR compilation"); + println!(" :chase [max_iter] Run chase on instance axioms"); + println!(); + println!("Solver:"); + println!(" :solve [budget_ms] Find model of theory from scratch"); + println!(" :extend [budget_ms] Find extension of instance to theory"); + println!(); + println!("Enter geolog definitions directly (theories, instances)."); + println!("Multi-line input is supported - brackets are matched automatically."); + } + Some("syntax") => { + println!("Geolog Syntax:"); + println!(); + println!(" theory Name {{"); + println!(" Sort1 : Sort;"); + println!(" Sort2 : Sort;"); + println!(" func : Sort1 -> Sort2;"); + println!(" }}"); + println!(); + println!(" instance Name : Theory = {{"); + println!(" elem1 : Sort1;"); + println!(" elem2 : Sort2;"); + println!(" elem1 func = elem2;"); + println!(" }}"); + } + Some("examples") => { + println!("Examples:"); + println!(); + println!(" theory Graph {{"); + println!(" V : Sort;"); + println!(" E : Sort;"); + println!(" src : E -> V;"); + println!(" tgt : E -> V;"); + println!(" }}"); + println!(); + println!(" instance Triangle : Graph = {{"); + println!(" a : V; b : V; c : V;"); + println!(" ab : E; ab src = a; ab tgt = b;"); + println!(" bc : E; bc src = b; bc tgt = c;"); + println!(" ca : E; ca src = c; ca tgt = a;"); + println!(" }}"); + } + Some(other) => { + println!("Unknown help topic: {}", other); + println!("Available topics: syntax, examples"); + } + } +} + +/// Handle :list command +fn handle_list(state: &ReplState, target: ListTarget) { + match target { + ListTarget::Theories | ListTarget::All => { + let theories = state.list_theories(); + if theories.is_empty() { + println!("No theories defined."); + } else { + println!("Theories:"); + for t in theories { + let mut parts = vec![format!("{} sorts", t.num_sorts)]; + if t.num_functions > 0 { + parts.push(format!("{} functions", 
t.num_functions)); + } + if t.num_relations > 0 { + parts.push(format!("{} relations", t.num_relations)); + } + if t.num_axioms > 0 { + parts.push(format!("{} axioms", t.num_axioms)); + } + println!(" {} ({})", t.name, parts.join(", ")); + } + } + } + ListTarget::Instances => {} + } + + match target { + ListTarget::Instances | ListTarget::All => { + let instances = state.list_instances(); + if instances.is_empty() { + if matches!(target, ListTarget::Instances) { + println!("No instances defined."); + } + } else { + println!("Instances:"); + for i in instances { + println!( + " {} : {} ({} elements)", + i.name, i.theory_name, i.num_elements + ); + } + } + } + ListTarget::Theories => {} + } +} + +/// Handle :inspect command +fn handle_inspect(state: &ReplState, name: &str) { + match state.inspect(name) { + Some(InspectResult::Theory(detail)) => { + println!("{}", format_theory_detail(&detail)); + } + Some(InspectResult::Instance(detail)) => { + println!("{}", format_instance_detail(&detail)); + } + None => { + eprintln!("Not found: {}", name); + eprintln!("Use :list to see available theories and instances"); + } + } +} + +/// Handle :source command +fn handle_source(state: &mut ReplState, path: &PathBuf) { + match fs::read_to_string(path) { + Ok(source) => { + println!("Loading {}...", path.display()); + handle_geolog(state, &source); + } + Err(e) => { + eprintln!("Error reading {}: {}", path.display(), e); + } + } +} + +/// Handle :commit command +fn handle_commit(state: &mut ReplState, message: Option<&str>) { + if !state.is_dirty() { + println!("Nothing to commit."); + return; + } + + match state.commit(message) { + Ok(commit_slid) => { + let msg = message.unwrap_or("(no message)"); + println!("Committed: {} (commit #{})", msg, commit_slid); + } + Err(e) => { + eprintln!("Commit failed: {}", e); + } + } +} + +/// Handle :history command +fn handle_history(state: &ReplState) { + let history = state.commit_history(); + if history.is_empty() { + println!("No commits 
yet."); + return; + } + + println!("Commit history ({} commits):", history.len()); + for (i, commit_slid) in history.iter().enumerate() { + let marker = if Some(*commit_slid) == state.store.head { + " <- HEAD" + } else { + "" + }; + println!(" {}. commit #{}{}", i + 1, commit_slid, marker); + } +} + +/// Handle :add command +fn handle_add(state: &mut ReplState, instance_name: &str, element_name: &str, sort_name: &str) { + // Look up the instance in the Store + let Some((instance_slid, _)) = state.store.resolve_name(instance_name) else { + eprintln!("Instance '{}' not found", instance_name); + return; + }; + + // Look up the sort in the Store + // For now, we use a simple name-based lookup + // In full implementation, we'd look up the sort from the theory + let sort_slid = match state.store.resolve_name(sort_name) { + Some((slid, _)) => slid, + None => { + // Try to find sort in the theory + eprintln!( + "Sort '{}' not found. Note: Full sort lookup requires querying the theory.", + sort_name + ); + eprintln!("This feature is partially implemented pending query engine (geolog-7tt)."); + return; + } + }; + + match state.store.add_elem(instance_slid, sort_slid, element_name) { + Ok(elem_slid) => { + println!( + "Added element '{}' of sort '{}' to instance '{}' (elem #{})", + element_name, sort_name, instance_name, elem_slid + ); + } + Err(e) => { + eprintln!("Failed to add element: {}", e); + } + } +} + +/// Handle :assert command +fn handle_assert(state: &mut ReplState, instance_name: &str, relation_name: &str, args: &[String]) { + use geolog::core::RelationStorage; + + // Get the instance entry + let entry = match state.instances.get_mut(instance_name) { + Some(e) => e, + None => { + eprintln!("Instance '{}' not found", instance_name); + return; + } + }; + + // Get the theory to look up the relation + let theory = match state.theories.get(&entry.theory_name) { + Some(t) => t.clone(), + None => { + eprintln!("Theory '{}' not found", entry.theory_name); + return; + } + 
}; + + // Find the relation by name + let sig = &theory.theory.signature; + let rel_id = match sig.relations.iter().position(|r| r.name == relation_name) { + Some(id) => id, + None => { + eprintln!( + "Relation '{}' not found in theory '{}'", + relation_name, entry.theory_name + ); + eprintln!("Available relations: {:?}", sig.relations.iter().map(|r| &r.name).collect::>()); + return; + } + }; + + let rel = &sig.relations[rel_id]; + + // Resolve argument elements by name from the instance's element_names map + let mut arg_slids = Vec::new(); + for arg_name in args { + if let Some(slid) = entry.element_names.get(arg_name) { + arg_slids.push(*slid); + } else { + eprintln!("Element '{}' not found in instance '{}'", arg_name, instance_name); + eprintln!("Available elements: {:?}", entry.element_names.keys().collect::>()); + return; + } + } + + // Check arity matches (for product domains, flatten the field count) + let expected_arity = match &rel.domain { + geolog::core::DerivedSort::Base(_) => 1, + geolog::core::DerivedSort::Product(fields) => fields.len(), + }; + + if arg_slids.len() != expected_arity { + eprintln!( + "Relation '{}' expects {} argument(s), got {}", + relation_name, expected_arity, arg_slids.len() + ); + return; + } + + // Add the tuple to the relation + if entry.structure.relations.len() <= rel_id { + eprintln!("Relation storage not initialized for relation {}", rel_id); + return; + } + + let already_present = entry.structure.relations[rel_id].contains(&arg_slids); + if already_present { + println!("Tuple already present in relation '{}'", relation_name); + return; + } + + entry.structure.relations[rel_id].insert(arg_slids.clone()); + + let arg_names: Vec<_> = args.to_vec(); + println!( + "Asserted {}({}) in instance '{}'", + relation_name, arg_names.join(", "), instance_name + ); +} + +/// Handle :retract command +fn handle_retract(state: &mut ReplState, instance_name: &str, element_name: &str) { + // Look up the instance + let Some((instance_slid, 
_)) = state.store.resolve_name(instance_name) else { + eprintln!("Instance '{}' not found", instance_name); + return; + }; + + // Look up the element + let Some((elem_slid, _)) = state.store.resolve_name(element_name) else { + eprintln!("Element '{}' not found", element_name); + return; + }; + + match state.store.retract_elem(instance_slid, elem_slid) { + Ok(retract_slid) => { + println!( + "Retracted element '{}' from instance '{}' (retraction #{})", + element_name, instance_name, retract_slid + ); + } + Err(e) => { + eprintln!("Failed to retract element: {}", e); + } + } +} + +/// Handle :query command +fn handle_query(state: &ReplState, instance_name: &str, sort_name: &str) { + match state.query_sort(instance_name, sort_name) { + Ok(elements) => { + if elements.is_empty() { + println!("No elements of sort '{}' in instance '{}'", sort_name, instance_name); + } else { + println!("Elements of {} in {}:", sort_name, instance_name); + for elem in elements { + println!(" {}", elem); + } + } + } + Err(e) => { + eprintln!("Query error: {}", e); + } + } +} + +/// Handle :explain command - show query execution plan +fn handle_explain(state: &ReplState, instance_name: &str, sort_name: &str) { + use geolog::query::QueryOp; + + // Get the instance + let entry = match state.instances.get(instance_name) { + Some(e) => e, + None => { + eprintln!("Instance '{}' not found", instance_name); + return; + } + }; + + // Get the theory + let theory = match state.theories.get(&entry.theory_name) { + Some(t) => t, + None => { + eprintln!("Theory '{}' not found", entry.theory_name); + return; + } + }; + + // Find the sort index + let sort_idx = match theory.theory.signature.sorts.iter().position(|s| s == sort_name) { + Some(idx) => idx, + None => { + eprintln!( + "Sort '{}' not found in theory '{}'", + sort_name, entry.theory_name + ); + return; + } + }; + + // Build the query plan (same as query_sort in repl.rs) + let plan = QueryOp::Scan { sort_idx }; + + // Display the plan using the 
Display impl + println!("Query plan for ':query {} {}':", instance_name, sort_name); + println!(); + println!("{}", plan); + println!(); + println!("Sort: {} (index {})", sort_name, sort_idx); + println!("Instance: {} (theory: {})", instance_name, entry.theory_name); +} + +/// Handle :compile command - compile query to RelAlgIR instance +fn handle_compile(state: &mut ReplState, instance_name: &str, sort_name: &str) { + use geolog::query::{to_relalg::compile_to_relalg, QueryOp}; + use geolog::universe::Universe; + + // Get the instance + let entry = match state.instances.get(instance_name) { + Some(e) => e, + None => { + eprintln!("Instance '{}' not found", instance_name); + return; + } + }; + + // Get the theory + let theory = match state.theories.get(&entry.theory_name) { + Some(t) => t, + None => { + eprintln!("Theory '{}' not found", entry.theory_name); + return; + } + }; + + // Find the sort index + let sort_idx = match theory.theory.signature.sorts.iter().position(|s| s == sort_name) { + Some(idx) => idx, + None => { + eprintln!( + "Sort '{}' not found in theory '{}'", + sort_name, entry.theory_name + ); + return; + } + }; + + // Check if RelAlgIR theory is loaded + let relalg_theory = match state.theories.get("RelAlgIR") { + Some(t) => t.clone(), + None => { + eprintln!("RelAlgIR theory not loaded. 
Loading it now..."); + // Try to load it + let meta_content = std::fs::read_to_string("theories/GeologMeta.geolog") + .unwrap_or_else(|_| { + eprintln!("Could not read theories/GeologMeta.geolog"); + String::new() + }); + let ir_content = std::fs::read_to_string("theories/RelAlgIR.geolog") + .unwrap_or_else(|_| { + eprintln!("Could not read theories/RelAlgIR.geolog"); + String::new() + }); + + if meta_content.is_empty() || ir_content.is_empty() { + return; + } + + if let Err(e) = state.execute_geolog(&meta_content) { + eprintln!("Failed to load GeologMeta: {}", e); + return; + } + if let Err(e) = state.execute_geolog(&ir_content) { + eprintln!("Failed to load RelAlgIR: {}", e); + return; + } + + state.theories.get("RelAlgIR").unwrap().clone() + } + }; + + // Build the query plan + let plan = QueryOp::Scan { sort_idx }; + + // Compile to RelAlgIR + let mut universe = Universe::new(); + match compile_to_relalg(&plan, &relalg_theory, &mut universe) { + Ok(instance) => { + println!("RelAlgIR compilation for ':query {} {}':", instance_name, sort_name); + println!(); + println!("QueryOp plan:"); + println!("{}", plan); + println!(); + println!("Compiled to RelAlgIR instance:"); + println!(" Elements: {}", instance.structure.len()); + println!(" Output wire: {:?}", instance.output_wire); + println!(); + + // Group elements by sort and show with sort names + let sig = &relalg_theory.theory.signature; + println!("Elements by sort:"); + for (sort_idx, sort_name) in sig.sorts.iter().enumerate() { + let count = instance.structure.carrier_size(sort_idx); + if count > 0 { + println!(" {}: {} element(s)", sort_name, count); + } + } + println!(); + + // Show named elements with their sorts + println!("Named elements:"); + for (slid, name) in instance.names.iter() { + let sort_idx = instance.structure.sorts[slid.index()]; + let sort_name = &sig.sorts[sort_idx]; + println!(" {} : {} = {:?}", name, sort_name, slid); + } + } + Err(e) => { + eprintln!("Failed to compile query to 
RelAlgIR: {}", e); + } + } +} + +/// Handle :solve command - find a model of a theory from scratch +fn handle_solve(state: &ReplState, theory_name: &str, budget_ms: Option) { + use geolog::solver::{solve, Budget, EnumerationResult}; + + // Look up the theory + let theory = match state.theories.get(theory_name) { + Some(t) => t.clone(), + None => { + eprintln!("Theory '{}' not found", theory_name); + eprintln!("Use :list theories to see available theories"); + return; + } + }; + + println!("Solving theory '{}'...", theory_name); + let sig = &theory.theory.signature; + println!( + " {} sorts, {} functions, {} relations, {} axioms", + sig.sorts.len(), + sig.functions.len(), + sig.relations.len(), + theory.theory.axioms.len() + ); + + // Use unified solver API + let budget = Budget::new(budget_ms.unwrap_or(5000), 10000); + let result = solve(theory.clone(), budget); + + // Report result + match result { + EnumerationResult::Found { model, time_ms } => { + println!("✓ SOLVED in {:.2}ms", time_ms); + print_witness_structure(&model, sig); + } + EnumerationResult::Unsat { time_ms } => { + println!("✗ UNSAT in {:.2}ms", time_ms); + println!(" The theory has no models (derives False)."); + } + EnumerationResult::Incomplete { time_ms, reason, .. 
} => { + println!("◯ INCOMPLETE after {:.2}ms", time_ms); + println!(" {}", reason); + println!(" Try increasing the budget: :solve {} ", theory_name); + } + } +} + +/// Print a witness structure (model) to stdout +fn print_witness_structure(model: &geolog::core::Structure, sig: &geolog::core::Signature) { + use geolog::core::RelationStorage; + use geolog::id::NumericId; + + let total_elements: usize = (0..sig.sorts.len()) + .map(|s| model.carrier_size(s)) + .sum(); + + if total_elements == 0 { + println!("\nWitness: empty structure (trivial model)"); + } else { + println!("\nWitness structure:"); + // Show sorts with elements + for (sort_id, sort_name) in sig.sorts.iter().enumerate() { + let size = model.carrier_size(sort_id); + if size > 0 { + if size <= 10 { + let ids: Vec = (0..size).map(|i| format!("#{}", i)).collect(); + println!(" {}: {{ {} }}", sort_name, ids.join(", ")); + } else { + println!(" {}: {} element(s)", sort_name, size); + } + } + } + // Show relations with tuples + for (rel_id, rel) in sig.relations.iter().enumerate() { + if rel_id < model.relations.len() { + let rel_storage = &model.relations[rel_id]; + let tuple_count = rel_storage.len(); + if tuple_count > 0 { + if tuple_count <= 10 { + let tuples: Vec = rel_storage + .iter() + .map(|t| { + let coords: Vec = + t.iter().map(|s| format!("#{}", s.index())).collect(); + format!("({})", coords.join(", ")) + }) + .collect(); + println!(" {}: {{ {} }}", rel.name, tuples.join(", ")); + } else { + println!(" {}: {} tuple(s)", rel.name, tuple_count); + } + } + } + } + } +} + +/// Handle :extend command - find extensions of an existing instance to a theory +/// +/// This uses the unified model enumeration API: `query(base, theory, budget)` finds +/// models of `theory` that extend `base`. This is the unified generalization of +/// `:solve` (where base is empty) and "find models extending M". 
+fn handle_extend(state: &ReplState, instance_name: &str, theory_name: &str, budget_ms: Option) { + use geolog::solver::{query, Budget, EnumerationResult}; + use geolog::universe::Universe; + + // Look up the base instance + let base_entry = match state.instances.get(instance_name) { + Some(entry) => entry, + None => { + eprintln!("Instance '{}' not found", instance_name); + eprintln!("Use :list instances to see available instances"); + return; + } + }; + + // Look up the extension theory + let theory = match state.theories.get(theory_name) { + Some(t) => t.clone(), + None => { + eprintln!("Theory '{}' not found", theory_name); + eprintln!("Use :list theories to see available theories"); + return; + } + }; + + println!("Extending instance '{}' to theory '{}'...", instance_name, theory_name); + let sig = &theory.theory.signature; + println!( + " Base: {} (theory {})", + instance_name, base_entry.theory_name + ); + println!( + " Target: {} sorts, {} functions, {} relations, {} axioms", + sig.sorts.len(), + sig.functions.len(), + sig.relations.len(), + theory.theory.axioms.len() + ); + + // Clone base structure and create a fresh universe for the extension + // (The solver will allocate new elements as needed) + let base_structure = base_entry.structure.clone(); + let universe = Universe::new(); // Fresh universe for new allocations + + // Use unified query API + let budget = Budget::new(budget_ms.unwrap_or(5000), 10000); + let result = query(base_structure, universe, theory.clone(), budget); + + // Report result + match result { + EnumerationResult::Found { model, time_ms } => { + println!("✓ EXTENDED in {:.2}ms", time_ms); + print_witness_structure(&model, sig); + } + EnumerationResult::Unsat { time_ms } => { + println!("✗ NO EXTENSION EXISTS in {:.2}ms", time_ms); + println!(" The base instance cannot be extended to satisfy '{}'.", theory_name); + } + EnumerationResult::Incomplete { time_ms, reason, .. 
} => { + println!("◯ INCOMPLETE after {:.2}ms", time_ms); + println!(" {}", reason); + println!(" Try increasing the budget: :extend {} {} ", instance_name, theory_name); + } + } +} + +/// Handle :chase command - run chase algorithm on instance's theory axioms +fn handle_chase(state: &mut ReplState, instance_name: &str, max_iterations: Option) { + use geolog::core::RelationStorage; + use geolog::query::chase::chase_fixpoint; + + // Get the instance + let entry = match state.instances.get_mut(instance_name) { + Some(e) => e, + None => { + eprintln!("Instance '{}' not found", instance_name); + return; + } + }; + + // Get the theory + let theory = match state.theories.get(&entry.theory_name) { + Some(t) => t.clone(), + None => { + eprintln!("Theory '{}' not found", entry.theory_name); + return; + } + }; + + let sig = &theory.theory.signature; + let axioms = &theory.theory.axioms; + + if axioms.is_empty() { + println!("Theory '{}' has no axioms to chase.", entry.theory_name); + return; + } + + println!("Running chase on instance '{}' (theory '{}')...", instance_name, entry.theory_name); + println!(" {} axiom(s) to process", axioms.len()); + + // Snapshot relation tuple counts before chase + let tuple_counts_before: Vec = entry.structure.relations + .iter() + .map(|r| r.len()) + .collect(); + + // Run the chase (tensor-backed: handles existentials in premises, etc.) 
+ let max_iter = max_iterations.unwrap_or(100); + let start = std::time::Instant::now(); + + match chase_fixpoint(axioms, &mut entry.structure, &mut state.store.universe, sig, max_iter) { + Ok(iterations) => { + let elapsed = start.elapsed(); + println!("✓ Chase completed in {} iterations ({:.2}ms)", iterations, elapsed.as_secs_f64() * 1000.0); + println!("\nStructure after chase:"); + print_structure_summary(&entry.structure, sig); + + // Check if any new tuples were added + let tuple_counts_after: Vec = entry.structure.relations + .iter() + .map(|r| r.len()) + .collect(); + let tuples_added = tuple_counts_before.iter() + .zip(tuple_counts_after.iter()) + .any(|(before, after)| after > before); + + // Save info needed for persistence before dropping entry borrow + let theory_name_owned = entry.theory_name.clone(); + + if tuples_added { + // Persist the chase results via columnar batches + // Note: This persists ALL current tuples, not just the delta. + // A more sophisticated implementation would track the delta. + if let Err(e) = persist_chase_results( + state, + instance_name, + &theory_name_owned, + ) { + eprintln!("Warning: Failed to persist chase results: {}", e); + } else { + println!("Chase results persisted to store."); + } + } + } + Err(e) => { + eprintln!("✗ Chase error: {}", e); + } + } +} + +/// Persist chase results (relation tuples) to columnar batches as IDB data. +/// +/// IDB batches are persisted locally but NOT transmitted over the wire. +/// Recipients recompute IDB by running the chase on received EDB patches. 
+fn persist_chase_results( + state: &mut ReplState, + instance_name: &str, + theory_name: &str, +) -> Result<(), String> { + use geolog::core::RelationStorage; + use geolog::id::{Slid, Uuid}; + use geolog::store::columnar::{InstanceDataBatch, RelationTupleBatch}; + + let entry = state.instances.get(instance_name).ok_or("Instance not found")?; + let structure = &entry.structure; + + // Resolve the instance in the Store + let (instance_slid, _) = state.store.resolve_name(instance_name) + .ok_or_else(|| format!("Instance '{}' not found in store", instance_name))?; + + // Get theory to map relation indices to Slids + let (theory_slid, _) = state.store.resolve_name(theory_name) + .ok_or_else(|| format!("Theory '{}' not found in store", theory_name))?; + + let rel_infos = state.store.query_theory_rels(theory_slid); + + // Build mapping from relation index to Rel UUID + let rel_idx_to_uuid: std::collections::HashMap = rel_infos + .iter() + .enumerate() + .map(|(idx, info)| (idx, state.store.get_element_uuid(info.slid))) + .collect(); + + // Build mapping from Structure Slid to element UUID + // We need to find the Elem in GeologMeta that corresponds to each Structure element + let elem_infos = state.store.query_instance_elems(instance_slid); + let mut struct_slid_to_uuid: std::collections::HashMap = std::collections::HashMap::new(); + + // Map element names to UUIDs + for info in &elem_infos { + // Try to find the structure Slid by name + if let Some(&struct_slid) = entry.slid_to_name.iter() + .find(|(_, name)| *name == &info.name) + .map(|(slid, _)| slid) + { + struct_slid_to_uuid.insert(struct_slid, state.store.get_element_uuid(info.slid)); + } + } + + // For chase-created elements that might not have names in slid_to_name, + // use the structure's UUID mapping + for slid_u64 in structure.luids.iter().map(|_| 0).enumerate().map(|(i, _)| i) { + let slid = Slid::from_usize(slid_u64); + if !struct_slid_to_uuid.contains_key(&slid) + && let Some(uuid) = 
structure.get_uuid(slid, &state.store.universe) { + struct_slid_to_uuid.insert(slid, uuid); + } + } + + // Get instance UUID + let instance_uuid = state.store.get_element_uuid(instance_slid); + + // Build columnar batch as IDB (chase-derived, not wire-transmittable) + let mut batch = InstanceDataBatch::new_idb(); + + for (rel_idx, relation) in structure.relations.iter().enumerate() { + let rel_uuid = match rel_idx_to_uuid.get(&rel_idx) { + Some(u) => *u, + None => continue, + }; + + if relation.is_empty() { + continue; + } + + let arity = rel_infos.get(rel_idx).map(|r| r.domain.arity()).unwrap_or(1); + let field_ids: Vec = (0..arity).map(|_| Uuid::nil()).collect(); + + let mut rel_batch = RelationTupleBatch::new(instance_uuid, rel_uuid, field_ids); + + for tuple in relation.iter() { + let uuid_tuple: Vec = tuple + .iter() + .filter_map(|struct_slid| struct_slid_to_uuid.get(struct_slid).copied()) + .collect(); + + if uuid_tuple.len() == tuple.len() { + rel_batch.push(&uuid_tuple); + } + } + + if !rel_batch.is_empty() { + batch.relation_tuples.push(rel_batch); + } + } + + // Save the batch + if !batch.relation_tuples.is_empty() { + let existing_batches = state.store.load_instance_data_batches(instance_uuid) + .unwrap_or_default(); + let version = existing_batches.len() as u64; + state.store.save_instance_data_batch(instance_uuid, version, &batch)?; + } + + Ok(()) +} + +/// Handle query result from `query { ? : Type; }` syntax +fn handle_query_result(_state: &ReplState, result: QueryResult) { + match result { + QueryResult::Found { query_name, theory_name, model, time_ms } => { + println!("✓ Query '{}' SOLVED in {:.2}ms", query_name, time_ms); + println!(" Found model of theory '{}'", theory_name); + + // For now, print a basic summary. We don't have access to the signature here, + // so just show raw structure info. 
+ let total_elements: usize = model.sorts.len(); + if total_elements == 0 { + println!("\n Witness: empty structure (trivial model)"); + } else { + println!("\n Witness structure: {} elements", total_elements); + // Count elements by sort + let mut sort_counts: std::collections::HashMap = std::collections::HashMap::new(); + for &sort_id in &model.sorts { + *sort_counts.entry(sort_id).or_insert(0) += 1; + } + for (sort_id, count) in sort_counts { + println!(" Sort {}: {} element(s)", sort_id, count); + } + } + } + QueryResult::Unsat { query_name, theory_name, time_ms } => { + println!("✗ Query '{}' UNSAT in {:.2}ms", query_name, time_ms); + println!(" No model of '{}' exists extending the base.", theory_name); + } + QueryResult::Incomplete { query_name, theory_name, reason, time_ms } => { + println!("◯ Query '{}' INCOMPLETE after {:.2}ms", query_name, time_ms); + println!(" Theory: {}", theory_name); + println!(" Reason: {}", reason); + } + } +} + +/// Print a summary of structure contents +fn print_structure_summary(structure: &geolog::core::Structure, sig: &geolog::core::Signature) { + use geolog::core::RelationStorage; + + // Show carriers + let total_elements: usize = (0..sig.sorts.len()) + .map(|s| structure.carrier_size(s)) + .sum(); + println!(" Elements: {} total", total_elements); + + for (sort_id, sort_name) in sig.sorts.iter().enumerate() { + let size = structure.carrier_size(sort_id); + if size > 0 { + println!(" {}: {} element(s)", sort_name, size); + } + } + + // Show relations + let mut has_relations = false; + for (rel_id, rel) in sig.relations.iter().enumerate() { + if rel_id < structure.relations.len() { + let count = structure.relations[rel_id].len(); + if count > 0 { + if !has_relations { + println!(" Relations:"); + has_relations = true; + } + println!(" {}: {} tuple(s)", rel.name, count); + } + } + } +} + +/// Get the history file path +fn dirs_history_path() -> Option { + // Try to use standard config directory + if let Some(config_dir) = 
dirs_config_dir() { + let mut path = config_dir; + path.push("geolog"); + path.push("history"); + return Some(path); + } + None +} + +/// Get the config directory (cross-platform) +fn dirs_config_dir() -> Option { + // Simple implementation - use HOME/.config on Unix, APPDATA on Windows + #[cfg(unix)] + { + std::env::var("HOME").ok().map(|h| { + let mut p = PathBuf::from(h); + p.push(".config"); + p + }) + } + #[cfg(windows)] + { + std::env::var("APPDATA").ok().map(PathBuf::from) + } + #[cfg(not(any(unix, windows)))] + { + None + } +} diff --git a/src/cc.rs b/src/cc.rs new file mode 100644 index 0000000..46d247c --- /dev/null +++ b/src/cc.rs @@ -0,0 +1,258 @@ +//! Congruence Closure for equality reasoning. +//! +//! This module provides a union-find based congruence closure implementation +//! that can be used by both the solver (for model enumeration) and the chase +//! (for computing derived relations with equality saturation). +//! +//! # Key Types +//! +//! - [`CongruenceClosure`]: Main struct wrapping union-find + pending equation queue +//! - [`PendingEquation`]: An equation waiting to be processed +//! - [`EquationReason`]: Why an equation was created (for debugging/explanation) +//! +//! # Usage +//! +//! ```ignore +//! use geolog::cc::{CongruenceClosure, EquationReason}; +//! +//! let mut cc = CongruenceClosure::new(); +//! +//! // Add equation: a = b +//! cc.add_equation(a, b, EquationReason::UserAsserted); +//! +//! // Process pending equations +//! while let Some(eq) = cc.pop_pending() { +//! cc.merge(eq.lhs, eq.rhs); +//! // Check for function conflicts, add congruence equations... +//! } +//! +//! // Query equivalence +//! assert!(cc.are_equal(a, b)); +//! ``` + +use std::collections::VecDeque; + +use egglog_union_find::UnionFind; + +use crate::id::{NumericId, Slid}; + +/// A pending equation to be processed. +/// +/// Equations arise from: +/// 1. Function conflicts: `f(a) = x` and `f(a) = y` implies `x = y` +/// 2. Axiom consequents: `∀x. 
P(x) → x = y`
/// 3. Record projections: `[fst: a, snd: b].fst = a`
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PendingEquation {
    /// Left-hand side element
    pub lhs: Slid,
    /// Right-hand side element
    pub rhs: Slid,
    /// Reason for the equation (for debugging/explanation)
    pub reason: EquationReason,
}

/// Reason an equation was created
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum EquationReason {
    /// Function already maps domain to different values
    FunctionConflict { func_id: usize, domain: Slid },
    /// Axiom consequent required this equality
    AxiomConsequent { axiom_idx: usize },
    /// User asserted this equation
    UserAsserted,
    /// Congruence: f(a) = f(b) because a = b
    Congruence { func_id: usize },
    /// Chase-derived: equality conclusion in chase
    ChaseConclusion,
}

/// Congruence closure state.
///
/// This wraps a union-find structure and pending equation queue,
/// providing methods for merging elements and tracking equivalence classes.
///
/// Note: This struct handles the union-find bookkeeping but does NOT
/// automatically propagate through function applications. The caller
/// (solver or chase) is responsible for detecting function conflicts
/// and adding congruence equations.
#[derive(Clone)]
pub struct CongruenceClosure {
    /// Union-find for tracking equivalence classes.
    /// Uses Slid indices as keys.
    // NOTE(review): any type parameter on `UnionFind` was lost in transit;
    // kept as written — confirm against the egglog-union-find API.
    pub uf: UnionFind,
    /// Pending equations to process
    pub pending: VecDeque<PendingEquation>,
    /// Number of merges performed (for statistics)
    pub merge_count: usize,
}

impl std::fmt::Debug for CongruenceClosure {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `uf` has no useful Debug output; show queue and counters only.
        f.debug_struct("CongruenceClosure")
            .field("pending", &self.pending)
            .field("merge_count", &self.merge_count)
            .finish_non_exhaustive()
    }
}

impl Default for CongruenceClosure {
    fn default() -> Self {
        Self::new()
    }
}

impl CongruenceClosure {
    /// Create a new congruence closure
    pub fn new() -> Self {
        Self {
            uf: UnionFind::default(),
            pending: VecDeque::new(),
            merge_count: 0,
        }
    }

    /// Find the canonical representative of an element.
    /// Note: The UnionFind automatically reserves space as needed.
    pub fn find(&mut self, slid: Slid) -> usize {
        self.uf.find(slid.index())
    }

    /// Check if two elements are in the same equivalence class
    pub fn are_equal(&mut self, a: Slid, b: Slid) -> bool {
        self.find(a) == self.find(b)
    }

    /// Add a pending equation
    pub fn add_equation(&mut self, lhs: Slid, rhs: Slid, reason: EquationReason) {
        self.pending.push_back(PendingEquation { lhs, rhs, reason });
    }

    /// Pop the next pending equation, if any
    pub fn pop_pending(&mut self) -> Option<PendingEquation> {
        self.pending.pop_front()
    }

    /// Check if there are pending equations
    pub fn has_pending(&self) -> bool {
        !self.pending.is_empty()
    }

    /// Merge two elements, returning true if they were not already equal
    pub fn merge(&mut self, a: Slid, b: Slid) -> bool {
        let ra = self.uf.find(a.index());
        let rb = self.uf.find(b.index());

        if ra != rb {
            self.uf.union(ra, rb);
            self.merge_count += 1;
            true
        } else {
            false
        }
    }

    /// Get the canonical Slid for an element
    ///
    /// Note: This returns a Slid with the canonical index, but the actual
    /// element in the Structure is still at the original Slid.
    pub fn canonical(&mut self, slid: Slid) -> Slid {
        let idx = self.find(slid);
        Slid::from_usize(idx)
    }

    /// Get the number of elements tracked
    pub fn num_elements(&self) -> usize {
        self.merge_count + self.pending.len() // approximation
    }

    /// Get statistics about the congruence closure: (merges, pending)
    pub fn stats(&self) -> (usize, usize) {
        (self.merge_count, self.pending.len())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_congruence_closure_basic() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(0);
        let b = Slid::from_usize(1);
        let c = Slid::from_usize(2);

        // Initially all different
        assert!(!cc.are_equal(a, b));
        assert!(!cc.are_equal(b, c));
        assert!(!cc.are_equal(a, c));

        // Merge a and b
        assert!(cc.merge(a, b));
        assert!(cc.are_equal(a, b));
        assert!(!cc.are_equal(b, c));

        // Merge b and c (should transitively merge a and c)
        assert!(cc.merge(b, c));
        assert!(cc.are_equal(a, c));
        assert!(cc.are_equal(a, b));
        assert!(cc.are_equal(b, c));

        // Merging already equal elements returns false
        assert!(!cc.merge(a, c));
    }

    #[test]
    fn test_congruence_closure_pending() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(0);
        let b = Slid::from_usize(1);

        assert!(!cc.has_pending());

        cc.add_equation(a, b, EquationReason::UserAsserted);
        assert!(cc.has_pending());

        let eq = cc.pop_pending().unwrap();
        assert_eq!(eq.lhs, a);
        assert_eq!(eq.rhs, b);
        assert!(!cc.has_pending());
    }

    #[test]
    fn test_congruence_closure_stats() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(0);
        let b = Slid::from_usize(1);

        assert_eq!(cc.stats(), (0, 0));

        cc.merge(a, b);
        assert_eq!(cc.stats(), (1, 0));

        cc.add_equation(a, b, EquationReason::UserAsserted);
        assert_eq!(cc.stats(), (1, 1));
    }

    #[test]
    fn test_canonical() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(5);
        let b = Slid::from_usize(10);

        // Before merge, each is its own canonical
        let ca = cc.canonical(a);
        let cb = cc.canonical(b);
        assert_ne!(ca, cb);

        // After merge, both have same canonical
        cc.merge(a, b);
        let ca2 = cc.canonical(a);
        let cb2 = cc.canonical(b);
        assert_eq!(ca2, cb2);
    }
}
diff --git a/src/core.rs b/src/core.rs
new file mode 100644
index 0000000..00b93b3
--- /dev/null
+++ b/src/core.rs
@@ -0,0 +1,1511 @@
//! Core internal representation for Geolog
//!
//! This is the typed, elaborated representation — closer to Owen's Lean formalization.
//! Surface syntax (ast.rs) elaborates into these types.

use std::collections::HashMap;

/// A unique identifier for sorts, used internally
pub type SortId = usize;

/// A unique identifier for function symbols
pub type FuncId = usize;

/// A unique identifier for relation symbols
pub type RelId = usize;

/// Derived sorts: base sorts or products of derived sorts
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DerivedSort {
    /// A base sort
    Base(SortId),
    /// A product of derived sorts (record/tuple)
    Product(Vec<(String, DerivedSort)>),
}

impl DerivedSort {
    pub fn base(id: SortId) -> Self {
        DerivedSort::Base(id)
    }

    pub fn product(fields: Vec<(String, DerivedSort)>) -> Self {
        DerivedSort::Product(fields)
    }

    pub fn unit() -> Self {
        DerivedSort::Product(vec![])
    }

    /// Returns the arity (number of atomic sorts) of this derived sort.
    /// For Product([x: A, y: B]), arity is 2.
    /// For Base(s), arity is 1.
    pub fn arity(&self) -> usize {
        match self {
            DerivedSort::Base(_) => 1,
            DerivedSort::Product(fields) => fields.len(),
        }
    }

    /// Returns the cardinality of this derived sort in a given structure.
    ///
    /// For Base(s), returns the carrier size of sort s.
+ /// For Product([x: A, y: B, ...]), returns the product of cardinalities. + /// An empty product (unit type) has cardinality 1. + pub fn cardinality(&self, structure: &Structure) -> usize { + match self { + DerivedSort::Base(sort_id) => structure.carrier_size(*sort_id), + DerivedSort::Product(fields) => { + if fields.is_empty() { + 1 // Unit type has one inhabitant + } else { + fields + .iter() + .map(|(_, field_sort)| field_sort.cardinality(structure)) + .product() + } + } + } + } +} + +/// A function symbol with its domain and codomain +#[derive(Clone, Debug)] +pub struct FunctionSymbol { + pub name: String, + pub domain: DerivedSort, + pub codomain: DerivedSort, +} + +/// A relation symbol with its domain (relations have no codomain — they're predicates) +#[derive(Clone, Debug)] +pub struct RelationSymbol { + pub name: String, + pub domain: DerivedSort, +} + +/// An instance field declaration (a field that holds a sub-instance) +/// e.g., `initial_marking : N Marking instance;` +#[derive(Clone, Debug)] +pub struct InstanceFieldSymbol { + pub name: String, + /// The theory type expression (e.g., "N Marking" as a string for now) + /// This needs to be resolved with actual parameter bindings during instance elaboration + pub theory_type: String, +} + +/// A signature: sorts + function symbols + relation symbols + instance fields +#[derive(Clone, Debug, Default)] +pub struct Signature { + /// Sort names, indexed by SortId + pub sorts: Vec, + /// Map from sort name to SortId + pub sort_names: HashMap, + /// Function symbols + pub functions: Vec, + /// Map from function name to FuncId + pub func_names: HashMap, + /// Relation symbols + pub relations: Vec, + /// Map from relation name to RelId + pub rel_names: HashMap, + /// Instance field declarations (fields that hold sub-instances) + pub instance_fields: Vec, + /// Map from instance field name to index + pub instance_field_names: HashMap, +} + +impl Signature { + pub fn new() -> Self { + Self::default() + } + + 
pub fn add_sort(&mut self, name: String) -> SortId { + let id = self.sorts.len(); + self.sort_names.insert(name.clone(), id); + self.sorts.push(name); + id + } + + pub fn add_function( + &mut self, + name: String, + domain: DerivedSort, + codomain: DerivedSort, + ) -> FuncId { + let id = self.functions.len(); + self.func_names.insert(name.clone(), id); + self.functions.push(FunctionSymbol { + name, + domain, + codomain, + }); + id + } + + pub fn add_relation(&mut self, name: String, domain: DerivedSort) -> RelId { + let id = self.relations.len(); + self.rel_names.insert(name.clone(), id); + self.relations.push(RelationSymbol { name, domain }); + id + } + + pub fn lookup_sort(&self, name: &str) -> Option { + self.sort_names.get(name).copied() + } + + pub fn lookup_func(&self, name: &str) -> Option { + self.func_names.get(name).copied() + } + + pub fn lookup_rel(&self, name: &str) -> Option { + self.rel_names.get(name).copied() + } + + /// Add an instance field declaration. + /// Returns the field index (0-based). + pub fn add_instance_field(&mut self, name: String, theory_type: String) -> usize { + let id = self.instance_fields.len(); + self.instance_field_names.insert(name.clone(), id); + self.instance_fields.push(InstanceFieldSymbol { name, theory_type }); + id + } + + /// Look up an instance field by name + pub fn lookup_instance_field(&self, name: &str) -> Option { + self.instance_field_names.get(name).copied() + } +} + +// ============ Relation Storage ============ + +use crate::id::{NumericId, Slid}; +use roaring::RoaringTreemap; + +/// Tuple ID: index into the append-only tuple log +pub type TupleId = usize; + +/// Trait for relation storage implementations. 
+/// +/// Different implementations optimize for different access patterns: +/// - VecRelation: append-only log + membership bitmap (good for patches) +/// - Future: Dancing Cells for backtracking, multi-order tries for joins +pub trait RelationStorage { + /// Check if a tuple is in the relation + fn contains(&self, tuple: &[Slid]) -> bool; + + /// Insert a tuple, returns true if newly inserted + fn insert(&mut self, tuple: Vec) -> bool; + + /// Remove a tuple by marking it as not in extent, returns true if was present + fn remove(&mut self, tuple: &[Slid]) -> bool; + + /// Number of tuples currently in the relation + fn len(&self) -> usize; + + /// Check if empty + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Arity of tuples in this relation + fn arity(&self) -> usize; +} + +/// Append-only tuple log with membership bitmap. +/// +/// Tuples are assigned stable IDs (their index in the log). +/// The extent bitmap tracks which tuples are currently "true". +/// This representation handles cardinality changes gracefully since +/// tuple IDs are independent of sort cardinalities. 
+#[derive(Clone, Debug)] +pub struct VecRelation { + /// Arity of this relation (number of Slids per tuple) + pub arity: usize, + /// Append-only log of all tuples ever inserted + pub tuples: Vec>, + /// Map from tuple to its ID (for O(1) lookup) + pub tuple_to_id: HashMap, TupleId>, + /// Bitmap of tuple IDs currently in the extent + pub extent: RoaringTreemap, +} + +impl VecRelation { + /// Create a new empty relation with given arity + pub fn new(arity: usize) -> Self { + Self { + arity, + tuples: Vec::new(), + tuple_to_id: HashMap::new(), + extent: RoaringTreemap::new(), + } + } + + /// Get a tuple by ID + pub fn get_tuple(&self, id: TupleId) -> Option<&[Slid]> { + self.tuples.get(id).map(|v| v.as_slice()) + } + + /// Iterate over all tuples currently in the extent + pub fn iter(&self) -> impl Iterator + '_ { + self.extent.iter().filter_map(|id| self.tuples.get(id as usize).map(|v| v.as_slice())) + } + + /// Iterate over tuple IDs currently in the extent + pub fn iter_ids(&self) -> impl Iterator + '_ { + self.extent.iter().map(|id| id as TupleId) + } +} + +impl RelationStorage for VecRelation { + fn contains(&self, tuple: &[Slid]) -> bool { + if let Some(&id) = self.tuple_to_id.get(tuple) { + self.extent.contains(id as u64) + } else { + false + } + } + + fn insert(&mut self, tuple: Vec) -> bool { + debug_assert_eq!(tuple.len(), self.arity, "tuple arity mismatch"); + + if let Some(&id) = self.tuple_to_id.get(&tuple) { + // Tuple exists in log, just mark as present + if self.extent.contains(id as u64) { + false // Already present + } else { + self.extent.insert(id as u64); + true + } + } else { + // New tuple, append to log + let id = self.tuples.len(); + self.tuple_to_id.insert(tuple.clone(), id); + self.tuples.push(tuple); + self.extent.insert(id as u64); + true + } + } + + fn remove(&mut self, tuple: &[Slid]) -> bool { + if let Some(&id) = self.tuple_to_id.get(tuple) { + self.extent.remove(id as u64) + } else { + false + } + } + + fn len(&self) -> usize { + 
self.extent.len() as usize + } + + fn arity(&self) -> usize { + self.arity + } +} + +/// A typing context: a list of (variable_name, sort) pairs +#[derive(Clone, Debug, Default)] +pub struct Context { + /// Variables in scope, with their sorts + pub vars: Vec<(String, DerivedSort)>, +} + +impl Context { + pub fn new() -> Self { + Self::default() + } + + pub fn extend(&self, name: String, sort: DerivedSort) -> Self { + let mut new_ctx = self.clone(); + new_ctx.vars.push((name, sort)); + new_ctx + } + + pub fn lookup(&self, name: &str) -> Option<(usize, &DerivedSort)> { + self.vars + .iter() + .enumerate() + .rev() + .find(|(_, (n, _))| n == name) + .map(|(i, (_, s))| (i, s)) + } +} + +/// A well-typed term +#[derive(Clone, Debug)] +pub enum Term { + /// Variable reference (de Bruijn index would be cleaner, but names are more debuggable) + Var(String, DerivedSort), + /// Function application + App(FuncId, Box), + /// Record/tuple construction + Record(Vec<(String, Term)>), + /// Field projection + Project(Box, String), +} + +impl Term { + /// Get the sort of this term + pub fn sort(&self, sig: &Signature) -> DerivedSort { + match self { + Term::Var(_, s) => s.clone(), + Term::App(f, _) => sig.functions[*f].codomain.clone(), + Term::Record(fields) => DerivedSort::Product( + fields + .iter() + .map(|(n, t)| (n.clone(), t.sort(sig))) + .collect(), + ), + Term::Project(t, field) => { + if let DerivedSort::Product(fields) = t.sort(sig) { + fields + .into_iter() + .find(|(n, _)| n == field) + .map(|(_, s)| s) + .expect("field not found in product") + } else { + panic!("projection on non-product") + } + } + } + } +} + +/// A well-typed geometric formula +#[derive(Clone, Debug)] +pub enum Formula { + /// Relation application: R(t) where R is a relation symbol and t is a term + Rel(RelId, Term), + /// Truth + True, + /// Falsity + False, + /// Conjunction + Conj(Vec), + /// Disjunction (infinitary in general, but finite for now) + Disj(Vec), + /// Equality of terms (must have 
same sort) + Eq(Term, Term), + /// Existential quantification + Exists(String, DerivedSort, Box), +} + +/// A sequent: premise ⊢ conclusion (both in the same context) +#[derive(Clone, Debug)] +pub struct Sequent { + /// The context (bound variables) + pub context: Context, + /// The premise (antecedent) + pub premise: Formula, + /// The conclusion (consequent) + pub conclusion: Formula, +} + +/// A theory: a signature plus a set of axioms (sequents) +#[derive(Clone, Debug)] +pub struct Theory { + pub name: String, + pub signature: Signature, + pub axioms: Vec, + /// Axiom names (parallel to axioms vec), e.g. "ax/input_complete" + pub axiom_names: Vec, +} + +/// A theory can have parameters (other theories it depends on) +/// Note: This is forward-declared; the actual type is `Rc` +/// but we can't reference it here due to ordering. We use a type alias. +#[derive(Clone, Debug)] +pub struct TheoryParam { + pub name: String, + // This will be an Rc in practice + pub theory_name: String, +} + +/// An elaborated theory with its parameters +#[derive(Clone, Debug)] +pub struct ElaboratedTheory { + pub params: Vec, + pub theory: Theory, +} + +// ============ Structures (instances/models) ============ + +use crate::id::{Luid, OptLuid, OptSlid, SortSlid, Uuid, get_slid, some_slid}; +use crate::universe::Universe; + +/// A function column: either local (Slid) or external (Luid) references. +/// +/// For functions with local codomain (e.g., `src : in -> P` where P is local), +/// we use `Local(Vec)` for tight columnar storage. +/// +/// For functions with external codomain (e.g., `token/of : token -> N/P` where +/// N/P comes from a parent instance), we use `External(Vec)` to +/// reference elements in the parent by their Luid. +/// Storage for product-domain functions. +/// +/// Uses nested Vecs for efficient access and natural handling of carrier growth. 
+/// Sort-local indices are append-only, so existing indices remain stable when +/// carriers grow — we just extend the inner/outer Vecs. +#[derive(Clone, Debug)] +pub enum ProductStorage { + /// Binary product `[x: A, y: B]` → `Vec>` + /// Outer dim is A (first field), inner is B (second field). + /// Access: `rows[x_local][y_local]` + Binary(Vec>), + + /// Ternary product `[x: A, y: B, z: C]` → `Vec>>` + Ternary(Vec>>), + + /// Higher-arity products: fall back to HashMap for flexibility. + /// Keys are tuples of sort-local indices. + General(HashMap, Slid>), +} + +impl ProductStorage { + /// Create storage for binary product with given carrier sizes + pub fn new_binary(size_a: usize, size_b: usize) -> Self { + ProductStorage::Binary(vec![vec![None; size_b]; size_a]) + } + + /// Create storage for ternary product with given carrier sizes + pub fn new_ternary(size_a: usize, size_b: usize, size_c: usize) -> Self { + ProductStorage::Ternary(vec![vec![vec![None; size_c]; size_b]; size_a]) + } + + /// Create storage for general (n-ary) product + pub fn new_general() -> Self { + ProductStorage::General(HashMap::new()) + } + + /// Create storage based on arity and carrier sizes + pub fn new(carrier_sizes: &[usize]) -> Self { + match carrier_sizes.len() { + 2 => Self::new_binary(carrier_sizes[0], carrier_sizes[1]), + 3 => Self::new_ternary(carrier_sizes[0], carrier_sizes[1], carrier_sizes[2]), + _ => Self::new_general(), + } + } + + /// Get value at the given tuple of sort-local indices + pub fn get(&self, tuple: &[usize]) -> Option { + match self { + ProductStorage::Binary(rows) => { + debug_assert_eq!(tuple.len(), 2); + let opt = rows.get(tuple[0])?.get(tuple[1])?; + get_slid(*opt) + } + ProductStorage::Ternary(planes) => { + debug_assert_eq!(tuple.len(), 3); + let opt = planes.get(tuple[0])?.get(tuple[1])?.get(tuple[2])?; + get_slid(*opt) + } + ProductStorage::General(map) => map.get(tuple).copied(), + } + } + + /// Set value at the given tuple of sort-local indices + 
/// Returns Err if conflicting definition exists + pub fn set(&mut self, tuple: &[usize], value: Slid) -> Result<(), Slid> { + match self { + ProductStorage::Binary(rows) => { + debug_assert_eq!(tuple.len(), 2); + // Grow if needed (append-only growth) + while rows.len() <= tuple[0] { + rows.push(Vec::new()); + } + while rows[tuple[0]].len() <= tuple[1] { + rows[tuple[0]].push(None); + } + if let Some(existing) = get_slid(rows[tuple[0]][tuple[1]]) + && existing != value { + return Err(existing); + } + rows[tuple[0]][tuple[1]] = some_slid(value); + Ok(()) + } + ProductStorage::Ternary(planes) => { + debug_assert_eq!(tuple.len(), 3); + while planes.len() <= tuple[0] { + planes.push(Vec::new()); + } + while planes[tuple[0]].len() <= tuple[1] { + planes[tuple[0]].push(Vec::new()); + } + while planes[tuple[0]][tuple[1]].len() <= tuple[2] { + planes[tuple[0]][tuple[1]].push(None); + } + if let Some(existing) = get_slid(planes[tuple[0]][tuple[1]][tuple[2]]) + && existing != value { + return Err(existing); + } + planes[tuple[0]][tuple[1]][tuple[2]] = some_slid(value); + Ok(()) + } + ProductStorage::General(map) => { + if let Some(&existing) = map.get(tuple) + && existing != value { + return Err(existing); + } + map.insert(tuple.to_vec(), value); + Ok(()) + } + } + } + + /// Count of defined (Some) entries + pub fn defined_count(&self) -> usize { + match self { + ProductStorage::Binary(rows) => rows + .iter() + .flat_map(|row| row.iter()) + .filter(|&&v| v.is_some()) + .count(), + ProductStorage::Ternary(planes) => planes + .iter() + .flat_map(|plane| plane.iter()) + .flat_map(|row| row.iter()) + .filter(|&&v| v.is_some()) + .count(), + ProductStorage::General(map) => map.len(), + } + } + + /// Check if all entries are defined (total function) + pub fn is_total(&self, carrier_sizes: &[usize]) -> bool { + let expected = carrier_sizes.iter().product::(); + self.defined_count() == expected + } + + /// Iterate over all defined entries as (tuple, value) pairs + pub fn 
iter_defined(&self) -> Box, Slid)> + '_> { + match self { + ProductStorage::Binary(rows) => Box::new( + rows.iter() + .enumerate() + .flat_map(|(i, row)| { + row.iter().enumerate().filter_map(move |(j, &v)| { + get_slid(v).map(|s| (vec![i, j], s)) + }) + }), + ), + ProductStorage::Ternary(planes) => Box::new( + planes.iter().enumerate().flat_map(|(i, plane)| { + plane.iter().enumerate().flat_map(move |(j, row)| { + row.iter().enumerate().filter_map(move |(k, &v)| { + get_slid(v).map(|s| (vec![i, j, k], s)) + }) + }) + }), + ), + ProductStorage::General(map) => { + Box::new(map.iter().map(|(k, &v)| (k.clone(), v))) + } + } + } +} + +#[derive(Clone, Debug)] +pub enum FunctionColumn { + /// Base domain with local codomain: values are Slids within this structure + Local(Vec), + /// Base domain with external codomain (from parent): values are Luids + External(Vec), + /// Product domain with local codomain. + /// Stores field sort IDs for carrier size lookups during growth. + ProductLocal { + storage: ProductStorage, + field_sorts: Vec, + }, + /// Base domain with product codomain (multiple fields). + /// Each domain element maps to a tuple of codomain Slids. + ProductCodomain { + /// One column per field - field_columns[i][domain_idx] = codomain Slid for field i + field_columns: Vec>, + /// Field names in order + field_names: Vec, + /// Sort IDs for each codomain field + field_sorts: Vec, + /// Domain sort ID (for carrier size lookups during growth) + domain_sort: SortId, + }, +} + +/// Linearize a tuple of sort-local indices into a flat column index. +/// Uses row-major (lexicographic) order. 
+/// E.g., for field_sizes = [3, 4], tuple [1, 2] → 1*4 + 2 = 6 +pub fn linearize_tuple(tuple: &[usize], field_sizes: &[usize]) -> usize { + debug_assert_eq!(tuple.len(), field_sizes.len()); + let mut index = 0; + let mut stride = 1; + // Process in reverse for row-major order + for (i, &size) in field_sizes.iter().enumerate().rev() { + index += tuple[i] * stride; + stride *= size; + } + index +} + +/// Delinearize a flat column index back to tuple of sort-local indices. +pub fn delinearize_index(mut index: usize, field_sizes: &[usize]) -> Vec { + let mut tuple = vec![0; field_sizes.len()]; + // Process in reverse for row-major order + for (i, &size) in field_sizes.iter().enumerate().rev() { + tuple[i] = index % size; + index /= size; + } + tuple +} + +/// Compute total size of product domain (product of field carrier sizes) +pub fn product_domain_size(field_sizes: &[usize]) -> usize { + field_sizes.iter().product() +} + +impl FunctionColumn { + /// Get the total number of domain slots (for base domains only). + /// For product domains, returns 0 — use `defined_count()` instead. + pub fn len(&self) -> usize { + match self { + FunctionColumn::Local(v) => v.len(), + FunctionColumn::External(v) => v.len(), + FunctionColumn::ProductLocal { .. } => 0, // Product domains have dynamic size + FunctionColumn::ProductCodomain { field_columns, .. } => { + field_columns.first().map(|c| c.len()).unwrap_or(0) + } + } + } + + /// Get the number of defined entries (not total slots) + pub fn defined_count(&self) -> usize { + match self { + FunctionColumn::Local(v) => v.iter().filter(|x| x.is_some()).count(), + FunctionColumn::External(v) => v.iter().filter(|x| x.is_some()).count(), + FunctionColumn::ProductLocal { storage, .. } => storage.defined_count(), + FunctionColumn::ProductCodomain { field_columns, .. 
} => { + // Count entries where ALL fields are defined + if field_columns.is_empty() { + return 0; + } + let len = field_columns[0].len(); + (0..len) + .filter(|&i| field_columns.iter().all(|col| col.get(i).and_then(|x| *x).is_some())) + .count() + } + } + } + + /// Check if empty (no defined entries) + pub fn is_empty(&self) -> bool { + self.defined_count() == 0 + } + + /// Check if this is a local column (base domain, local codomain) + pub fn is_local(&self) -> bool { + matches!(self, FunctionColumn::Local(_)) + } + + /// Check if this is a product codomain column + pub fn is_product_codomain(&self) -> bool { + matches!(self, FunctionColumn::ProductCodomain { .. }) + } + + /// Check if this is an external column (base domain, external codomain) + pub fn is_external(&self) -> bool { + matches!(self, FunctionColumn::External(_)) + } + + /// Check if this is a product-domain column + pub fn is_product(&self) -> bool { + matches!(self, FunctionColumn::ProductLocal { .. }) + } + + /// Get local value at index (panics if not local or out of bounds) + pub fn get_local(&self, idx: usize) -> OptSlid { + match self { + FunctionColumn::Local(v) => v[idx], + FunctionColumn::External(_) => panic!("get_local called on external column"), + FunctionColumn::ProductLocal { .. } => panic!("get_local called on product domain column"), + FunctionColumn::ProductCodomain { .. } => panic!("get_local called on product codomain column"), + } + } + + /// Get external value at index (panics if not external or out of bounds) + pub fn get_external(&self, idx: usize) -> OptLuid { + match self { + FunctionColumn::External(v) => v[idx], + FunctionColumn::Local(_) => panic!("get_external called on local column"), + FunctionColumn::ProductLocal { .. } => panic!("get_external called on product domain column"), + FunctionColumn::ProductCodomain { .. 
} => panic!("get_external called on product codomain column"), + } + } + + /// Iterate over local values (panics if not local) + pub fn iter_local(&self) -> impl Iterator { + match self { + FunctionColumn::Local(v) => v.iter(), + FunctionColumn::External(_) => panic!("iter_local called on external column"), + FunctionColumn::ProductLocal { .. } => panic!("iter_local called on product domain column"), + FunctionColumn::ProductCodomain { .. } => panic!("iter_local called on product codomain column"), + } + } + + /// Iterate over external values (panics if not external) + pub fn iter_external(&self) -> impl Iterator { + match self { + FunctionColumn::External(v) => v.iter(), + FunctionColumn::Local(_) => panic!("iter_external called on local column"), + FunctionColumn::ProductLocal { .. } => panic!("iter_external called on product domain column"), + FunctionColumn::ProductCodomain { .. } => panic!("iter_external called on product codomain column"), + } + } + + /// Get as local column (returns None if external or product) + pub fn as_local(&self) -> Option<&Vec> { + match self { + FunctionColumn::Local(v) => Some(v), + FunctionColumn::External(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get as mutable local column (returns None if external or product) + pub fn as_local_mut(&mut self) -> Option<&mut Vec> { + match self { + FunctionColumn::Local(v) => Some(v), + FunctionColumn::External(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get product value for a tuple of sort-local indices + pub fn get_product(&self, tuple: &[usize]) -> Option { + match self { + FunctionColumn::ProductLocal { storage, .. } => storage.get(tuple), + _ => None, + } + } + + /// Get field sort IDs for product column (returns None if not product) + pub fn field_sorts(&self) -> Option<&[SortId]> { + match self { + FunctionColumn::ProductLocal { field_sorts, .. 
} => Some(field_sorts), + _ => None, + } + } + + /// Get product storage (returns None if not product) + pub fn as_product(&self) -> Option<&ProductStorage> { + match self { + FunctionColumn::ProductLocal { storage, .. } => Some(storage), + _ => None, + } + } + + /// Get mutable product storage (returns None if not product) + pub fn as_product_mut(&mut self) -> Option<&mut ProductStorage> { + match self { + FunctionColumn::ProductLocal { storage, .. } => Some(storage), + _ => None, + } + } + + /// Iterate over defined product entries as (tuple, value) pairs + pub fn iter_product_defined(&self) -> Option, Slid)> + '_>> { + match self { + FunctionColumn::ProductLocal { storage, .. } => Some(storage.iter_defined()), + _ => None, + } + } +} + +/// A structure: interpretation of a signature in FinSet +/// +/// This is a model/instance of a theory — a functor from the signature to FinSet: +/// - Each sort maps to a finite set of elements +/// - Each function symbol maps to a function between those sets +/// - Each relation symbol maps to a set of tuples (subset of product of carriers) +/// +/// Elements are identified by Luids (Locally Universal IDs) which reference +/// UUIDs in the global Universe. This allows efficient integer operations +/// while maintaining stable identity across versions. +/// +/// Note: Human-readable names are stored separately in a NamingIndex, keyed by UUID. +/// This structure contains only UUIDs and their relationships. 
+#[derive(Clone, Debug)] +pub struct Structure { + /// Reference to the theory this is an instance of (Luid of the Theory element) + /// None for structures that ARE theories (e.g., GeologMeta instances) + pub theory_luid: Option, + + /// Global identity: Slid → Luid (references Universe for UUID lookup) + pub luids: Vec, + + /// Reverse lookup: Luid → Slid (for finding elements by their global ID) + pub luid_to_slid: HashMap, + + /// Element sorts: Slid → SortId + pub sorts: Vec, + + /// Carriers: SortId → RoaringTreemap of Slids in that sort + pub carriers: Vec, + + /// Functions: FuncId → FunctionColumn + /// Each column is indexed by domain SortSlid and contains codomain references. + /// Local codomains use Slid; external codomains (from parents) use Luid. + pub functions: Vec, + + /// Relations: RelId → VecRelation (append-only tuple log + membership bitmap) + pub relations: Vec, + + /// Parent instances for parameterized theories (virtual import). + /// Maps param name → UUID of immutable parent instance. + /// E.g., for `problem0 : ExampleNet ReachabilityProblem`, this contains {"N": uuid_of_ExampleNet} + pub parents: HashMap, + + /// Nested structures (for instance-valued fields). + /// Maps field name → nested Structure. + /// E.g., for `initial_marking = { ... 
}`, this contains {"initial_marking": Structure} + pub nested: HashMap, +} + +/// Function init info: domain sort ID and whether codomain is external +#[derive(Clone, Debug)] +pub struct FunctionInitInfo { + pub domain_sort_id: Option, + pub codomain_is_external: bool, +} + +/// Domain info for function initialization +#[derive(Clone, Debug)] +pub enum FunctionDomainInfo { + /// Base sort domain: just the sort ID + Base(SortId), + /// Product domain: list of sort IDs for each field + Product(Vec), +} + +/// Full function initialization info (domain + codomain) +#[derive(Clone, Debug)] +pub struct FunctionFullInfo { + pub domain: FunctionDomainInfo, + pub codomain: FunctionCodomainInfo, +} + +/// Codomain info for function initialization +#[derive(Clone, Debug)] +pub enum FunctionCodomainInfo { + /// Base sort codomain (local): values are Slids within this structure + Local(SortId), + /// Base sort codomain (external): values are Luids from parent + External, + /// Product codomain: field names and sort IDs + Product { field_names: Vec, field_sorts: Vec }, +} + +impl Structure { + /// Create a new empty structure. + /// Note: functions and relations are not pre-allocated here; call + /// `init_functions()` and `init_relations()` after elements are added. + pub fn new(num_sorts: usize) -> Self { + Self { + theory_luid: None, + luids: Vec::new(), + luid_to_slid: HashMap::new(), + sorts: Vec::new(), + carriers: vec![RoaringTreemap::new(); num_sorts], + functions: Vec::new(), // Initialized later via init_functions() + relations: Vec::new(), // Initialized later via init_relations() + parents: HashMap::new(), + nested: HashMap::new(), + } + } + + /// Create a structure that is an instance of the given theory + pub fn new_instance(theory_luid: Luid, num_sorts: usize) -> Self { + Self { + theory_luid: Some(theory_luid), + ..Self::new(num_sorts) + } + } + + /// Initialize function storage based on domain carrier sizes. + /// Must be called after all elements are added. 
+ /// + /// For simple (non-parameterized) instances, use `init_functions_local()`. + /// For parameterized instances with external codomains, use this method. + pub fn init_functions_ext(&mut self, func_info: &[FunctionInitInfo]) { + self.functions = func_info + .iter() + .map(|info| { + let size = match info.domain_sort_id { + Some(sort_id) => self.carrier_size(sort_id), + None => 0, // Product domains deferred + }; + if info.codomain_is_external { + FunctionColumn::External(vec![None; size]) + } else { + FunctionColumn::Local(vec![None; size]) + } + }) + .collect(); + } + + /// Initialize function storage for simple (non-parameterized) instances. + /// All codomains are assumed to be local. + /// Pass `None` for product-domain functions; pass `Some(sort_id)` for base-domain functions. + pub fn init_functions(&mut self, domain_sort_ids: &[Option]) { + self.functions = domain_sort_ids + .iter() + .map(|opt_sort_id| match opt_sort_id { + Some(sort_id) => FunctionColumn::Local(vec![None; self.carrier_size(*sort_id)]), + None => { + // Legacy: product domains without size info get empty ProductLocal + // Use init_functions_full for proper initialization + FunctionColumn::ProductLocal { + storage: ProductStorage::new_general(), + field_sorts: Vec::new(), + } + } + }) + .collect(); + } + + /// Initialize function storage with full domain info (supports product domains). 
+ pub fn init_functions_full(&mut self, domains: &[FunctionDomainInfo]) { + self.functions = domains + .iter() + .map(|domain| match domain { + FunctionDomainInfo::Base(sort_id) => { + FunctionColumn::Local(vec![None; self.carrier_size(*sort_id)]) + } + FunctionDomainInfo::Product(field_sort_ids) => { + let carrier_sizes: Vec = field_sort_ids + .iter() + .map(|&sort_id| self.carrier_size(sort_id)) + .collect(); + FunctionColumn::ProductLocal { + storage: ProductStorage::new(&carrier_sizes), + field_sorts: field_sort_ids.clone(), + } + } + }) + .collect(); + } + + /// Initialize function storage with complete info (domain AND codomain types). + /// This supports product codomains in addition to product domains. + pub fn init_functions_complete(&mut self, funcs: &[FunctionFullInfo]) { + self.functions = funcs + .iter() + .map(|info| { + match (&info.domain, &info.codomain) { + // Base domain, base local codomain + (FunctionDomainInfo::Base(domain_sort), FunctionCodomainInfo::Local(_)) => { + FunctionColumn::Local(vec![None; self.carrier_size(*domain_sort)]) + } + // Base domain, external codomain + (FunctionDomainInfo::Base(domain_sort), FunctionCodomainInfo::External) => { + FunctionColumn::External(vec![None; self.carrier_size(*domain_sort)]) + } + // Base domain, product codomain + (FunctionDomainInfo::Base(domain_sort), FunctionCodomainInfo::Product { field_names, field_sorts }) => { + let size = self.carrier_size(*domain_sort); + FunctionColumn::ProductCodomain { + field_columns: vec![vec![None; size]; field_names.len()], + field_names: field_names.clone(), + field_sorts: field_sorts.clone(), + domain_sort: *domain_sort, + } + } + // Product domain, local codomain + (FunctionDomainInfo::Product(field_sort_ids), FunctionCodomainInfo::Local(_)) => { + let carrier_sizes: Vec = field_sort_ids + .iter() + .map(|&sort_id| self.carrier_size(sort_id)) + .collect(); + FunctionColumn::ProductLocal { + storage: ProductStorage::new(&carrier_sizes), + field_sorts: 
field_sort_ids.clone(), + } + } + // Product domain with external or product codomain - not yet supported + (FunctionDomainInfo::Product(_), _) => { + // Fall back to ProductLocal with empty storage + FunctionColumn::ProductLocal { + storage: ProductStorage::new_general(), + field_sorts: Vec::new(), + } + } + } + }) + .collect(); + } + + /// Initialize relation storage based on arities. + /// Must be called after all elements are added. + /// + /// `arities[rel_id]` is the number of fields in the relation's domain. + /// For a relation `child : [parent: Node, child: Node]`, arity is 2. + pub fn init_relations(&mut self, arities: &[usize]) { + self.relations = arities.iter().map(|&arity| VecRelation::new(arity)).collect(); + } + + /// Assert a tuple in a relation: R(tuple) + /// Returns true if the tuple was newly inserted. + pub fn assert_relation(&mut self, rel_id: RelId, tuple: Vec) -> bool { + self.relations[rel_id].insert(tuple) + } + + /// Retract a tuple from a relation + /// Returns true if the tuple was present. + pub fn retract_relation(&mut self, rel_id: RelId, tuple: &[Slid]) -> bool { + self.relations[rel_id].remove(tuple) + } + + /// Check if a tuple is in a relation + pub fn query_relation(&self, rel_id: RelId, tuple: &[Slid]) -> bool { + self.relations[rel_id].contains(tuple) + } + + /// Get a reference to a relation's storage + pub fn get_relation(&self, rel_id: RelId) -> &VecRelation { + &self.relations[rel_id] + } + + /// Get a mutable reference to a relation's storage + pub fn get_relation_mut(&mut self, rel_id: RelId) -> &mut VecRelation { + &mut self.relations[rel_id] + } + + /// Get the number of relations in this structure + pub fn num_relations(&self) -> usize { + self.relations.len() + } + + /// Add a new element to the structure, registering its UUID in the universe. + /// Returns the (Slid, Luid) for the new element. + /// Note: Names are registered separately in a NamingIndex. 
+ pub fn add_element(&mut self, universe: &mut Universe, sort_id: SortId) -> (Slid, Luid) { + let uuid = Uuid::now_v7(); + let luid = universe.intern(uuid); + let slid = self.add_element_with_luid(luid, sort_id); + (slid, luid) + } + + /// Add an element with a specific Luid (used when applying patches or loading) + pub fn add_element_with_luid(&mut self, luid: Luid, sort_id: SortId) -> Slid { + let slid = Slid::from_usize(self.luids.len()); + + self.luids.push(luid); + self.luid_to_slid.insert(luid, slid); + self.sorts.push(sort_id); + self.carriers[sort_id].insert(slid.index() as u64); + + slid + } + + /// Add an element with a specific UUID, registering it in the universe. + /// Used when applying patches that reference UUIDs. + pub fn add_element_with_uuid( + &mut self, + universe: &mut Universe, + uuid: Uuid, + sort_id: SortId, + ) -> (Slid, Luid) { + let luid = universe.intern(uuid); + let slid = self.add_element_with_luid(luid, sort_id); + (slid, luid) + } + + /// Define a function value for a local codomain (Slid → Slid). + /// Uses SortSlid indexing into the columnar function storage. + /// Automatically grows the column if needed. 
+ pub fn define_function( + &mut self, + func_id: FuncId, + domain_slid: Slid, + codomain_slid: Slid, + ) -> Result<(), String> { + let domain_sort_slid = self.sort_local_id(domain_slid); + let idx = domain_sort_slid.index(); + + match &mut self.functions[func_id] { + FunctionColumn::Local(col) => { + // Grow column if needed + if idx >= col.len() { + col.resize(idx + 1, None); // None = undefined + } + if let Some(existing) = get_slid(col[idx]) + && existing != codomain_slid + { + return Err(format!( + "conflicting definition: func {}(slid {}) already defined as slid {}, cannot redefine as slid {}", + func_id, domain_slid, existing, codomain_slid + )); + } + col[idx] = some_slid(codomain_slid); + Ok(()) + } + FunctionColumn::External(_) => Err(format!( + "func {} has external codomain, use define_function_ext", + func_id + )), + FunctionColumn::ProductLocal { .. } => Err(format!( + "func {} has product domain, use define_function_product", + func_id + )), + FunctionColumn::ProductCodomain { .. } => Err(format!( + "func {} has product codomain, use define_function_product_codomain", + func_id + )), + } + } + + /// Define a function value for an external codomain (Slid → Luid). + /// Used for functions referencing parent instance elements. + /// Automatically grows the column if needed. 
+ pub fn define_function_ext( + &mut self, + func_id: FuncId, + domain_slid: Slid, + codomain_luid: Luid, + ) -> Result<(), String> { + use crate::id::{get_luid, some_luid}; + let domain_sort_slid = self.sort_local_id(domain_slid); + let idx = domain_sort_slid.index(); + + match &mut self.functions[func_id] { + FunctionColumn::External(col) => { + // Grow column if needed + if idx >= col.len() { + col.resize(idx + 1, None); // None = undefined + } + if let Some(existing) = get_luid(col[idx]) + && existing != codomain_luid + { + return Err(format!( + "conflicting definition: func {}(slid {}) already defined as luid {}, cannot redefine as luid {}", + func_id, domain_slid, existing, codomain_luid + )); + } + col[idx] = some_luid(codomain_luid); + Ok(()) + } + FunctionColumn::Local(_) => Err(format!( + "func {} has local codomain, use define_function", + func_id + )), + FunctionColumn::ProductLocal { .. } => Err(format!( + "func {} has product domain, use define_function_product", + func_id + )), + FunctionColumn::ProductCodomain { .. } => Err(format!( + "func {} has product codomain, use define_function_product_codomain", + func_id + )), + } + } + + /// Define a function value for a product domain (tuple of Slids → Slid). + /// Used for functions like `mul : [x: M, y: M] -> M`. + /// + /// The domain_tuple contains Slids which are converted to sort-local indices + /// for storage in the nested Vec structure. + pub fn define_function_product( + &mut self, + func_id: FuncId, + domain_tuple: &[Slid], + codomain_slid: Slid, + ) -> Result<(), String> { + // Convert Slids to sort-local indices for storage + let local_indices: Vec = domain_tuple + .iter() + .map(|&slid| self.sort_local_id(slid).index()) + .collect(); + + match &mut self.functions[func_id] { + FunctionColumn::ProductLocal { storage, .. 
} => { + storage.set(&local_indices, codomain_slid).map_err(|existing| { + format!( + "conflicting definition: func {}({:?}) already defined as slid {}, cannot redefine as slid {}", + func_id, domain_tuple, existing, codomain_slid + ) + }) + } + FunctionColumn::Local(_) => Err(format!( + "func {} has base domain, use define_function", + func_id + )), + FunctionColumn::External(_) => Err(format!( + "func {} has external codomain, use define_function_ext", + func_id + )), + FunctionColumn::ProductCodomain { .. } => Err(format!( + "func {} has product codomain, use define_function_product_codomain", + func_id + )), + } + } + + /// Define a function value for a product codomain (Slid → tuple of Slids). + /// Used for functions like `f : A -> [x: B, y: C]`. + /// + /// The codomain_values is a slice of (field_name, Slid) pairs. + pub fn define_function_product_codomain( + &mut self, + func_id: FuncId, + domain_slid: Slid, + codomain_values: &[(&str, Slid)], + ) -> Result<(), String> { + let domain_sort_slid = self.sort_local_id(domain_slid); + let idx = domain_sort_slid.index(); + + match &mut self.functions[func_id] { + FunctionColumn::ProductCodomain { field_columns, field_names, domain_sort, .. 
} => { + // Grow columns if needed + for col in field_columns.iter_mut() { + if idx >= col.len() { + col.resize(idx + 1, None); + } + } + + // Set each field value + for (field_name, slid) in codomain_values { + let field_idx = field_names.iter() + .position(|n| n == field_name) + .ok_or_else(|| format!( + "unknown field '{}' in product codomain (available: {:?})", + field_name, field_names + ))?; + + if let Some(existing) = get_slid(field_columns[field_idx][idx]) + && existing != *slid { + return Err(format!( + "conflicting definition: func {}(slid {}).{} already defined as slid {}, cannot redefine as slid {}", + func_id, domain_slid, field_name, existing, slid + )); + } + field_columns[field_idx][idx] = some_slid(*slid); + } + let _ = domain_sort; // silence unused warning + Ok(()) + } + FunctionColumn::Local(_) => Err(format!( + "func {} has local codomain, use define_function", + func_id + )), + FunctionColumn::External(_) => Err(format!( + "func {} has external codomain, use define_function_ext", + func_id + )), + FunctionColumn::ProductLocal { .. } => Err(format!( + "func {} has product domain, use define_function_product", + func_id + )), + } + } + + /// Get function value for local codomain (base domain only). + pub fn get_function(&self, func_id: FuncId, domain_sort_slid: SortSlid) -> Option { + let idx = domain_sort_slid.index(); + match &self.functions[func_id] { + FunctionColumn::Local(col) => col.get(idx).and_then(|&opt| get_slid(opt)), + FunctionColumn::External(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get function value for product codomain. + /// Returns a Vec of (field_name, Slid) pairs, or None if not fully defined. + pub fn get_function_product_codomain( + &self, + func_id: FuncId, + domain_sort_slid: SortSlid, + ) -> Option> { + let idx = domain_sort_slid.index(); + match &self.functions[func_id] { + FunctionColumn::ProductCodomain { field_columns, field_names, .. 
} => { + // All fields must be defined + let mut result = Vec::with_capacity(field_names.len()); + for (i, name) in field_names.iter().enumerate() { + let slid = get_slid(*field_columns[i].get(idx)?)?; + result.push((name.clone(), slid)); + } + Some(result) + } + _ => None, + } + } + + /// Get function value for external codomain (returns Luid). + pub fn get_function_ext(&self, func_id: FuncId, domain_sort_slid: SortSlid) -> Option { + use crate::id::get_luid; + let idx = domain_sort_slid.index(); + match &self.functions[func_id] { + FunctionColumn::External(col) => col.get(idx).and_then(|&opt| get_luid(opt)), + FunctionColumn::Local(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get function value for product domain. + /// Takes a tuple of Slids and converts them to sort-local indices for lookup. + pub fn get_function_product(&self, func_id: FuncId, domain_tuple: &[Slid]) -> Option { + // Convert Slids to sort-local indices + let local_indices: Vec = domain_tuple + .iter() + .map(|&slid| self.sort_local_id(slid).index()) + .collect(); + + match &self.functions[func_id] { + FunctionColumn::ProductLocal { storage, .. } => storage.get(&local_indices), + FunctionColumn::Local(_) + | FunctionColumn::External(_) + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get the sort-local index for an element (0-based position within its carrier). + /// + /// # Roaring bitmap rank() semantics + /// `rank(x)` returns the count of elements ≤ x in the bitmap. + /// For a bitmap containing {4}: rank(3)=0, rank(4)=1, rank(5)=1. + /// So 0-based index = rank(x) - 1. 
+ pub fn sort_local_id(&self, slid: Slid) -> SortSlid { + let sort_id = self.sorts[slid.index()]; + SortSlid::from_usize((self.carriers[sort_id].rank(slid.index() as u64) - 1) as usize) + } + + /// Look up element by Luid + pub fn lookup_luid(&self, luid: Luid) -> Option { + self.luid_to_slid.get(&luid).copied() + } + + /// Get the Luid for a Slid + pub fn get_luid(&self, slid: Slid) -> Luid { + self.luids[slid.index()] + } + + /// Get the UUID for a Slid (requires Universe lookup) + pub fn get_uuid(&self, slid: Slid, universe: &Universe) -> Option { + universe.get(self.luids[slid.index()]) + } + + /// Get element count + pub fn len(&self) -> usize { + self.luids.len() + } + + /// Check if empty + pub fn is_empty(&self) -> bool { + self.luids.is_empty() + } + + /// Get carrier size for a sort + pub fn carrier_size(&self, sort_id: SortId) -> usize { + self.carriers[sort_id].len() as usize + } + + /// Get the number of sorts in this structure + pub fn num_sorts(&self) -> usize { + self.carriers.len() + } + + /// Get the number of functions in this structure + pub fn num_functions(&self) -> usize { + self.functions.len() + } +} + +impl std::fmt::Display for DerivedSort { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DerivedSort::Base(id) => write!(f, "Sort#{}", id), + DerivedSort::Product(fields) if fields.is_empty() => write!(f, "()"), + DerivedSort::Product(fields) => { + write!(f, "[")?; + for (i, (name, sort)) in fields.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}: {}", name, sort)?; + } + write!(f, "]") + } + } + } +} + +// ============ Display implementations for debugging ============ + +// Main unit tests moved to tests/proptest_structure.rs + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_derived_sort_cardinality_base() { + let mut structure = Structure::new(2); + // Add elements to sort 0: 3 elements + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + 
structure.carriers[0].insert(2); + // Add elements to sort 1: 2 elements + structure.carriers[1].insert(0); + structure.carriers[1].insert(1); + + let base0 = DerivedSort::Base(0); + let base1 = DerivedSort::Base(1); + + assert_eq!(base0.cardinality(&structure), 3); + assert_eq!(base1.cardinality(&structure), 2); + } + + #[test] + fn test_derived_sort_cardinality_product() { + let mut structure = Structure::new(2); + // Sort 0: 3 elements + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + // Sort 1: 2 elements + structure.carriers[1].insert(0); + structure.carriers[1].insert(1); + + // Product [x: A, y: B] where |A| = 3, |B| = 2 should have cardinality 6 + let product = DerivedSort::Product(vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(1)), + ]); + assert_eq!(product.cardinality(&structure), 6); + } + + #[test] + fn test_derived_sort_cardinality_unit() { + let structure = Structure::new(1); + + // Unit type (empty product) has cardinality 1 + let unit = DerivedSort::unit(); + assert_eq!(unit.cardinality(&structure), 1); + } + + #[test] + fn test_derived_sort_cardinality_empty_carrier() { + let structure = Structure::new(1); + + // Empty carrier has cardinality 0 + let base = DerivedSort::Base(0); + assert_eq!(base.cardinality(&structure), 0); + + // Product with empty carrier has cardinality 0 + let product = DerivedSort::Product(vec![ + ("x".to_string(), DerivedSort::Base(0)), + ]); + assert_eq!(product.cardinality(&structure), 0); + } +} diff --git a/src/elaborate/env.rs b/src/elaborate/env.rs new file mode 100644 index 0000000..b2b0e75 --- /dev/null +++ b/src/elaborate/env.rs @@ -0,0 +1,315 @@ +//! Elaboration environment and basic elaboration functions. 
+ +use std::collections::HashMap; +use std::rc::Rc; + +use crate::ast; +use crate::core::*; + +use super::error::{ElabError, ElabResult}; + +/// Environment for elaboration — tracks what's in scope +#[derive(Clone, Debug, Default)] +pub struct Env { + /// Known theories, by name + pub theories: HashMap>, + /// Current theory being elaborated (if any) + pub current_theory: Option, + /// Local signature being built + pub signature: Signature, + /// Parameters in scope (for parameterized theories) + pub params: Vec<(String, Rc)>, +} + +impl Env { + pub fn new() -> Self { + Self::default() + } + + /// Resolve a path like "N/P" where N is a parameter and P is a sort in N's theory. + /// + /// All param sorts are copied into the local signature with qualified names (e.g., "N/P"), + /// so we just need to look up the joined path in the current signature. + pub fn resolve_sort_path(&self, path: &ast::Path) -> ElabResult { + // Join all segments with "/" — this handles both simple names like "F" + // and qualified names like "N/P" + let full_name = path.segments.join("/"); + if let Some(id) = self.signature.lookup_sort(&full_name) { + return Ok(DerivedSort::Base(id)); + } + Err(ElabError::UnknownSort(full_name)) + } + + /// Resolve a function path like "N/in/src" or "F/of". + /// + /// All param functions are copied into the local signature with qualified names, + /// so we just need to look up the joined path. + pub fn resolve_func_path(&self, path: &ast::Path) -> ElabResult { + let full_name = path.segments.join("/"); + if let Some(id) = self.signature.lookup_func(&full_name) { + return Ok(id); + } + Err(ElabError::UnknownFunction(full_name)) + } +} + +/// Elaborate a type expression into a DerivedSort +/// +/// Uses the concatenative stack-based type evaluator. 
+pub fn elaborate_type(env: &Env, ty: &ast::TypeExpr) -> ElabResult { + use super::types::eval_type_expr; + + let val = eval_type_expr(ty, env)?; + val.as_derived_sort(env) +} + +/// Elaborate a term in a given context +pub fn elaborate_term(env: &Env, ctx: &Context, term: &ast::Term) -> ElabResult { + match term { + ast::Term::Path(path) => { + if path.segments.len() == 1 { + // Simple variable + let name = &path.segments[0]; + if let Some((_, sort)) = ctx.lookup(name) { + return Ok(Term::Var(name.clone(), sort.clone())); + } + return Err(ElabError::UnknownVariable(name.clone())); + } + // Qualified path — could be a variable or a function reference + // For now, treat as variable lookup failure + Err(ElabError::UnknownVariable(path.to_string())) + } + ast::Term::App(base, func) => { + // In surface syntax, application is postfix: `x f` means apply f to x + // So App(base, func) where base is the argument and func is the function + // First, elaborate the base (the argument) + let elab_arg = elaborate_term(env, ctx, base)?; + let arg_sort = elab_arg.sort(&env.signature); + + // Then figure out what the function is + match func.as_ref() { + ast::Term::Path(path) => { + let func_id = env.resolve_func_path(path)?; + let func_sym = &env.signature.functions[func_id]; + + // Type check: argument sort must match function domain + if arg_sort != func_sym.domain { + return Err(ElabError::TypeMismatch { + expected: func_sym.domain.clone(), + got: arg_sort, + }); + } + + Ok(Term::App(func_id, Box::new(elab_arg))) + } + _ => { + // Higher-order application — not supported yet + Err(ElabError::UnsupportedFeature( + "higher-order application".to_string(), + )) + } + } + } + ast::Term::Project(base, field) => { + let elab_base = elaborate_term(env, ctx, base)?; + Ok(Term::Project(Box::new(elab_base), field.clone())) + } + ast::Term::Record(fields) => { + let elab_fields: Result, _> = fields + .iter() + .map(|(name, term)| elaborate_term(env, ctx, term).map(|t| (name.clone(), 
t))) + .collect(); + Ok(Term::Record(elab_fields?)) + } + } +} + +/// Elaborate a formula +pub fn elaborate_formula(env: &Env, ctx: &Context, formula: &ast::Formula) -> ElabResult { + match formula { + ast::Formula::True => Ok(Formula::True), + ast::Formula::False => Ok(Formula::False), + ast::Formula::Eq(lhs, rhs) => { + let elab_lhs = elaborate_term(env, ctx, lhs)?; + let elab_rhs = elaborate_term(env, ctx, rhs)?; + + // Type check: both sides must have the same sort + let lhs_sort = elab_lhs.sort(&env.signature); + let rhs_sort = elab_rhs.sort(&env.signature); + if lhs_sort != rhs_sort { + return Err(ElabError::TypeMismatch { + expected: lhs_sort, + got: rhs_sort, + }); + } + + Ok(Formula::Eq(elab_lhs, elab_rhs)) + } + ast::Formula::And(conjuncts) => { + let elab: Result, _> = conjuncts + .iter() + .map(|f| elaborate_formula(env, ctx, f)) + .collect(); + Ok(Formula::Conj(elab?)) + } + ast::Formula::Or(disjuncts) => { + let elab: Result, _> = disjuncts + .iter() + .map(|f| elaborate_formula(env, ctx, f)) + .collect(); + Ok(Formula::Disj(elab?)) + } + ast::Formula::Exists(vars, body) => { + // Extend context with quantified variables + let mut extended_ctx = ctx.clone(); + for qv in vars { + let sort = elaborate_type(env, &qv.ty)?; + for name in &qv.names { + extended_ctx = extended_ctx.extend(name.clone(), sort.clone()); + } + } + let elab_body = elaborate_formula(env, &extended_ctx, body)?; + + // Build nested existentials (one for each variable) + let mut result = elab_body; + for qv in vars.iter().rev() { + let sort = elaborate_type(env, &qv.ty)?; + for name in qv.names.iter().rev() { + result = Formula::Exists(name.clone(), sort.clone(), Box::new(result)); + } + } + Ok(result) + } + ast::Formula::RelApp(rel_name, arg) => { + // Look up the relation + let rel_id = env + .signature + .lookup_rel(rel_name) + .ok_or_else(|| ElabError::UnknownRel(rel_name.clone()))?; + + // Elaborate the argument + let elab_arg = elaborate_term(env, ctx, arg)?; + + // Type check: 
argument must match relation domain + let rel_sym = &env.signature.relations[rel_id]; + let arg_sort = elab_arg.sort(&env.signature); + if arg_sort != rel_sym.domain { + return Err(ElabError::TypeMismatch { + expected: rel_sym.domain.clone(), + got: arg_sort, + }); + } + + Ok(Formula::Rel(rel_id, elab_arg)) + } + } +} + +/// Remap a DerivedSort for nested instance fields. +/// +/// When copying sorts/functions from a nested instance field's theory into the local signature, +/// we need different remapping rules: +/// - Unqualified sorts (like "Token" in Marking) get prefixed with field_prefix (e.g., "RP/initial/Token") +/// - Already-qualified sorts (like "N/P" in Marking) map to the parent param (e.g., just "N/P") +/// +/// # Arguments +/// * `field_prefix` - The prefix for the nested field (e.g., "RP/initial") +/// * `parent_param` - The parent parameter name (e.g., "RP"), used to strip when mapping qualified sorts +#[allow(dead_code)] +pub(crate) fn remap_derived_sort_for_nested( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + field_prefix: &str, + parent_param: &str, +) -> DerivedSort { + match sort { + DerivedSort::Base(source_id) => { + let sort_name = &source_sig.sorts[*source_id]; + let qualified_name = if sort_name.contains('/') { + // Already qualified (e.g., "N/P" from a parameterized theory) + // Try to find it directly in the target (e.g., "N/P" should exist from outer param) + // If not found, try with parent param prefix (e.g., "RP/N/P") + if target_sig.lookup_sort(sort_name).is_some() { + sort_name.clone() + } else { + format!("{}/{}", parent_param, sort_name) + } + } else { + // Unqualified sort from the field's theory - prefix with field_prefix + format!("{}/{}", field_prefix, sort_name) + }; + if let Some(target_id) = target_sig.lookup_sort(&qualified_name) { + DerivedSort::Base(target_id) + } else { + // Fallback: just use the source ID (shouldn't happen in well-formed code) + eprintln!( + "Warning: could not remap 
sort '{}' (qualified: '{}') in nested field", + sort_name, qualified_name + ); + sort.clone() + } + } + DerivedSort::Product(fields) => { + let remapped_fields = fields + .iter() + .map(|(name, s)| { + ( + name.clone(), + remap_derived_sort_for_nested(s, source_sig, target_sig, field_prefix, parent_param), + ) + }) + .collect(); + DerivedSort::Product(remapped_fields) + } + } +} + +/// Remap a DerivedSort from one signature namespace to another. +/// +/// When copying sorts/functions from a param theory into the local signature, +/// the sort IDs need to be remapped. For example, if PetriNet has sort P at id=0, +/// and we copy it as "N/P" into local signature at id=2, then any DerivedSort::Base(0) +/// needs to become DerivedSort::Base(2). +/// +/// The `preserve_existing_prefix` flag controls requalification behavior: +/// - false (instance params): always prefix with param_name. N/X becomes M/N/X. +/// - true (extends): preserve existing qualifier. N/X stays N/X. +pub(crate) fn remap_derived_sort( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + param_name: &str, + preserve_existing_prefix: bool, +) -> DerivedSort { + match sort { + DerivedSort::Base(source_id) => { + // Look up the sort name in the source signature + let sort_name = &source_sig.sorts[*source_id]; + // Find the corresponding qualified name in target signature + let qualified_name = if preserve_existing_prefix && sort_name.contains('/') { + // Extends case: already-qualified names keep their original qualifier + sort_name.clone() + } else { + // Instance param case OR unqualified name: prefix with param_name + format!("{}/{}", param_name, sort_name) + }; + let target_id = target_sig + .lookup_sort(&qualified_name) + .expect("qualified sort should have been added"); + DerivedSort::Base(target_id) + } + DerivedSort::Product(fields) => { + let remapped_fields = fields + .iter() + .map(|(name, s)| { + ( + name.clone(), + remap_derived_sort(s, source_sig, target_sig, 
param_name, preserve_existing_prefix), + ) + }) + .collect(); + DerivedSort::Product(remapped_fields) + } + } +} diff --git a/src/elaborate/error.rs b/src/elaborate/error.rs new file mode 100644 index 0000000..bdec074 --- /dev/null +++ b/src/elaborate/error.rs @@ -0,0 +1,185 @@ +//! Elaboration error types. + +use crate::core::DerivedSort; + +/// A concrete counterexample showing which variable bindings violate an axiom. +#[derive(Clone, Debug)] +pub struct CounterExample { + /// (variable_name, element_name) pairs showing the violating assignment + pub bindings: Vec<(String, String)>, +} + +impl std::fmt::Display for CounterExample { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let parts: Vec = self + .bindings + .iter() + .map(|(var, elem)| format!("{} = {}", var, elem)) + .collect(); + write!(f, "{{{}}}", parts.join(", ")) + } +} + +/// Elaboration errors +#[derive(Clone, Debug)] +pub enum ElabError { + UnknownSort(String), + UnknownTheory(String), + UnknownFunction(String), + UnknownRel(String), + UnknownVariable(String), + TypeMismatch { + expected: DerivedSort, + got: DerivedSort, + }, + NotASort(String), + NotAFunction(String), + NotARecord(String), + NoSuchField { + record: String, + field: String, + }, + InvalidPath(String), + DuplicateDefinition(String), + UnsupportedFeature(String), + PartialFunction { + func_name: String, + missing_elements: Vec, + }, + /// Type error in function application: element's sort doesn't match function's domain + DomainMismatch { + func_name: String, + element_name: String, + expected_sort: String, + actual_sort: String, + }, + /// Type error in equation: RHS sort doesn't match function's codomain + CodomainMismatch { + func_name: String, + element_name: String, + expected_sort: String, + actual_sort: String, + }, + /// Axiom violation during instance checking + AxiomViolation { + axiom_index: usize, + axiom_name: Option, + num_violations: usize, + /// Concrete counterexamples (limited to first few 
for readability) + counterexamples: Vec, + }, + /// Chase algorithm failed (e.g., didn't converge) + ChaseFailed(String), + + /// Not enough arguments for a parameterized theory + NotEnoughArgs { + name: String, + expected: usize, + got: usize, + }, + + /// Type expression evaluation error + TypeExprError(String), +} + +impl std::fmt::Display for ElabError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ElabError::UnknownSort(s) => write!(f, "unknown sort: {}", s), + ElabError::UnknownTheory(s) => write!(f, "unknown theory: {}", s), + ElabError::UnknownFunction(s) => write!(f, "unknown function: {}", s), + ElabError::UnknownRel(s) => write!(f, "unknown relation: {}", s), + ElabError::UnknownVariable(s) => write!(f, "unknown variable: {}", s), + ElabError::TypeMismatch { expected, got } => { + write!(f, "type mismatch: expected {}, got {}", expected, got) + } + ElabError::NotASort(s) => write!(f, "not a sort: {}", s), + ElabError::NotAFunction(s) => write!(f, "not a function: {}", s), + ElabError::NotARecord(s) => write!(f, "not a record type: {}", s), + ElabError::NoSuchField { record, field } => { + write!(f, "no field '{}' in record {}", field, record) + } + ElabError::InvalidPath(s) => write!(f, "invalid path: {}", s), + ElabError::DuplicateDefinition(s) => write!(f, "duplicate definition: {}", s), + ElabError::UnsupportedFeature(s) => write!(f, "unsupported feature: {}", s), + ElabError::PartialFunction { + func_name, + missing_elements, + } => { + write!( + f, + "partial function '{}': missing definitions for {:?}", + func_name, missing_elements + ) + } + ElabError::DomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + write!( + f, + "type error: '{}' has sort '{}', but function '{}' expects domain sort '{}'", + element_name, actual_sort, func_name, expected_sort + ) + } + ElabError::CodomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + write!( + 
f, + "type error: '{}' has sort '{}', but function '{}' has codomain sort '{}'", + element_name, actual_sort, func_name, expected_sort + ) + } + ElabError::AxiomViolation { + axiom_index, + axiom_name, + num_violations, + counterexamples, + } => { + let axiom_desc = if let Some(name) = axiom_name { + format!("axiom '{}' (#{}) violated", name, axiom_index) + } else { + format!("axiom #{} violated", axiom_index) + }; + + if counterexamples.is_empty() { + write!(f, "{}: {} counterexample(s) found", axiom_desc, num_violations) + } else { + writeln!(f, "{}: {} counterexample(s) found", axiom_desc, num_violations)?; + for (i, ce) in counterexamples.iter().enumerate() { + writeln!(f, " #{}: {}", i + 1, ce)?; + } + if *num_violations > counterexamples.len() { + write!( + f, + " ... and {} more", + num_violations - counterexamples.len() + )?; + } + Ok(()) + } + } + ElabError::ChaseFailed(msg) => write!(f, "chase failed: {}", msg), + ElabError::NotEnoughArgs { + name, + expected, + got, + } => { + write!( + f, + "'{}' expects {} argument(s), but only {} provided", + name, expected, got + ) + } + ElabError::TypeExprError(msg) => write!(f, "type expression error: {}", msg), + } + } +} + +pub type ElabResult = Result; diff --git a/src/elaborate/instance.rs b/src/elaborate/instance.rs new file mode 100644 index 0000000..33af6c4 --- /dev/null +++ b/src/elaborate/instance.rs @@ -0,0 +1,1431 @@ +//! Instance elaboration. + +use std::collections::HashMap; +use std::rc::Rc; + +use crate::ast; +use crate::core::*; +use crate::id::{NumericId, Slid}; +use crate::query::chase::chase_fixpoint; +use crate::tensor::check_theory_axioms; +use crate::universe::Universe; + +use super::env::Env; +use super::error::{CounterExample, ElabError, ElabResult}; + +// Re-use remapping utilities from theory elaboration +use super::theory::{collect_type_args_from_theory_type, build_param_subst, remap_sort_for_param_import}; + +/// Minimal context for instance elaboration - what we need from the caller. 
+/// +/// This replaces the old `Workspace` dependency, making elaboration more modular. +pub struct ElaborationContext<'a> { + /// Available theories + pub theories: &'a HashMap>, + /// Existing instances (for parameterized instance support) + pub instances: &'a HashMap, + /// Universe for allocating new Luids + pub universe: &'a mut Universe, + /// Sibling nested instances (for cross-references within a parent instance) + /// When elaborating nested instances, this contains already-elaborated siblings + pub siblings: HashMap, +} + +/// Result of elaborating an instance. +/// +/// Contains the structure and element name mappings. +#[derive(Debug)] +pub struct InstanceElaborationResult { + /// The elaborated structure + pub structure: Structure, + /// Mapping from Slid to element name (for display) + pub slid_to_name: HashMap, + /// Mapping from element name to Slid (for lookups) + pub name_to_slid: HashMap, + /// Metadata for nested instances (theory name and element names) + pub nested_meta: HashMap, +} + +/// Nested instance metadata for name resolution +#[derive(Clone, Debug, Default)] +pub struct NestedInstanceMeta { + /// Theory name of the nested instance + pub theory_name: String, + /// Map from element names to Slids (within the nested structure) + pub name_to_slid: HashMap, + /// Reverse map from Slids to names + pub slid_to_name: HashMap, +} + +/// An instance entry for elaboration context. +/// +/// This is a simpler version than what's in the REPL - just enough for elaboration. +pub struct InstanceEntry { + /// The structure containing the instance data + pub structure: Structure, + /// The base theory name this instance is of (e.g., "ReachabilityProblem") + pub theory_name: String, + /// The full theory type string (e.g., "ExampleNet ReachabilityProblem") + /// This is needed to compute parameter substitutions when importing elements. 
+ pub theory_type: String, + /// Map from element names to Slids + pub element_names: HashMap, + /// Reverse map from Slids to names + pub slid_to_name: HashMap, + /// Metadata for nested instances (for cross-instance references) + pub nested_meta: HashMap, +} + +impl InstanceEntry { + /// Create a new instance entry + pub fn new(structure: Structure, theory_name: String, theory_type: String) -> Self { + Self { + structure, + theory_name, + theory_type, + element_names: HashMap::new(), + slid_to_name: HashMap::new(), + nested_meta: HashMap::new(), + } + } + + /// Register an element with a name + pub fn register_element(&mut self, name: String, slid: Slid) { + self.element_names.insert(name.clone(), slid); + self.slid_to_name.insert(slid, name); + } + + /// Look up element by local name + pub fn get_element(&self, name: &str) -> Option { + self.element_names.get(name).copied() + } + + /// Get name for Slid + pub fn get_name(&self, slid: Slid) -> Option<&str> { + self.slid_to_name.get(&slid).map(|s| s.as_str()) + } +} + +/// Elaborate an instance declaration into a Structure with element name mappings. +/// +/// This is the context-aware version that supports cross-instance references. +/// For parameterized instances like `marking0 : ExampleNet Marking`, elements +/// from param instances (ExampleNet) are imported into the new structure. +/// +/// Returns both the structure and the element name mappings, so the caller +/// can track names for both local and imported elements. +pub fn elaborate_instance_ctx( + ctx: &mut ElaborationContext<'_>, + instance: &ast::InstanceDecl, +) -> ElabResult { + // If needs_chase is set, we skip totality (chase will fill in missing values) + elaborate_instance_ctx_inner(ctx, instance, instance.needs_chase) +} + +/// Elaborate an instance without validating totality. +/// Use this when the chase algorithm will fill in missing function values. 
+pub fn elaborate_instance_ctx_partial( + ctx: &mut ElaborationContext<'_>, + instance: &ast::InstanceDecl, +) -> ElabResult { + elaborate_instance_ctx_inner(ctx, instance, true) +} + +fn elaborate_instance_ctx_inner( + ctx: &mut ElaborationContext<'_>, + instance: &ast::InstanceDecl, + skip_totality: bool, +) -> ElabResult { + // Build Env from context theories for theory lookups + let env = Env { + theories: ctx.theories.clone(), + ..Env::new() + }; + + // 1. Resolve the theory type (handles parameterized types like `ExampleNet ReachabilityProblem`) + let resolved = resolve_instance_type(&env, &instance.theory)?; + let theory = env + .theories + .get(&resolved.theory_name) + .ok_or_else(|| ElabError::UnknownTheory(resolved.theory_name.clone()))?; + + // 2. Initialize structure (functions will be initialized after first pass) + let mut structure = Structure::new(theory.theory.signature.sorts.len()); + + // Initialize relation storage from signature + let relation_arities: Vec = theory + .theory + .signature + .relations + .iter() + .map(|rel| rel.domain.arity()) + .collect(); + structure.init_relations(&relation_arities); + + // Track name → Slid for resolving references within this instance + // Also track Slid → name for error messages + let mut name_to_slid: HashMap = HashMap::new(); + let mut slid_to_name: HashMap = HashMap::new(); + + // Track nested instance metadata for cross-instance references + let mut nested_meta: HashMap = HashMap::new(); + + // 2b. Import elements from param instances + // For each param binding (N, ExampleNet), import all elements from ExampleNet + // with their sorts mapped to the local signature (N/P, N/T, etc.) + // + // Also build a mapping from (param_slid -> local_slid) for each param instance + // so we can later import function values. 
+ let mut param_slid_to_local: HashMap<(String, Slid), Slid> = HashMap::new(); + + for (param_name, arg_value) in &resolved.arguments { + // Case 1: argument is an instance name (e.g., "ExampleNet" for N : PetriNet instance) + if let Some(param_entry) = ctx.instances.get(arg_value) { + // Get the param theory to know sort mappings + let param_theory_name = ¶m_entry.theory_name; + if let Some(param_theory) = ctx.theories.get(param_theory_name) { + // Build parameter substitution map for this param instance + // This tells us how to remap sorts from the param instance to local sorts. + // + // For example, if param_entry is `problem0 : ExampleNet ReachabilityProblem`: + // - param_theory = ReachabilityProblem, which has param (N : PetriNet) + // - type_args = ["ExampleNet"] (from problem0's theory_type) + // - param_subst = {"N" -> "ExampleNet"} + let type_args = collect_type_args_from_theory_type(¶m_entry.theory_type); + let param_subst = build_param_subst(param_theory, &type_args); + + // For each element in the param instance, import it + for (&slid, elem_name) in ¶m_entry.slid_to_name { + // Get the element's sort in the param instance + let param_sort_id = param_entry.structure.sorts[slid.index()]; + let param_sort_name = ¶m_theory.theory.signature.sorts[param_sort_id]; + + // Map to local sort using parameter substitution + // This handles cases like "N/P" in problem0 -> "N/P" in solution0 + // (not "RP/N/P" which doesn't exist) + let local_sort_name = remap_sort_for_param_import( + param_sort_name, + param_name, + ¶m_subst, + &resolved.arguments, + ); + let local_sort_id = theory + .theory + .signature + .lookup_sort(&local_sort_name) + .ok_or_else(|| ElabError::UnknownSort(local_sort_name.clone()))?; + + // Get the Luid for this element + let luid = param_entry.structure.get_luid(slid); + + // Add to local structure with the SAME Luid + let local_slid = structure.add_element_with_luid(luid, local_sort_id); + + // Register names: both "N/elemname" and 
"InstanceName/elemname" + // Also register unqualified "elemname" for convenient access + // (local elements declared later will shadow these if there's a collision) + let qualified_param = format!("{}/{}", param_name, elem_name); + let qualified_instance = format!("{}/{}", arg_value, elem_name); + + name_to_slid.insert(elem_name.clone(), local_slid); + name_to_slid.insert(qualified_param.clone(), local_slid); + name_to_slid.insert(qualified_instance.clone(), local_slid); + slid_to_name.insert(local_slid, qualified_instance); + + // Record mapping for function value import + param_slid_to_local.insert((arg_value.clone(), slid), local_slid); + } + } + } + // Case 2: argument is a sort path (for Sort params) + // Supports paths like: + // - "As/a" -> Instance/Sort + // - "trace/input_terminal" -> SiblingNestedInstance/Sort + // - "problem0/initial_marking/token" -> Instance/NestedInstance/Sort + else if arg_value.contains('/') { + let segments: Vec<&str> = arg_value.split('/').collect(); + + // Helper closure to import elements from a structure/theory pair + let import_elements_from_structure = | + source_structure: &Structure, + source_slid_to_name: &HashMap, + source_theory: &ElaboratedTheory, + source_sort_name: &str, + qualified_prefix: &str, + structure: &mut Structure, + name_to_slid: &mut HashMap, + slid_to_name: &mut HashMap, + param_slid_to_local: &mut HashMap<(String, Slid), Slid>, + param_name: &str, + theory: &ElaboratedTheory, + | -> ElabResult<()> { + if let Some(source_sort_id) = source_theory.theory.signature.lookup_sort(source_sort_name) { + for (&slid, elem_name) in source_slid_to_name { + let elem_sort_id = source_structure.sorts[slid.index()]; + if elem_sort_id == source_sort_id { + let local_sort_id = theory + .theory + .signature + .lookup_sort(param_name) + .ok_or_else(|| ElabError::UnknownSort(param_name.to_string()))?; + + let luid = source_structure.get_luid(slid); + let local_slid = structure.add_element_with_luid(luid, local_sort_id); + + 
let qualified_source = format!("{}/{}", qualified_prefix, elem_name); + + name_to_slid.insert(elem_name.clone(), local_slid); + name_to_slid.insert(qualified_source.clone(), local_slid); + slid_to_name.insert(local_slid, qualified_source.clone()); + + param_slid_to_local.insert((qualified_prefix.to_string(), slid), local_slid); + } + } + } + Ok(()) + }; + + match segments.len() { + // Case 2a: "Instance/Sort" or "Sibling/Sort" + 2 => { + let source_instance_name = segments[0]; + let source_sort_name = segments[1]; + + if let Some(source_entry) = ctx.instances.get(source_instance_name) + .or_else(|| ctx.siblings.get(source_instance_name)) { + let source_theory_name = &source_entry.theory_name; + if let Some(source_theory) = ctx.theories.get(source_theory_name) { + import_elements_from_structure( + &source_entry.structure, + &source_entry.slid_to_name, + source_theory, + source_sort_name, + source_instance_name, + &mut structure, + &mut name_to_slid, + &mut slid_to_name, + &mut param_slid_to_local, + param_name, + theory, + )?; + } + } + } + // Case 2b: "Instance/NestedInstance/Sort" (e.g., "problem0/initial_marking/token") + 3 => { + let top_instance_name = segments[0]; + let nested_instance_name = segments[1]; + let source_sort_name = segments[2]; + + if let Some(top_entry) = ctx.instances.get(top_instance_name) + .or_else(|| ctx.siblings.get(top_instance_name)) { + // Find the nested structure + if let Some(nested_structure) = top_entry.structure.nested.get(nested_instance_name) { + // Use nested_meta if available for accurate name resolution + if let Some(nested_meta) = top_entry.nested_meta.get(nested_instance_name) { + if let Some(nested_theory) = ctx.theories.get(&nested_meta.theory_name) { + let qualified_prefix = format!("{}/{}", top_instance_name, nested_instance_name); + import_elements_from_structure( + nested_structure, + &nested_meta.slid_to_name, + nested_theory, + source_sort_name, + &qualified_prefix, + &mut structure, + &mut name_to_slid, + &mut 
slid_to_name, + &mut param_slid_to_local, + param_name, + theory, + )?; + } + } else { + // Fallback: Try to infer from parent theory's instance fields + if let Some(parent_theory) = ctx.theories.get(&top_entry.theory_name) + && let Some(field_idx) = parent_theory.theory.signature.lookup_instance_field(nested_instance_name) { + let field = &parent_theory.theory.signature.instance_fields[field_idx]; + // Get the nested theory name (last word of the type) + let nested_theory_name = field.theory_type + .split_whitespace() + .last() + .unwrap_or(&field.theory_type); + + if let Some(nested_theory) = ctx.theories.get(nested_theory_name) { + // Build slid_to_name for the nested structure by scanning parent's element_names + let mut nested_slid_to_name: HashMap = HashMap::new(); + + // Check the parent's element_names for nested paths like "initial_marking/tok" + for name in top_entry.element_names.keys() { + if let Some(stripped) = name.strip_prefix(&format!("{}/", nested_instance_name)) { + // Find the corresponding slid in the nested structure + // by matching sort-local indices + for slid_idx in 0..nested_structure.len() { + let slid = Slid::from_usize(slid_idx); + if let std::collections::hash_map::Entry::Vacant(e) = nested_slid_to_name.entry(slid) { + e.insert(stripped.to_string()); + break; + } + } + } + } + + let qualified_prefix = format!("{}/{}", top_instance_name, nested_instance_name); + import_elements_from_structure( + nested_structure, + &nested_slid_to_name, + nested_theory, + source_sort_name, + &qualified_prefix, + &mut structure, + &mut name_to_slid, + &mut slid_to_name, + &mut param_slid_to_local, + param_name, + theory, + )?; + } + } + } + } + } + } + _ => { + // Unsupported path depth - silently skip + } + } + } + } + + // 3. 
First pass: create elements (new elements declared in this instance) + for item in &instance.body { + if let ast::InstanceItem::Element(names, sort_expr) = &item.node { + // Resolve the sort + let sort_id = resolve_instance_sort(&theory.theory.signature, sort_expr)?; + + // Add element for each name in the comma-separated list + for name in names { + // Add element to structure (returns Slid, Luid) + let (slid, _luid) = structure.add_element(ctx.universe, sort_id); + name_to_slid.insert(name.clone(), slid); + slid_to_name.insert(slid, name.clone()); + } + } + } + + // 3b. Initialize function storage now that carrier sizes are known + // Extract both domain and codomain info for each function + let func_infos: Vec = theory + .theory + .signature + .functions + .iter() + .map(|func| { + let domain = match &func.domain { + DerivedSort::Base(id) => FunctionDomainInfo::Base(*id), + DerivedSort::Product(fields) => { + let field_sorts: Vec = fields + .iter() + .filter_map(|(_, ds)| match ds { + DerivedSort::Base(id) => Some(*id), + DerivedSort::Product(_) => None, // Nested products not supported + }) + .collect(); + FunctionDomainInfo::Product(field_sorts) + } + }; + let codomain = match &func.codomain { + DerivedSort::Base(id) => FunctionCodomainInfo::Local(*id), + DerivedSort::Product(fields) => { + let field_names: Vec = fields.iter().map(|(name, _)| name.clone()).collect(); + let field_sorts: Vec = fields + .iter() + .filter_map(|(_, ds)| match ds { + DerivedSort::Base(id) => Some(*id), + DerivedSort::Product(_) => None, // Nested products not supported + }) + .collect(); + FunctionCodomainInfo::Product { field_names, field_sorts } + } + }; + FunctionFullInfo { domain, codomain } + }) + .collect(); + structure.init_functions_complete(&func_infos); + + // 3c. Import function values from param instances + // For each param (N, ExampleNet), for each function in param theory (src, tgt), + // import the function values using the local func name (N/src, N/tgt). 
+ for (param_name, instance_name) in &resolved.arguments { + if let Some(param_entry) = ctx.instances.get(instance_name) { + let param_theory_name = ¶m_entry.theory_name; + if let Some(param_theory) = ctx.theories.get(param_theory_name) { + // Build parameter substitution map (same as for element import) + let type_args = collect_type_args_from_theory_type(¶m_entry.theory_type); + let param_subst = build_param_subst(param_theory, &type_args); + + // For each function in the param theory + for (param_func_id, param_func) in + param_theory.theory.signature.functions.iter().enumerate() + { + // Find the corresponding local function using the same remapping logic + let local_func_name = remap_sort_for_param_import( + ¶m_func.name, + param_name, + ¶m_subst, + &resolved.arguments, + ); + let local_func_id = match theory.theory.signature.lookup_func(&local_func_name) { + Some(id) => id, + None => { + // Function might be from a shared param and already imported + // (e.g., N/in/src when N is shared between params) + continue; + } + }; + + // For each element in the domain, copy the function value + if let DerivedSort::Base(param_domain_sort) = ¶m_func.domain { + for param_domain_slid in + param_entry.structure.carriers[*param_domain_sort].iter() + { + let param_domain_slid = Slid::from_usize(param_domain_slid as usize); + + // Get the function value in the param instance + let param_sort_local_id = + param_entry.structure.sort_local_id(param_domain_slid); + if let Some(param_value_slid) = param_entry + .structure + .get_function(param_func_id, param_sort_local_id) + { + // Map both domain and codomain slids to local + if let (Some(&local_domain_slid), Some(&local_value_slid)) = ( + param_slid_to_local + .get(&(instance_name.clone(), param_domain_slid)), + param_slid_to_local + .get(&(instance_name.clone(), param_value_slid)), + ) { + // Define the function value in the local structure + let _ = structure.define_function( + local_func_id, + local_domain_slid, + 
local_value_slid, + ); + } + } + } + } + } + } + } + } + + // 4. Second pass: process equations (define function values) with type checking + for item in &instance.body { + if let ast::InstanceItem::Equation(lhs, rhs) = &item.node { + // Decompose lhs: `element func_path` or `[x: a, y: b] func_path` + let decomposed = + decompose_func_app(lhs, &name_to_slid, &theory.theory.signature)?; + + match decomposed { + DecomposedFuncApp::Base { elem, func_id } => { + // Type checking: verify element sort matches function domain + let func = &theory.theory.signature.functions[func_id]; + let elem_sort_id = structure.sorts[elem.index()]; + if let DerivedSort::Base(expected_domain) = &func.domain + && elem_sort_id != *expected_domain + { + return Err(ElabError::DomainMismatch { + func_name: func.name.clone(), + element_name: slid_to_name + .get(&elem) + .cloned() + .unwrap_or_else(|| format!("slid_{}", elem)), + expected_sort: theory.theory.signature.sorts[*expected_domain].clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + + // Check if codomain is a product (needs Record RHS) or base (needs element RHS) + match &func.codomain { + DerivedSort::Base(expected_codomain) => { + // Base codomain: resolve RHS to single element + let value_slid = resolve_instance_element(rhs, &name_to_slid)?; + let value_sort_id = structure.sorts[value_slid.index()]; + if value_sort_id != *expected_codomain { + return Err(ElabError::CodomainMismatch { + func_name: func.name.clone(), + element_name: slid_to_name + .get(&value_slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", value_slid)), + expected_sort: theory.theory.signature.sorts[*expected_codomain].clone(), + actual_sort: theory.theory.signature.sorts[value_sort_id].clone(), + }); + } + // Define the function value + structure + .define_function(func_id, elem, value_slid) + .map_err(ElabError::DuplicateDefinition)?; + } + DerivedSort::Product(codomain_fields) => { + // Product codomain: RHS must be 
a Record + let rhs_fields = match rhs { + ast::Term::Record(fields) => fields, + _ => return Err(ElabError::UnsupportedFeature(format!( + "function {} has product codomain, RHS must be a record literal like [field1: v1, field2: v2], got {:?}", + func.name, rhs + ))), + }; + + // Resolve each field value and type-check + let mut codomain_values: Vec<(&str, Slid)> = Vec::with_capacity(rhs_fields.len()); + for (field_name, field_term) in rhs_fields { + // Find the expected sort for this field + let expected_sort = codomain_fields.iter() + .find(|(name, _)| name == field_name) + .ok_or_else(|| ElabError::UnsupportedFeature(format!( + "unknown field '{}' in codomain of function {} (expected: {:?})", + field_name, func.name, + codomain_fields.iter().map(|(n, _)| n).collect::>() + )))?; + + let value_slid = resolve_instance_element(field_term, &name_to_slid)?; + let value_sort_id = structure.sorts[value_slid.index()]; + + if let DerivedSort::Base(expected_sort_id) = &expected_sort.1 + && value_sort_id != *expected_sort_id { + return Err(ElabError::CodomainMismatch { + func_name: func.name.clone(), + element_name: format!("field '{}': {}", field_name, + slid_to_name.get(&value_slid).cloned().unwrap_or_else(|| format!("slid_{}", value_slid))), + expected_sort: theory.theory.signature.sorts[*expected_sort_id].clone(), + actual_sort: theory.theory.signature.sorts[value_sort_id].clone(), + }); + } + + codomain_values.push((field_name.as_str(), value_slid)); + } + + // Define the product codomain function value + structure + .define_function_product_codomain(func_id, elem, &codomain_values) + .map_err(ElabError::DuplicateDefinition)?; + } + } + } + + DecomposedFuncApp::Product { tuple, func_id } => { + let func = &theory.theory.signature.functions[func_id]; + + // Type checking: verify tuple elements match product domain fields + if let DerivedSort::Product(domain_fields) = &func.domain { + if tuple.len() != domain_fields.len() { + return 
Err(ElabError::UnsupportedFeature(format!( + "product domain arity mismatch: expected {}, got {}", + domain_fields.len(), + tuple.len() + ))); + } + + for (slid, (field_name, field_sort)) in tuple.iter().zip(domain_fields.iter()) { + let elem_sort_id = structure.sorts[slid.index()]; + if let DerivedSort::Base(expected_sort) = field_sort + && elem_sort_id != *expected_sort { + return Err(ElabError::DomainMismatch { + func_name: func.name.clone(), + element_name: format!( + "field {} ({})", + field_name, + slid_to_name + .get(slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)) + ), + expected_sort: theory.theory.signature.sorts[*expected_sort] + .clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id] + .clone(), + }); + } + } + } else { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has product LHS but non-product domain {:?}", + func.name, func.domain + ))); + } + + // Handle codomain: base vs product + match &func.codomain { + DerivedSort::Base(expected_codomain) => { + // Resolve RHS to single element + let value_slid = resolve_instance_element(rhs, &name_to_slid)?; + let value_sort_id = structure.sorts[value_slid.index()]; + if value_sort_id != *expected_codomain { + return Err(ElabError::CodomainMismatch { + func_name: func.name.clone(), + element_name: slid_to_name + .get(&value_slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", value_slid)), + expected_sort: theory.theory.signature.sorts[*expected_codomain].clone(), + actual_sort: theory.theory.signature.sorts[value_sort_id].clone(), + }); + } + // Define the function value for product domain + structure + .define_function_product(func_id, &tuple, value_slid) + .map_err(ElabError::DuplicateDefinition)?; + } + DerivedSort::Product(_) => { + // Product domain with product codomain: not yet supported + return Err(ElabError::UnsupportedFeature(format!( + "function {} has both product domain and product codomain (not yet supported)", + func.name + ))); + } + } + 
} + } + } + } + + // 5. Third pass: relation assertions (assert relation tuples) + for item in &instance.body { + if let ast::InstanceItem::RelationAssertion(term, rel_name) = &item.node { + // Find the relation in the signature + let rel_id = theory + .theory + .signature + .lookup_rel(rel_name) + .ok_or_else(|| ElabError::UnknownRel(rel_name.clone()))?; + + let rel = &theory.theory.signature.relations[rel_id]; + + // Build the tuple of Slids from the term + let domain = &rel.domain; + let tuple = match (term, domain) { + // Unary relation with simple element: `element relation;` + (ast::Term::Path(_), DerivedSort::Product(expected_fields)) + if expected_fields.len() == 1 => + { + let slid = resolve_instance_element(term, &name_to_slid)?; + + // Type check + let elem_sort_id = structure.sorts[slid.index()]; + if let &DerivedSort::Base(expected_sort_id) = &expected_fields[0].1 + && elem_sort_id != expected_sort_id { + return Err(ElabError::DomainMismatch { + func_name: rel_name.clone(), + element_name: slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)), + expected_sort: theory.theory.signature.sorts[expected_sort_id] + .clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + vec![slid] + } + + // Multi-ary relation with record: `[field: value, ...] relation;` + (ast::Term::Record(fields), DerivedSort::Product(expected_fields)) => { + if fields.len() != expected_fields.len() { + return Err(ElabError::UnsupportedFeature(format!( + "relation {} expects {} fields, got {}", + rel_name, + expected_fields.len(), + fields.len() + ))); + } + + // Build tuple in the correct field order + // Supports both named fields and positional fields: + // - Named: `[item: a, on: b]` matches by field name + // - Positional: `[a, b]` maps "0" to first field, "1" to second, etc. 
+ // - Mixed: `[a, on: b]` uses position for "0", name for "on" + let mut tuple = Vec::with_capacity(expected_fields.len()); + for (idx, (expected_name, expected_sort)) in expected_fields.iter().enumerate() + { + let field_value = fields + .iter() + .find(|(name, _)| { + // Positional fields (named "0", "1", etc.) match by index + if let Ok(pos_idx) = name.parse::() { + pos_idx == idx + } else { + // Named fields match by name + name == expected_name.as_str() + } + }) + .ok_or_else(|| { + ElabError::UnsupportedFeature(format!( + "missing field '{}' (position {}) in relation assertion", + expected_name, idx + )) + })?; + + // Resolve the field value to a Slid + let slid = resolve_instance_element(&field_value.1, &name_to_slid)?; + + // Type check: verify element sort matches field sort + let elem_sort_id = structure.sorts[slid.index()]; + if let &DerivedSort::Base(expected_sort_id) = expected_sort + && elem_sort_id != expected_sort_id { + return Err(ElabError::DomainMismatch { + func_name: rel_name.clone(), + element_name: slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)), + expected_sort: theory.theory.signature.sorts[expected_sort_id] + .clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + + tuple.push(slid); + } + tuple + } + + // Unary relation with base sort domain: `element relation;` + // This handles `rel : Sort -> Prop` (without bracket syntax) + (ast::Term::Path(_), DerivedSort::Base(expected_sort_id)) => { + let slid = resolve_instance_element(term, &name_to_slid)?; + + // Type check + let elem_sort_id = structure.sorts[slid.index()]; + if elem_sort_id != *expected_sort_id { + return Err(ElabError::DomainMismatch { + func_name: rel_name.clone(), + element_name: slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)), + expected_sort: theory.theory.signature.sorts[*expected_sort_id].clone(), + actual_sort: 
theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + vec![slid] + } + + // Mismatch: using simple element for non-unary relation + (ast::Term::Path(_), DerivedSort::Product(expected_fields)) => { + return Err(ElabError::UnsupportedFeature(format!( + "relation {} has {} fields, use record syntax [field: value, ...]", + rel_name, + expected_fields.len() + ))); + } + + _ => { + return Err(ElabError::UnsupportedFeature(format!( + "relation {} has non-product domain {:?}", + rel_name, domain + ))); + } + }; + + // Assert the relation tuple + structure.assert_relation(rel_id, tuple); + } + } + + // 6. Fourth pass: nested instances + // For each nested instance like `initial_marking = { t : Token; ... };` + // 1. Find the instance field declaration in the theory + // 2. Resolve its theory type (e.g., "N Marking") with parameter substitution + // 3. Recursively elaborate the nested instance body + // 4. Store in the parent structure's `nested` HashMap + for item in &instance.body { + if let ast::InstanceItem::NestedInstance(field_name, nested_decl) = &item.node { + // 1. Look up the instance field in the parent theory signature + let field_idx = theory + .theory + .signature + .lookup_instance_field(field_name) + .ok_or_else(|| { + ElabError::UnknownVariable(format!("nested instance field: {}", field_name)) + })?; + + let instance_field = &theory.theory.signature.instance_fields[field_idx]; + + // 2. 
Resolve the theory type with parameter substitution + // The theory_type string can be like: + // - "N Marking" -> simple param at start + // - "(trace/input_terminal) (RP/initial_marking/token) Iso" -> params in paths + // We need to substitute parameter names in paths, handling: + // - Exact matches: "N" -> "ExampleNet" + // - Path prefixes: "RP/initial_marking/token" -> "problem0/initial_marking/token" + let resolved_theory_type = { + let mut result = instance_field.theory_type.clone(); + for (param_name, actual_instance_name) in &resolved.arguments { + // Replace param at path start: "RP/..." -> "problem0/..." + let path_pattern = format!("{}/", param_name); + let path_replacement = format!("{}/", actual_instance_name); + result = result.replace(&path_pattern, &path_replacement); + + // Replace exact param (word boundary): only if surrounded by non-alphanumeric + // Split by whitespace and handle each token + let parts: Vec = result + .split_whitespace() + .map(|p| { + // Strip parens for comparison + let stripped = p.trim_start_matches('(').trim_end_matches(')'); + if stripped == param_name { + // Replace the param name, keeping any parens + let prefix = if p.starts_with('(') { "(" } else { "" }; + let suffix = if p.ends_with(')') { ")" } else { "" }; + format!("{}{}{}", prefix, actual_instance_name, suffix) + } else { + p.to_string() + } + }) + .collect(); + result = parts.join(" "); + } + result + }; + + // 3. Find the resolved theory + // Parse the resolved type string to get the theory name + // For "ExampleNet Marking", we need to get the "Marking" theory + let nested_theory_name = resolved_theory_type + .split_whitespace() + .last() + .unwrap_or(&resolved_theory_type) + .to_string(); + + let nested_theory = ctx.theories.get(&nested_theory_name).ok_or_else(|| { + ElabError::UnknownTheory(format!( + "nested instance theory: {} (from field type: {})", + nested_theory_name, instance_field.theory_type + )) + })?; + + // 4. 
Create a new instance declaration with the resolved type + // Build the type expression from the resolved string + let nested_instance_decl = ast::InstanceDecl { + theory: parse_type_expr_from_string(&resolved_theory_type)?, + name: format!("{}_{}", instance.name, field_name), + body: nested_decl.body.clone(), + needs_chase: false, // Nested instances don't separately chase + }; + + // 5. Recursively elaborate the nested instance + let nested_result = elaborate_instance_ctx(ctx, &nested_instance_decl)?; + + // 6. Store the nested structure using the field name as the key + structure.nested.insert(field_name.clone(), nested_result.structure.clone()); + + // 7. Add this nested instance to siblings for cross-referencing by subsequent nested instances + // e.g., after elaborating `trace = {...}`, make it available so `initial_iso` can reference `trace/it` + let sibling_entry = InstanceEntry { + structure: nested_result.structure.clone(), + theory_name: nested_theory_name.clone(), + theory_type: resolved_theory_type.clone(), + element_names: nested_result.name_to_slid.clone(), + slid_to_name: nested_result.slid_to_name.clone(), + nested_meta: nested_result.nested_meta.clone(), + }; + ctx.siblings.insert(field_name.clone(), sibling_entry); + + // 8. Record nested metadata for inclusion in elaboration result + nested_meta.insert(field_name.clone(), NestedInstanceMeta { + theory_name: nested_theory_name.clone(), + name_to_slid: nested_result.name_to_slid, + slid_to_name: nested_result.slid_to_name, + }); + + // Suppress unused variable warning + let _ = nested_theory; // Used for type checking (could add validation later) + } + } + + // 6. Validate totality: all functions must be defined on all elements of their domain + // Skip this check when creating instances for chase (which will fill in missing values) + if !skip_totality { + validate_totality(&structure, &theory.theory.signature, &slid_to_name)?; + } + + // 7. 
Run chase if requested (fills in missing values according to axioms) + if instance.needs_chase { + const MAX_CHASE_ITERATIONS: usize = 1000; + // Chase now uses tensor system for premise evaluation - handles existentials, etc. + chase_fixpoint( + &theory.theory.axioms, + &mut structure, + ctx.universe, + &theory.theory.signature, + MAX_CHASE_ITERATIONS, + ) + .map_err(|e| ElabError::ChaseFailed(e.to_string()))?; + } + + // 8. Check axioms - all instances must satisfy the theory's axioms + let axioms: Vec<_> = theory.theory.axioms.clone(); + let violations = check_theory_axioms(&axioms, &structure, &theory.theory.signature); + + if !violations.is_empty() { + // Report the first violation with detailed counterexamples + let (axiom_idx, violation_list) = &violations[0]; + + // Get the actual axiom name from the theory + let axiom_name = theory + .theory + .axiom_names + .get(*axiom_idx) + .cloned(); + + // Build counterexamples with element names (limit to 5 for readability) + let axiom = &theory.theory.axioms[*axiom_idx]; + let counterexamples: Vec = violation_list + .iter() + .take(5) + .map(|v| { + let bindings: Vec<(String, String)> = v + .variable_names + .iter() + .zip(&v.assignment) + .map(|(var_name, &idx)| { + // Look up the variable's sort from the axiom context by name + let elem_name = axiom + .context + .vars + .iter() + .find(|(name, _)| name == var_name) + .and_then(|(_, sort)| { + // Get the sort id (assuming DerivedSort::Base for now) + if let DerivedSort::Base(sort_id) = sort { + // Get the Slid at index idx from the carrier (RoaringTreemap) + structure.carriers.get(*sort_id).and_then(|carrier| { + // Iterate to the idx-th element + carrier.iter().nth(idx).map(|slid_u64| { + let slid = Slid::from_usize(slid_u64 as usize); + slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("#{}", idx)) + }) + }) + } else { + None + } + }) + .unwrap_or_else(|| format!("#{}", idx)); + (var_name.clone(), elem_name) + }) + .collect(); + CounterExample { 
bindings } + }) + .collect(); + + return Err(ElabError::AxiomViolation { + axiom_index: *axiom_idx, + axiom_name, + num_violations: violation_list.len(), + counterexamples, + }); + } + + Ok(InstanceElaborationResult { + structure, + slid_to_name, + name_to_slid, + nested_meta, + }) +} + +// ============================================================================ +// HELPER TYPES AND FUNCTIONS +// ============================================================================ + +/// Result of resolving a (possibly parameterized) instance type. +/// +/// For `ExampleNet ReachabilityProblem`: +/// - theory_name = "ReachabilityProblem" +/// - arguments = vec![("N", "ExampleNet")] +/// +/// For simple `PetriNet`: +/// - theory_name = "PetriNet" +/// - arguments = vec![] +struct ResolvedInstanceType { + theory_name: String, + /// (param_name, instance_name) pairs + arguments: Vec<(String, String)>, +} + +/// Resolve a type expression to a theory name and its arguments. +/// +/// In curried application syntax, the theory is at the end: +/// - Simple: `PetriNet` -> ("PetriNet", []) +/// - Single param: `ExampleNet Marking` -> ("Marking", [("N", "ExampleNet")]) +/// - Multiple params: `ExampleNet problem0 ReachabilityProblem/Solution` -> ("ReachabilityProblem/Solution", [("N", "ExampleNet"), ("RP", "problem0")]) +/// +/// With concatenative parsing, tokens are in order: [arg1, arg2, ..., theory_name] +/// The last path token is the theory name, earlier ones are arguments. 
+fn resolve_instance_type(env: &Env, ty: &ast::TypeExpr) -> ElabResult { + use crate::ast::TypeToken; + + // Collect all path tokens (the theory and its arguments) + let paths: Vec = ty + .tokens + .iter() + .filter_map(|t| match t { + TypeToken::Path(p) => Some(p.to_string()), + _ => None, + }) + .collect(); + + if paths.is_empty() { + return Err(ElabError::TypeExprError( + "no theory name in type expression".to_string(), + )); + } + + // Last path is the theory name + let theory_name = paths.last().unwrap().clone(); + + // Earlier paths are arguments (in order) + let args: Vec = paths[..paths.len() - 1].to_vec(); + + // Look up the theory to get parameter names + let theory = env + .theories + .get(&theory_name) + .ok_or_else(|| ElabError::UnknownTheory(theory_name.clone()))?; + + // Match up arguments with parameters + if args.len() != theory.params.len() { + return Err(ElabError::NotEnoughArgs { + name: theory_name, + expected: theory.params.len(), + got: args.len(), + }); + } + + let arguments: Vec<(String, String)> = theory + .params + .iter() + .zip(args.iter()) + .map(|(param, arg)| (param.name.clone(), arg.clone())) + .collect(); + + Ok(ResolvedInstanceType { + theory_name, + arguments, + }) +} + +/// Resolve a sort expression within an instance (using the theory's signature) +fn resolve_instance_sort(sig: &Signature, sort_expr: &ast::TypeExpr) -> ElabResult { + use crate::ast::TypeToken; + + // For sort expressions, we expect a single path token + if let Some(path) = sort_expr.as_single_path() { + let name = path.to_string(); + sig.lookup_sort(&name) + .ok_or(ElabError::UnknownSort(name)) + } else { + // Check if there's any path token at all + for token in &sort_expr.tokens { + if let TypeToken::Path(path) = token { + let name = path.to_string(); + return sig + .lookup_sort(&name) + .ok_or(ElabError::UnknownSort(name)); + } + } + Err(ElabError::TypeExprError(format!( + "no path in sort expression: {:?}", + sort_expr + ))) + } +} + +/// Result of 
decomposing a function application's LHS +enum DecomposedFuncApp { + /// Base domain: `element func` → single element + Base { elem: Slid, func_id: FuncId }, + /// Product domain: `[x: a, y: b] func` → tuple of elements + Product { tuple: Vec, func_id: FuncId }, +} + +/// Decompose a function application term like `ab_in in/src` or `[x: u, y: u] mul` +/// Returns either Base (single element) or Product (tuple of elements) with func_id +fn decompose_func_app( + term: &ast::Term, + name_to_slid: &HashMap, + sig: &Signature, +) -> ElabResult { + match term { + ast::Term::App(base, func) => { + // func should be a function path + let func_id = match func.as_ref() { + ast::Term::Path(path) => { + let func_name = path.to_string(); + sig.lookup_func(&func_name) + .ok_or(ElabError::UnknownFunction(func_name)) + } + _ => Err(ElabError::NotAFunction(format!("{:?}", func))), + }?; + + // base can be either: + // - a single element name (base domain) + // - a record like [x: a, y: b] (product domain) + match base.as_ref() { + ast::Term::Record(fields) => { + // Product domain: [x: a, y: b] func + let tuple: Vec = fields + .iter() + .map(|(_, term)| resolve_instance_element(term, name_to_slid)) + .collect::>>()?; + Ok(DecomposedFuncApp::Product { tuple, func_id }) + } + _ => { + // Base domain: element func + let elem_slid = resolve_instance_element(base, name_to_slid)?; + Ok(DecomposedFuncApp::Base { + elem: elem_slid, + func_id, + }) + } + } + } + _ => Err(ElabError::NotAFunction(format!( + "expected function application, got {:?}", + term + ))), + } +} + +/// Resolve a term to an element Slid +/// +/// Handles both simple names ("v1") and qualified paths ("ExampleNet/t1"). +/// For multi-segment paths, joins with "/" and looks up in name_to_slid. 
+fn resolve_instance_element( + term: &ast::Term, + name_to_slid: &HashMap, +) -> ElabResult { + match term { + ast::Term::Path(path) => { + // Join all segments with "/" for lookup + // This handles both "v1" and "ExampleNet/t1" + let name = path.segments.join("/"); + name_to_slid + .get(&name) + .copied() + .ok_or(ElabError::UnknownVariable(name)) + } + _ => Err(ElabError::UnsupportedFeature(format!( + "complex element reference: {:?}", + term + ))), + } +} + +/// Check that all functions in the structure are total (defined on every element of their domain) +fn validate_totality( + structure: &Structure, + sig: &Signature, + slid_to_name: &HashMap, +) -> ElabResult<()> { + use crate::core::FunctionColumn; + + for (func_id, func_sym) in sig.functions.iter().enumerate() { + let mut missing = Vec::new(); + let func_col = &structure.functions[func_id]; + + match (&func_sym.domain, func_col) { + // Base domain with local codomain + (DerivedSort::Base(domain_sort_id), FunctionColumn::Local(col)) => { + for (sort_slid, opt_slid) in col.iter().enumerate() { + if opt_slid.is_none() { + let slid = Slid::from_usize( + structure.carriers[*domain_sort_id] + .select(sort_slid as u64) + .expect("sort_slid should be valid") as usize, + ); + let name = slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("element#{}", slid)); + missing.push(name); + } + } + } + + // Base domain with external codomain + (DerivedSort::Base(domain_sort_id), FunctionColumn::External(col)) => { + for (sort_slid, opt_luid) in col.iter().enumerate() { + if opt_luid.is_none() { + let slid = Slid::from_usize( + structure.carriers[*domain_sort_id] + .select(sort_slid as u64) + .expect("sort_slid should be valid") as usize, + ); + let name = slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("element#{}", slid)); + missing.push(name); + } + } + } + + // Product domain: check all tuples in the cartesian product + (DerivedSort::Product(fields), FunctionColumn::ProductLocal { 
storage, .. }) => { + // Collect carriers for each field + let field_carriers: Vec> = fields + .iter() + .map(|(_, ds)| match ds { + DerivedSort::Base(sort_id) => structure.carriers[*sort_id] + .iter() + .map(|s| Slid::from_usize(s as usize)) + .collect(), + DerivedSort::Product(_) => { + // Nested products not yet supported + vec![] + } + }) + .collect(); + + // Enumerate all tuples via cartesian product + for tuple in cartesian_product(&field_carriers) { + // Convert Slids to sort-local indices for storage lookup + let local_indices: Vec = tuple + .iter() + .map(|slid| structure.sort_local_id(*slid).index()) + .collect(); + + if storage.get(&local_indices).is_none() { + // Format the missing tuple nicely + let tuple_str: Vec = tuple + .iter() + .zip(fields.iter()) + .map(|(slid, (field_name, _))| { + let elem_name = slid_to_name + .get(slid) + .cloned() + .unwrap_or_else(|| format!("#{}", slid)); + format!("{}: {}", field_name, elem_name) + }) + .collect(); + missing.push(format!("[{}]", tuple_str.join(", "))); + } + } + } + + // Base domain with product codomain: check all field columns + (DerivedSort::Base(domain_sort_id), FunctionColumn::ProductCodomain { field_columns, field_names, .. 
}) => { + // For product codomains, a value is defined if ALL fields are defined + let carrier_size = structure.carrier_size(*domain_sort_id); + for sort_slid in 0..carrier_size { + // Check if any field is undefined for this element + let all_defined = field_columns.iter().all(|col| { + col.get(sort_slid) + .and_then(|opt| crate::id::get_slid(*opt)) + .is_some() + }); + if !all_defined { + let slid = Slid::from_usize( + structure.carriers[*domain_sort_id] + .select(sort_slid as u64) + .expect("sort_slid should be valid") as usize, + ); + let name = slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("element#{}", slid)); + // Find which fields are missing + let missing_fields: Vec<_> = field_columns.iter() + .zip(field_names.iter()) + .filter(|(col, _)| { + col.get(sort_slid) + .and_then(|opt| crate::id::get_slid(*opt)) + .is_none() + }) + .map(|(_, name)| name.as_str()) + .collect(); + missing.push(format!("{} (fields: {:?})", name, missing_fields)); + } + } + } + + // Mismatched domain/column types (shouldn't happen if init is correct) + (DerivedSort::Base(_), FunctionColumn::ProductLocal { .. }) => { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has base domain but product storage", + func_sym.name + ))); + } + (DerivedSort::Product(_), FunctionColumn::Local(_) | FunctionColumn::External(_)) => { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has product domain but columnar storage", + func_sym.name + ))); + } + (DerivedSort::Product(_), FunctionColumn::ProductCodomain { .. 
}) => { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has product domain with product codomain (not yet supported)", + func_sym.name + ))); + } + } + + if !missing.is_empty() { + return Err(ElabError::PartialFunction { + func_name: func_sym.name.clone(), + missing_elements: missing, + }); + } + } + + Ok(()) +} + +/// Generate cartesian product of vectors +fn cartesian_product(sets: &[Vec]) -> Vec> { + if sets.is_empty() { + return vec![vec![]]; // Single empty tuple for nullary product + } + + let mut result = vec![vec![]]; + for set in sets { + let mut new_result = Vec::new(); + for tuple in &result { + for &elem in set { + let mut new_tuple = tuple.clone(); + new_tuple.push(elem); + new_result.push(new_tuple); + } + } + result = new_result; + } + result +} + +/// Parse a simple type expression from a string like "ExampleNet Marking" +/// +/// With concatenative parsing, this just creates a flat list of path tokens. +fn parse_type_expr_from_string(s: &str) -> ElabResult { + use crate::ast::TypeToken; + + let tokens: Vec<&str> = s.split_whitespace().collect(); + + if tokens.is_empty() { + return Err(ElabError::TypeExprError( + "empty type expression".to_string(), + )); + } + + // Simply create a TypeToken::Path for each token + let type_tokens: Vec = tokens + .iter() + .map(|&t| TypeToken::Path(ast::Path::single(t.to_string()))) + .collect(); + + Ok(ast::TypeExpr { tokens: type_tokens }) +} diff --git a/src/elaborate/mod.rs b/src/elaborate/mod.rs new file mode 100644 index 0000000..f8a5976 --- /dev/null +++ b/src/elaborate/mod.rs @@ -0,0 +1,17 @@ +//! Elaboration: surface syntax → typed core representation +//! +//! This module transforms the untyped AST into the typed core representation, +//! performing name resolution and type checking along the way. 
+ +mod env; +mod error; +mod instance; +mod theory; +pub mod types; + +// Re-export main types and functions +pub use env::{elaborate_formula, elaborate_term, elaborate_type, Env}; +pub use error::{ElabError, ElabResult}; +pub use instance::{ElaborationContext, InstanceElaborationResult, InstanceEntry, elaborate_instance_ctx, elaborate_instance_ctx_partial}; +pub use theory::elaborate_theory; +pub use types::{eval_type_expr, TypeValue}; diff --git a/src/elaborate/theory.rs b/src/elaborate/theory.rs new file mode 100644 index 0000000..fde2549 --- /dev/null +++ b/src/elaborate/theory.rs @@ -0,0 +1,739 @@ +//! Theory elaboration. + +use std::collections::HashMap; + +use crate::ast; +use crate::core::*; + +use super::env::{elaborate_formula, elaborate_type, remap_derived_sort, Env}; +use super::error::{ElabError, ElabResult}; + +/// Elaborate a theory declaration +pub fn elaborate_theory(env: &mut Env, theory: &ast::TheoryDecl) -> ElabResult { + // Set up the environment for this theory + let mut local_env = env.clone(); + local_env.current_theory = Some(theory.name.clone()); + local_env.signature = Signature::new(); + + // Track extended theories for transitive closure semantics + let mut extends_chain: Vec = Vec::new(); + + // Process extends clause (if any) + // This is like a parameter, but: + // 1. Uses the parent theory name as the qualifier (e.g., GeologMeta/Srt) + // 2. 
Establishes an "is-a" relationship with transitive closure + // + // For transitive extends (A extends B extends C), we use "requalified" semantics: + // - Sorts/funcs already qualified (from grandparents) keep their original qualifier + // - Only unqualified items (parent's own) get the parent prefix + // This gives A: { C/X, C/Y, B/Foo } rather than { B/C/X, B/C/Y, B/Foo } + if let Some(ref parent_path) = theory.extends { + let parent_name = parent_path.segments.join("/"); + if let Some(parent_theory) = env.theories.get(&parent_name) { + // Record the extends relationship (including transitive parents) + extends_chain.push(parent_name.clone()); + + // Helper: check if a name is already qualified from a grandparent + // A name like "Grandparent/X" is grandparent-qualified if "Grandparent" is NOT + // a sort in the parent theory (i.e., it's a theory name, not a naming convention). + // Names like "Func/dom" where "Func" IS a sort use '/' as naming convention. + let is_grandparent_qualified = |name: &str| -> bool { + if let Some((prefix, _)) = name.split_once('/') { + // If the prefix is a sort in parent, it's naming convention, not grandparent + parent_theory.theory.signature.lookup_sort(prefix).is_none() + } else { + false + } + }; + + // Helper: qualify a name - only prefix if not already qualified from grandparent + let qualify = |name: &str| -> String { + if is_grandparent_qualified(name) { + // Already qualified from grandparent - keep as-is + name.to_string() + } else { + // Parent's own item (possibly with naming convention '/') - add parent prefix + format!("{}/{}", parent_name, name) + } + }; + + // Copy all sorts with requalified names + for sort_name in &parent_theory.theory.signature.sorts { + let qualified_name = qualify(sort_name); + local_env.signature.add_sort(qualified_name); + } + + // Copy all functions with requalified names + for func in &parent_theory.theory.signature.functions { + let qualified_name = qualify(&func.name); + // For 
domain/codomain remapping, always use parent_name because + // the source signature uses the parent's namespace. The + // preserve_existing_prefix flag handles grandparent-qualified sorts. + let domain = remap_derived_sort( + &func.domain, + &parent_theory.theory.signature, + &local_env.signature, + &parent_name, + true, // preserve_existing_prefix for extends + ); + let codomain = remap_derived_sort( + &func.codomain, + &parent_theory.theory.signature, + &local_env.signature, + &parent_name, + true, // preserve_existing_prefix for extends + ); + local_env + .signature + .add_function(qualified_name, domain, codomain); + } + + // Copy all relations with requalified names + for rel in &parent_theory.theory.signature.relations { + let qualified_name = qualify(&rel.name); + // Same as functions: always use parent_name for remapping + let domain = remap_derived_sort( + &rel.domain, + &parent_theory.theory.signature, + &local_env.signature, + &parent_name, + true, // preserve_existing_prefix for extends + ); + local_env.signature.add_relation(qualified_name, domain); + } + + // Note: axioms are inherited but we don't copy them yet + // (they reference the parent's sort/func IDs which need remapping) + } else { + return Err(ElabError::UnknownTheory(parent_name)); + } + } + + // Process parameters + // When we have `theory (N : PetriNet instance) Trace { ... }`, we need to: + // 1. Copy all sorts from PetriNet into local signature with qualified names (N/P, N/T, etc.) + // 2. Copy all functions with qualified names (N/in/src, etc.) + // This ensures all sort/func IDs are in a single namespace. 
+ let mut params = Vec::new(); + for param in &theory.params { + // "T instance" parameters — the theory depends on an instance of another theory + if param.ty.is_instance() { + let inner = param.ty.instance_inner().unwrap(); + // Handle both simple (PetriNet instance) and parameterized (N ReachabilityProblem instance) cases + let theory_name = extract_theory_name(&inner)?; + if let Some(base_theory) = env.theories.get(&theory_name) { + // Build mapping from base_theory's instance params to our type args + // For `RP : N ReachabilityProblem instance`: + // - collect_type_args returns ["N"] (all paths except the theory name) + // - base_theory.params = [("N", "PetriNet")] + // - mapping = {"N" -> "N"} + let mut type_args = Vec::new(); + collect_type_args(&inner, &mut type_args); + + // Build param substitution map: base_theory param name -> our type arg value + let mut param_subst: HashMap = HashMap::new(); + for (bp, arg) in base_theory.params.iter().zip(type_args.iter()) { + if bp.theory_name != "Sort" { + // Instance param - map its name to the type arg + param_subst.insert(bp.name.clone(), arg.clone()); + } + } + + // Copy all sorts from param theory into local signature + // But for sorts that come from a param that we're binding to an outer param, + // reuse the outer param's sort instead of creating a duplicate. 
+ for sort_name in &base_theory.theory.signature.sorts { + // Check if this sort starts with a param name that we're substituting + let qualified_name = if let Some((prefix, suffix)) = sort_name.split_once('/') { + if let Some(subst) = param_subst.get(prefix) { + // This sort is from a param we're binding - use the substituted prefix + let substituted_name = format!("{}/{}", subst, suffix); + // If this sort already exists (from an outer param), don't add it again + if local_env.signature.lookup_sort(&substituted_name).is_some() { + continue; + } + substituted_name + } else { + // Not from a substituted param - prefix with our param name + format!("{}/{}", param.name, sort_name) + } + } else { + // Unqualified sort (the theory's own sort) - prefix with our param name + format!("{}/{}", param.name, sort_name) + }; + local_env.signature.add_sort(qualified_name); + } + + // Copy all functions from param theory with qualified names + for func in &base_theory.theory.signature.functions { + // Check if this function starts with a param name that we're substituting + let qualified_name = if let Some((prefix, suffix)) = func.name.split_once('/') { + if let Some(subst) = param_subst.get(prefix) { + // This func is from a param we're binding - use the substituted prefix + let substituted_name = format!("{}/{}", subst, suffix); + // If this function already exists (from an outer param), don't add it again + if local_env.signature.lookup_func(&substituted_name).is_some() { + continue; + } + substituted_name + } else { + // Not from a substituted param - prefix with our param name + format!("{}/{}", param.name, func.name) + } + } else { + // Unqualified func - prefix with our param name + format!("{}/{}", param.name, func.name) + }; + // Remap domain and codomain to use local signature's sort IDs + // We need to handle substitution for sorts too + let domain = remap_derived_sort_with_subst( + &func.domain, + &base_theory.theory.signature, + &local_env.signature, + ¶m.name, + 
¶m_subst, + ); + let codomain = remap_derived_sort_with_subst( + &func.codomain, + &base_theory.theory.signature, + &local_env.signature, + ¶m.name, + ¶m_subst, + ); + local_env + .signature + .add_function(qualified_name, domain, codomain); + } + + // Copy all relations from param theory with qualified names + for rel in &base_theory.theory.signature.relations { + // Check if this relation starts with a param name that we're substituting + let qualified_name = if let Some((prefix, suffix)) = rel.name.split_once('/') { + if let Some(subst) = param_subst.get(prefix) { + let substituted_name = format!("{}/{}", subst, suffix); + if local_env.signature.lookup_rel(&substituted_name).is_some() { + continue; + } + substituted_name + } else { + format!("{}/{}", param.name, rel.name) + } + } else { + format!("{}/{}", param.name, rel.name) + }; + let domain = remap_derived_sort_with_subst( + &rel.domain, + &base_theory.theory.signature, + &local_env.signature, + ¶m.name, + ¶m_subst, + ); + local_env.signature.add_relation(qualified_name, domain); + } + + // NOTE: Instance field content (sorts/functions) is already included in + // base_theory.theory.signature because it was added when that theory + // was elaborated. We don't need to process instance fields again here. 
+ + params.push(TheoryParam { + name: param.name.clone(), + theory_name: theory_name.clone(), + }); + local_env + .params + .push((param.name.clone(), base_theory.clone())); + } else { + return Err(ElabError::UnknownTheory(theory_name)); + } + } else if param.ty.is_sort() { + // "Sort" parameters — the theory is parameterized over a sort + // Add the parameter as a sort in the local signature + local_env.signature.add_sort(param.name.clone()); + // Also record it as a "sort parameter" for the theory + params.push(TheoryParam { + name: param.name.clone(), + theory_name: "Sort".to_string(), // Special marker + }); + } else { + return Err(ElabError::UnsupportedFeature(format!( + "parameter type {:?}", + param.ty + ))); + } + } + + // First pass: collect all sorts + for item in &theory.body { + if let ast::TheoryItem::Sort(name) = &item.node { + local_env.signature.add_sort(name.clone()); + } + } + + // Second pass: collect all functions and relations + for item in &theory.body { + match &item.node { + ast::TheoryItem::Function(f) => { + // Check if codomain is Prop — if so, this is a relation declaration + if f.codomain.is_prop() { + let domain = elaborate_type(&local_env, &f.domain)?; + local_env + .signature + .add_relation(f.name.to_string(), domain); + } else { + let domain = elaborate_type(&local_env, &f.domain)?; + let codomain = elaborate_type(&local_env, &f.codomain)?; + local_env + .signature + .add_function(f.name.to_string(), domain, codomain); + } + } + // Legacy: A Field with a Record type is a relation declaration + // (kept for backwards compatibility, may remove later) + ast::TheoryItem::Field(name, ty) if ty.as_record().is_some() => { + let domain = elaborate_type(&local_env, ty)?; + local_env.signature.add_relation(name.clone(), domain); + } + // Instance-typed field declarations (nested instances) + // e.g., `initial_marking : N Marking instance;` + ast::TheoryItem::Field(name, ty) if ty.is_instance() => { + let inner = ty.instance_inner().unwrap(); 
+ // Store the theory type expression as a string + let theory_type_str = format_type_expr(&inner); + local_env + .signature + .add_instance_field(name.clone(), theory_type_str.clone()); + + // Also add the content (sorts, functions) from the field's theory + // This enables accessing things like iso/fwd when we have `iso : X Y Iso instance` + if let Ok(field_theory_name) = extract_theory_name(&inner) + && let Some(field_theory) = env.theories.get(&field_theory_name) { + let field_prefix = name.clone(); + + // Build a mapping from source sort names to target sort names + // - Sort parameters get substituted from type expression args + // - Instance param sorts (e.g., "N/P") map to local sorts with same name + // - Local sorts (e.g., "Token") get prefixed with field name + let sort_param_map = collect_sort_params(&inner, field_theory); + + // First, add any non-param sorts from the field's theory with prefix + for sort_name in &field_theory.theory.signature.sorts { + // Skip sorts that came from instance params (already qualified) + if sort_name.contains('/') { + continue; + } + // Skip Sort parameters (will be substituted) + let is_sort_param = field_theory + .params + .iter() + .any(|p| p.theory_name == "Sort" && p.name == *sort_name); + if is_sort_param { + continue; + } + // Add as prefixed sort + let qualified_name = format!("{}/{}", field_prefix, sort_name); + local_env.signature.add_sort(qualified_name); + } + + // Add functions from the field's theory + for func in &field_theory.theory.signature.functions { + // Skip functions that came from instance params (prefix matches param name) + // But keep naming-convention functions like "input_terminal/of" + let is_from_param = if let Some(prefix) = func.name.split('/').next() { + field_theory.params.iter().any(|p| p.name == prefix) + } else { + false + }; + if is_from_param { + continue; + } + let qualified_name = format!("{}/{}", field_prefix, func.name); + let domain = remap_for_instance_field( + &func.domain, 
+ &field_theory.theory.signature, + &local_env.signature, + &sort_param_map, + &field_prefix, + ); + let codomain = remap_for_instance_field( + &func.codomain, + &field_theory.theory.signature, + &local_env.signature, + &sort_param_map, + &field_prefix, + ); + if let (Some(d), Some(c)) = (domain, codomain) { + local_env.signature.add_function(qualified_name, d, c); + } + } + } + } + _ => {} + } + } + + // Third pass: elaborate axioms + let mut axioms = Vec::new(); + let mut axiom_names = Vec::new(); + for item in &theory.body { + if let ast::TheoryItem::Axiom(ax) = &item.node { + // Build context from quantified variables + let mut ctx = Context::new(); + for qv in &ax.quantified { + let sort = elaborate_type(&local_env, &qv.ty)?; + for name in &qv.names { + ctx = ctx.extend(name.clone(), sort.clone()); + } + } + + // Elaborate hypothesis (conjunction of all hypotheses) + let premise = if ax.hypotheses.is_empty() { + Formula::True + } else { + let hyps: Result, _> = ax + .hypotheses + .iter() + .map(|h| elaborate_formula(&local_env, &ctx, h)) + .collect(); + Formula::Conj(hyps?) + }; + + // Elaborate conclusion + let conclusion = elaborate_formula(&local_env, &ctx, &ax.conclusion)?; + + // Collect axiom name (e.g., "ax/input_complete") + axiom_names.push(ax.name.to_string()); + + axioms.push(Sequent { + context: ctx, + premise, + conclusion, + }); + } + } + + Ok(ElaboratedTheory { + params, + theory: Theory { + name: theory.name.clone(), + signature: local_env.signature, + axioms, + axiom_names, + }, + }) +} + +/// Remap a DerivedSort for an instance-typed field in a theory body. +/// Handles both Sort parameters (substituted from type args) and instance param sorts. 
+fn remap_for_instance_field( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + sort_param_map: &HashMap, + field_prefix: &str, +) -> Option { + match sort { + DerivedSort::Base(source_id) => { + let sort_name = &source_sig.sorts[*source_id]; + + // Check Sort parameter substitution (e.g., X -> RP/initial/Token) + if let Some(replacement) = sort_param_map.get(sort_name) + && let Some(target_id) = target_sig.lookup_sort(replacement) { + return Some(DerivedSort::Base(target_id)); + } + + // Check if it's an instance param sort (already qualified, e.g., N/P) + if sort_name.contains('/') + && let Some(target_id) = target_sig.lookup_sort(sort_name) { + return Some(DerivedSort::Base(target_id)); + } + + // Check if it's a local sort (needs prefix, e.g., Token -> initial/Token) + let prefixed = format!("{}/{}", field_prefix, sort_name); + if let Some(target_id) = target_sig.lookup_sort(&prefixed) { + return Some(DerivedSort::Base(target_id)); + } + + None + } + DerivedSort::Product(fields) => { + let remapped: Option> = fields + .iter() + .map(|(n, s)| { + remap_for_instance_field(s, source_sig, target_sig, sort_param_map, field_prefix) + .map(|r| (n.clone(), r)) + }) + .collect(); + remapped.map(DerivedSort::Product) + } + } +} + +/// Collect sort parameter mappings from a type expression. +/// E.g., `RP/initial/Token RP/target/Token Iso` returns {"X" -> "RP/initial/Token", "Y" -> "RP/target/Token"} +fn collect_sort_params( + ty: &ast::TypeExpr, + field_theory: &std::rc::Rc, +) -> HashMap { + let mut args = Vec::new(); + collect_type_args(ty, &mut args); + + // Match args with sort parameters in order + let mut map = HashMap::new(); + for (param, arg) in field_theory.params.iter().zip(args.iter()) { + if param.theory_name == "Sort" { + map.insert(param.name.clone(), arg.clone()); + } + } + map +} + +/// Recursively collect type arguments from an App chain. +/// For `A B C Foo`, this returns ["A", "B", "C"] (Foo is the theory name). 
+/// +/// With concatenative parsing, tokens are in order: [arg1, arg2, ..., theory_name] +/// All path tokens except the last one are type arguments. +pub fn collect_type_args(ty: &ast::TypeExpr, args: &mut Vec) { + use crate::ast::TypeToken; + + // Collect all path tokens + let paths: Vec = ty + .tokens + .iter() + .filter_map(|t| match t { + TypeToken::Path(p) => Some(p.to_string()), + _ => None, + }) + .collect(); + + // All but the last one are type arguments + if paths.len() > 1 { + args.extend(paths[..paths.len() - 1].iter().cloned()); + } +} + +/// Substitute sort parameters in a DerivedSort using a mapping. +/// Returns None if the sort cannot be resolved in the target signature. +#[allow(dead_code)] +fn substitute_sort_params( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + param_map: &HashMap, +) -> Option { + match sort { + DerivedSort::Base(source_id) => { + let sort_name = &source_sig.sorts[*source_id]; + // Check if this sort is a parameter that should be substituted + if let Some(replacement) = param_map.get(sort_name) { + // Look up the replacement sort in the target signature + if let Some(target_id) = target_sig.lookup_sort(replacement) { + return Some(DerivedSort::Base(target_id)); + } + // Couldn't find the replacement - this is an error case + eprintln!( + "Warning: sort param substitution failed for {} -> {}", + sort_name, replacement + ); + return None; + } + // Not a parameter - try to find in target as-is + target_sig.lookup_sort(sort_name).map(DerivedSort::Base) + } + DerivedSort::Product(fields) => { + let remapped_fields: Option> = fields + .iter() + .map(|(name, s)| { + substitute_sort_params(s, source_sig, target_sig, param_map) + .map(|remapped| (name.clone(), remapped)) + }) + .collect(); + remapped_fields.map(DerivedSort::Product) + } + } +} + +/// Remap a DerivedSort with instance parameter substitution. 
+/// For sorts like "N/P" where N is being substituted for an outer param, +/// we look up the substituted name instead of prefixing. +fn remap_derived_sort_with_subst( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + param_name: &str, + param_subst: &HashMap, +) -> DerivedSort { + match sort { + DerivedSort::Base(source_id) => { + let sort_name = &source_sig.sorts[*source_id]; + + // Check if this sort starts with a param name that we're substituting + if let Some((prefix, suffix)) = sort_name.split_once('/') + && let Some(subst) = param_subst.get(prefix) { + // Substitute the prefix + let substituted_name = format!("{}/{}", subst, suffix); + if let Some(target_id) = target_sig.lookup_sort(&substituted_name) { + return DerivedSort::Base(target_id); + } + } + + // Otherwise, use the default prefixing behavior + let qualified_name = format!("{}/{}", param_name, sort_name); + if let Some(target_id) = target_sig.lookup_sort(&qualified_name) { + DerivedSort::Base(target_id) + } else if let Some(target_id) = target_sig.lookup_sort(sort_name) { + // Fallback: try without prefix (for sorts that weren't duplicated) + DerivedSort::Base(target_id) + } else { + panic!( + "remap_derived_sort_with_subst: could not find sort {} or {}", + qualified_name, sort_name + ); + } + } + DerivedSort::Product(fields) => { + let remapped_fields: Vec<_> = fields + .iter() + .map(|(name, s)| { + ( + name.clone(), + remap_derived_sort_with_subst( + s, + source_sig, + target_sig, + param_name, + param_subst, + ), + ) + }) + .collect(); + DerivedSort::Product(remapped_fields) + } + } +} + +/// Extract the base theory name from a type expression. +/// +/// With concatenative parsing, tokens are in order: [arg1, arg2, ..., theory_name] +/// The last path token is the theory name. 
+fn extract_theory_name(ty: &ast::TypeExpr) -> ElabResult { + use crate::ast::TypeToken; + + // Find the last path token - that's the theory name + for token in ty.tokens.iter().rev() { + if let TypeToken::Path(path) = token { + return Ok(path.to_string()); + } + } + + Err(ElabError::TypeExprError(format!( + "cannot extract theory name from {:?}", + ty + ))) +} + +/// Collect type arguments from a theory type string like "ExampleNet ReachabilityProblem". +/// Returns the arguments (everything except the final theory name). +pub fn collect_type_args_from_theory_type(theory_type: &str) -> Vec { + let tokens: Vec<&str> = theory_type.split_whitespace().collect(); + if tokens.len() <= 1 { + vec![] + } else { + // All but the last token are arguments + tokens[..tokens.len()-1].iter().map(|s| s.to_string()).collect() + } +} + +/// Build a parameter substitution map for importing elements from a parameterized instance. +/// +/// Given a param instance with a certain theory type (e.g., "ExampleNet ReachabilityProblem"), +/// this builds a mapping from that theory's param names to the actual bindings. +/// +/// For example, if: +/// - `param_theory_type` = "ExampleNet ReachabilityProblem" +/// - ReachabilityProblem has param `(N : PetriNet instance)` +/// - The type args are ["ExampleNet"] +/// +/// Returns: {"N" -> "ExampleNet"} +pub fn build_param_subst( + param_theory: &ElaboratedTheory, + type_args: &[String], +) -> HashMap { + let mut param_subst = HashMap::new(); + for (bp, arg) in param_theory.params.iter().zip(type_args.iter()) { + if bp.theory_name != "Sort" { + // Instance param - map its name to the type arg + param_subst.insert(bp.name.clone(), arg.clone()); + } + } + param_subst +} + +/// Remap a sort name from a param instance to the local theory's sort namespace. +/// +/// This handles the case where a param instance has sorts from its own params, +/// and we need to figure out which local sorts they correspond to. 
+/// +/// For example, when importing from `problem0` (an `ExampleNet ReachabilityProblem`) +/// into `solution0` (an `ExampleNet problem0 Solution`): +/// - problem0 has sort "N/P" where N = ExampleNet +/// - solution0 has sort "N/P" where N = ExampleNet (from outer param) +/// - So "N/P" from problem0 maps to "N/P" in solution0 (not "RP/N/P") +/// +/// Arguments: +/// - `sort_name`: The sort name in the param instance's signature (e.g., "N/P") +/// - `param_name`: The local param name (e.g., "RP") +/// - `param_subst`: Mapping from param instance's param names to their bindings (e.g., {"N" -> "ExampleNet"}) +/// - `local_arguments`: The local instance's param bindings (e.g., [("N", "ExampleNet"), ("RP", "problem0")]) +/// +/// Returns the sort name to use in the local signature. +pub fn remap_sort_for_param_import( + sort_name: &str, + param_name: &str, + param_subst: &HashMap, + local_arguments: &[(String, String)], +) -> String { + // Check if this sort starts with a param name that we're substituting + if let Some((prefix, suffix)) = sort_name.split_once('/') + && let Some(bound_instance) = param_subst.get(prefix) { + // This sort is from a param in the param instance. + // Find which local param is bound to the same instance. + for (local_param_name, local_instance) in local_arguments { + if local_instance == bound_instance { + // Found it! Use the local param's prefix instead. 
+ return format!("{}/{}", local_param_name, suffix); + } + } + // Fallback: the instance isn't directly a local param, + // just use param_name prefix + return format!("{}/{}", param_name, sort_name); + } + + // Unqualified sort or no substitution applicable - prefix with param_name + format!("{}/{}", param_name, sort_name) +} + +/// Format a type expression as a string (for storing instance field types) +fn format_type_expr(ty: &ast::TypeExpr) -> String { + use crate::ast::TypeToken; + + let mut parts = Vec::new(); + + for token in &ty.tokens { + match token { + TypeToken::Path(path) => parts.push(path.to_string()), + TypeToken::Sort => parts.push("Sort".to_string()), + TypeToken::Prop => parts.push("Prop".to_string()), + TypeToken::Instance => parts.push("instance".to_string()), + TypeToken::Arrow => parts.push("->".to_string()), + TypeToken::Record(fields) => { + let field_strs: Vec = fields + .iter() + .map(|(name, field_ty)| format!("{}: {}", name, format_type_expr(field_ty))) + .collect(); + parts.push(format!("[{}]", field_strs.join(", "))); + } + } + } + + parts.join(" ") +} diff --git a/src/elaborate/types.rs b/src/elaborate/types.rs new file mode 100644 index 0000000..16640d7 --- /dev/null +++ b/src/elaborate/types.rs @@ -0,0 +1,265 @@ +//! Type expression evaluation (concatenative stack-based) +//! +//! Evaluates flat TypeExpr token sequences into resolved types, +//! using the symbol table to determine theory arities. 
+ +use crate::ast::{Path, TypeExpr, TypeToken}; +use crate::core::DerivedSort; +use crate::elaborate::error::{ElabError, ElabResult}; +use crate::elaborate::Env; + +/// A value on the type evaluation stack +#[derive(Clone, Debug)] +pub enum TypeValue { + /// The Sort kind (for parameter declarations like `X : Sort`) + SortKind, + + /// The Prop kind (for relation codomains) + PropKind, + + /// A resolved base sort (index into signature) + Sort(DerivedSort), + + /// An unresolved path (instance ref, sort path, or theory name) + /// Will be resolved based on context + Path(Path), + + /// A theory applied to arguments + AppliedTheory { + theory_name: String, + args: Vec, + }, + + /// Instance type: wraps another type value + Instance(Box), + + /// Function/arrow type + Arrow { + domain: Box, + codomain: Box, + }, + + /// Record/product type + Record(Vec<(String, TypeValue)>), +} + +impl TypeValue { + /// Try to convert this type value to a DerivedSort + pub fn as_derived_sort(&self, env: &Env) -> ElabResult { + match self { + TypeValue::Sort(s) => Ok(s.clone()), + + TypeValue::Path(path) => { + // Try to resolve as a sort path + env.resolve_sort_path(path) + } + + TypeValue::Record(fields) => { + let resolved: Result, _> = fields + .iter() + .map(|(name, val)| val.as_derived_sort(env).map(|s| (name.clone(), s))) + .collect(); + Ok(DerivedSort::Product(resolved?)) + } + + TypeValue::SortKind => Err(ElabError::NotASort( + "Sort is a kind, not a type".to_string(), + )), + + TypeValue::PropKind => Err(ElabError::NotASort( + "Prop is a kind, not a type".to_string(), + )), + + TypeValue::AppliedTheory { theory_name, .. } => Err(ElabError::NotASort(format!( + "applied theory '{}' is not a sort", + theory_name + ))), + + TypeValue::Instance(_) => Err(ElabError::NotASort( + "instance type is not a sort".to_string(), + )), + + TypeValue::Arrow { .. 
} => Err(ElabError::NotASort( + "arrow type is not a sort".to_string(), + )), + } + } + + /// Check if this is the Sort kind + pub fn is_sort_kind(&self) -> bool { + matches!(self, TypeValue::SortKind) + } + + /// Check if this is an instance type + pub fn is_instance(&self) -> bool { + matches!(self, TypeValue::Instance(_)) + } + + /// Get the inner type if this is an instance type + pub fn instance_inner(&self) -> Option<&TypeValue> { + match self { + TypeValue::Instance(inner) => Some(inner), + _ => None, + } + } + + /// Get the theory name and args if this is an applied theory + pub fn as_applied_theory(&self) -> Option<(&str, &[TypeValue])> { + match self { + TypeValue::AppliedTheory { theory_name, args } => Some((theory_name, args)), + _ => None, + } + } +} + +/// Evaluate a type expression using the environment +/// +/// This is the core stack-based evaluator. It processes tokens left-to-right, +/// using the symbol table to determine theory arities. +pub fn eval_type_expr(expr: &TypeExpr, env: &Env) -> ElabResult { + let mut stack: Vec = Vec::new(); + + for token in &expr.tokens { + match token { + TypeToken::Sort => { + stack.push(TypeValue::SortKind); + } + + TypeToken::Prop => { + stack.push(TypeValue::PropKind); + } + + TypeToken::Path(path) => { + // Check if this is a theory name with known arity + let path_str = path.to_string(); + + if let Some(theory) = env.theories.get(&path_str) { + let arity = theory.params.len(); + if arity > 0 { + // Theory takes arguments - pop them from stack + if stack.len() < arity { + return Err(ElabError::NotEnoughArgs { + name: path_str, + expected: arity, + got: stack.len(), + }); + } + let args = stack.split_off(stack.len() - arity); + stack.push(TypeValue::AppliedTheory { + theory_name: path_str, + args, + }); + } else { + // Zero-arity theory + stack.push(TypeValue::AppliedTheory { + theory_name: path_str, + args: vec![], + }); + } + } else { + // Not a theory - could be a sort path or instance reference + // Push 
as unresolved path + stack.push(TypeValue::Path(path.clone())); + } + } + + TypeToken::Instance => { + let top = stack.pop().ok_or_else(|| { + ElabError::TypeExprError("'instance' with empty stack".to_string()) + })?; + stack.push(TypeValue::Instance(Box::new(top))); + } + + TypeToken::Arrow => { + // Pop codomain first (right-associative) + let codomain = stack.pop().ok_or_else(|| { + ElabError::TypeExprError("'->' missing codomain".to_string()) + })?; + let domain = stack.pop().ok_or_else(|| { + ElabError::TypeExprError("'->' missing domain".to_string()) + })?; + stack.push(TypeValue::Arrow { + domain: Box::new(domain), + codomain: Box::new(codomain), + }); + } + + TypeToken::Record(fields) => { + // Evaluate each field's type expression recursively + let mut resolved_fields = Vec::new(); + for (name, field_expr) in fields { + let field_val = eval_type_expr(field_expr, env)?; + resolved_fields.push((name.clone(), field_val)); + } + stack.push(TypeValue::Record(resolved_fields)); + } + } + } + + // Stack should have exactly one element + if stack.is_empty() { + return Err(ElabError::TypeExprError("empty type expression".to_string())); + } + if stack.len() > 1 { + return Err(ElabError::TypeExprError(format!( + "type expression left {} values on stack (expected 1)", + stack.len() + ))); + } + + Ok(stack.pop().unwrap()) +} + +/// Convenience: evaluate a type expression and convert to DerivedSort +pub fn eval_as_sort(expr: &TypeExpr, env: &Env) -> ElabResult { + let val = eval_type_expr(expr, env)?; + val.as_derived_sort(env) +} + +/// Extract the theory name from a type expression (for simple cases) +/// +/// This is used when we just need the theory name without full evaluation. +/// Returns None if the expression is more complex than a simple path or applied theory. 
+pub fn extract_theory_name(expr: &TypeExpr) -> Option { + // Look for the last path token that isn't followed by Instance + let mut last_theory_candidate: Option<&Path> = None; + + for token in &expr.tokens { + match token { + TypeToken::Path(p) => { + last_theory_candidate = Some(p); + } + TypeToken::Instance => { + // The previous path was the theory name + if let Some(p) = last_theory_candidate { + return Some(p.to_string()); + } + } + _ => {} + } + } + + // If no Instance token, the last path is the theory name + last_theory_candidate.map(|p| p.to_string()) +} + +/// Check if a type expression represents the Sort kind +pub fn is_sort_kind(expr: &TypeExpr) -> bool { + expr.tokens.len() == 1 && matches!(expr.tokens[0], TypeToken::Sort) +} + +/// Check if a type expression ends with `instance` +pub fn is_instance_type(expr: &TypeExpr) -> bool { + expr.tokens.last() == Some(&TypeToken::Instance) +} + +/// Get all path tokens from a type expression (useful for parameter extraction) +pub fn get_paths(expr: &TypeExpr) -> Vec<&Path> { + expr.tokens + .iter() + .filter_map(|t| match t { + TypeToken::Path(p) => Some(p), + _ => None, + }) + .collect() +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..fa02ecd --- /dev/null +++ b/src/error.rs @@ -0,0 +1,211 @@ +//! Error formatting for Geolog +//! +//! Provides user-friendly error messages using ariadne for nice formatting. 
+
+use ariadne::{Color, Label, Report, ReportKind, Source};
+use chumsky::prelude::Simple;
+use std::ops::Range;
+
+use crate::lexer::Token;
+
+/// Format lexer errors into a user-friendly string
+pub fn format_lexer_errors(source: &str, errors: Vec<Simple<char>>) -> String {
+    let mut output = Vec::new();
+
+    for error in errors {
+        let span = error.span();
+        let report = Report::build(ReportKind::Error, (), span.start)
+            .with_message("Lexical error")
+            .with_label(
+                Label::new(span.clone())
+                    .with_message(format_lexer_error(&error))
+                    .with_color(Color::Red),
+            );
+
+        report
+            .finish()
+            .write(Source::from(source), &mut output)
+            .expect("Failed to write error report");
+    }
+
+    String::from_utf8(output).unwrap_or_else(|_| "Error formatting failed".to_string())
+}
+
+/// Format a single lexer error into a readable message
+fn format_lexer_error(error: &Simple<char>) -> String {
+    let found = error
+        .found()
+        .map(|c| format!("'{}'", c))
+        .unwrap_or_else(|| "end of input".to_string());
+
+    if let Some(_expected) = error.expected().next() {
+        format!(
+            "Unexpected {}, expected {}",
+            found,
+            format_char_set(error.expected())
+        )
+    } else {
+        format!("Unexpected character {}", found)
+    }
+}
+
+/// Format parser errors into a user-friendly string
+pub fn format_parser_errors(
+    source: &str,
+    errors: Vec<Simple<Token>>,
+    token_spans: &[(Token, Range<usize>)],
+) -> String {
+    let mut output = Vec::new();
+
+    for error in errors {
+        let span = error.span();
+
+        // Map token span to character span
+        // The span could be either:
+        // 1. A token index (0, 1, 2, ..., n-1 for n tokens) - look up in token_spans
+        // 2. Already a character position (from custom errors that captured spans)
+        //
+        // Best heuristic: check if the span matches a token's character range.
+        // If so, it's a character position. Otherwise, treat as token index.
+        let is_char_position = token_spans
+            .iter()
+            .any(|(_, char_range)| char_range.start == span.start && char_range.end == span.end);
+
+        let char_span = if is_char_position {
+            // Span exactly matches a token's character range - use as-is
+            span.clone()
+        } else if span.start < token_spans.len() {
+            // Span.start is a valid token index - use token's character range
+            token_spans[span.start].1.clone()
+        } else if span.start == token_spans.len() {
+            // End of input marker - use the end of the last token
+            if let Some((_, last_range)) = token_spans.last() {
+                last_range.end..last_range.end
+            } else {
+                0..0
+            }
+        } else {
+            // Fallback: treat as character position
+            let start = span.start.min(source.len());
+            let end = span.end.min(source.len());
+            start..end
+        };
+
+        let report = Report::build(ReportKind::Error, (), char_span.start)
+            .with_message("Parse error")
+            .with_label(
+                Label::new(char_span.clone())
+                    .with_message(format_parser_error(&error))
+                    .with_color(Color::Red),
+            );
+
+        report
+            .finish()
+            .write(Source::from(source), &mut output)
+            .expect("Failed to write error report");
+    }
+
+    String::from_utf8(output).unwrap_or_else(|_| "Error formatting failed".to_string())
+}
+
+/// Format a single parser error into a readable message
+fn format_parser_error(error: &Simple<Token>) -> String {
+    use chumsky::error::SimpleReason;
+
+    let found = error
+        .found()
+        .map(|t| format!("'{}'", format_token(t)))
+        .unwrap_or_else(|| "end of input".to_string());
+
+    // Check for custom error messages first (from Simple::custom())
+    if let SimpleReason::Custom(msg) = error.reason() {
+        return msg.clone();
+    }
+
+    let expected = format_token_set(error.expected());
+
+    if !expected.is_empty() {
+        // Check for common patterns and provide helpful messages
+        let expected_str = expected.join(", ");
+
+        // Detect common mistakes
+        if expected.contains(&"';'".to_string()) && error.found() == Some(&Token::Colon) {
+            return format!(
+                "Expected semicolon ';' to end declaration, found '{}'",
+                format_token(error.found().unwrap())
+            );
+        }
+
+        if expected.contains(&"':'".to_string()) && error.found() == Some(&Token::Semicolon) {
+            return format!(
+                "Expected colon ':' before type, found '{}'",
+                format_token(error.found().unwrap())
+            );
+        }
+
+        format!("Unexpected {}, expected one of: {}", found, expected_str)
+    } else if let Some(label) = error.label() {
+        label.to_string()
+    } else {
+        format!("Unexpected token {}", found)
+    }
+}
+
+/// Format a token for display
+fn format_token(token: &Token) -> String {
+    match token {
+        Token::Namespace => "namespace".to_string(),
+        Token::Theory => "theory".to_string(),
+        Token::Instance => "instance".to_string(),
+        Token::Query => "query".to_string(),
+        Token::Sort => "Sort".to_string(),
+        Token::Prop => "Prop".to_string(),
+        Token::Forall => "forall".to_string(),
+        Token::Exists => "exists".to_string(),
+        Token::True => "true".to_string(),
+        Token::False => "false".to_string(),
+        Token::Ident(s) => s.clone(),
+        Token::LBrace => "{".to_string(),
+        Token::RBrace => "}".to_string(),
+        Token::LParen => "(".to_string(),
+        Token::RParen => ")".to_string(),
+        Token::LBracket => "[".to_string(),
+        Token::RBracket => "]".to_string(),
+        Token::Colon => ":".to_string(),
+        Token::Semicolon => ";".to_string(),
+        Token::Comma => ",".to_string(),
+        Token::Dot => ".".to_string(),
+        Token::Slash => "/".to_string(),
+        Token::Arrow => "->".to_string(),
+        Token::Eq => "=".to_string(),
+        Token::Turnstile => "|-".to_string(),
+        Token::And => r"/\".to_string(),
+        Token::Or => r"\/".to_string(),
+        Token::Question => "?".to_string(),
+        Token::Chase => "chase".to_string(),
+    }
+}
+
+/// Format a set of expected tokens
+fn format_token_set<'a>(expected: impl Iterator<Item = &'a Option<Token>>) -> Vec<String> {
+    expected
+        .filter_map(|opt| opt.as_ref())
+        .map(|t| format!("'{}'", format_token(t)))
+        .collect()
+}
+
+/// Format a set of expected characters
+fn format_char_set<'a>(expected: impl Iterator<Item = &'a Option<char>>) -> String {
+    let chars: Vec<String> = expected
+        .filter_map(|opt| opt.as_ref())
+        .map(|c| format!("'{}'", c))
+        .collect();
+
+    if chars.is_empty() {
+        "valid character".to_string()
+    } else if chars.len() == 1 {
+        chars[0].clone()
+    } else {
+        chars.join(" or ")
+    }
+}
diff --git a/src/id.rs b/src/id.rs
new file mode 100644
index 0000000..253be8b
--- /dev/null
+++ b/src/id.rs
@@ -0,0 +1,114 @@
+//! ID types for geolog, following chit's multi-level ID design
+//!
+//! The key insight is that different operations benefit from different ID granularities:
+//! - UUIDs for global identity (persistence, version control, cross-structure references)
+//! - Luids for installation-wide identity (stable across structures, persisted)
+//! - Slids for structure-local computation (cache-friendly, compact)
+//!
+//! We use egglog's `define_id!` macro to create newtype wrappers around usize,
+//! giving us type safety (can't mix up Slid with Luid) and nice Debug output.
+
+// Re-export NumericId trait and IdVec for typed indexing
+pub use egglog_numeric_id::{define_id, IdVec, NumericId};
+pub use nonminmax::NonMaxUsize;
+pub use uuid::Uuid;
+
+// We define our own macro that wraps egglog's define_id! and adds rkyv derives
+macro_rules! define_id_with_rkyv {
+    ($v:vis $name:ident, $repr:ty, $doc:tt) => {
+        #[doc = $doc]
+        #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+        #[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
+        #[archive(check_bytes)]
+        #[repr(transparent)]
+        $v struct $name {
+            /// The underlying representation (public for zero-copy archived access)
+            pub rep: $repr,
+        }
+
+        impl NumericId for $name {
+            type Rep = $repr;
+            type Atomic = std::sync::atomic::AtomicUsize;
+
+            fn new(val: $repr) -> Self {
+                Self { rep: val }
+            }
+
+            fn from_usize(index: usize) -> Self {
+                Self { rep: index as $repr }
+            }
+
+            fn index(self) -> usize {
+                self.rep as usize
+            }
+
+            fn rep(self) -> $repr {
+                self.rep
+            }
+        }
+
+        impl std::fmt::Debug for $name {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                write!(f, "{}({})", stringify!($name), self.rep)
+            }
+        }
+
+        impl std::fmt::Display for $name {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                write!(f, "{}", self.rep)
+            }
+        }
+    };
+}
+
+define_id_with_rkyv!(
+    pub Luid,
+    usize,
+    "Locally Universal ID: index into the global universe of UUIDs. Stable across installation, persisted."
+);
+
+define_id_with_rkyv!(
+    pub Slid,
+    usize,
+    "Structure-Local ID: index within a structure's element universe. Primary working ID."
+);
+
+define_id_with_rkyv!(
+    pub SortSlid,
+    usize,
+    "Sort-Local ID: index within a particular sort's carrier. Computed on-demand."
+);
+
+/// A Slid that can be stored in Option without doubling size.
+/// Uses `NonMaxUsize` so that `Option<NonMaxUsize>` is the same size as `usize`,
+/// with `usize::MAX` serving as the niche for `None`.
+pub type OptSlid = Option<NonMaxUsize>;
+
+/// Convert a Slid to OptSlid.
+/// Returns None if slid == usize::MAX (which would be an astronomically large structure).
+#[inline]
+pub fn some_slid(slid: Slid) -> OptSlid {
+    NonMaxUsize::new(slid.index())
+}
+
+/// Extract a Slid from OptSlid.
+#[inline]
+pub fn get_slid(opt: OptSlid) -> Option<Slid> {
+    opt.map(|n| Slid::from_usize(n.get()))
+}
+
+/// A Luid that can be stored in Option without doubling size.
+/// Analogous to OptSlid but for cross-instance references.
+pub type OptLuid = Option<NonMaxUsize>;
+
+/// Convert a Luid to OptLuid.
+#[inline]
+pub fn some_luid(luid: Luid) -> OptLuid {
+    NonMaxUsize::new(luid.index())
+}
+
+/// Extract a Luid from OptLuid.
+#[inline]
+pub fn get_luid(opt: OptLuid) -> Option<Luid> {
+    opt.map(|n| Luid::from_usize(n.get()))
+}
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..117b1ff
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,143 @@
+//! Lexer for Geolog
+//!
+//! Tokenizes source into a stream for the parser.
+
+use chumsky::prelude::*;
+use std::ops::Range;
+
+/// Token types for Geolog
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Token {
+    // Keywords
+    Namespace,
+    Theory,
+    Instance,
+    Query,
+    Sort,
+    Prop,
+    Forall,
+    Exists,
+    True,
+    False,
+    Chase,
+
+    // Identifiers
+    Ident(String),
+
+    // Punctuation
+    LBrace,    // {
+    RBrace,    // }
+    LParen,    // (
+    RParen,    // )
+    LBracket,  // [
+    RBracket,  // ]
+    Colon,     // :
+    Semicolon, // ;
+    Comma,     // ,
+    Dot,       // .
+    Slash,     // /
+    Arrow,     // ->
+    Eq,        // =
+    Turnstile, // |-
+    And,       // /\
+    Or,        // \/
+    Question,  // ?
+}
+
+impl std::fmt::Display for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Token::Namespace => write!(f, "namespace"),
+            Token::Theory => write!(f, "theory"),
+            Token::Instance => write!(f, "instance"),
+            Token::Query => write!(f, "query"),
+            Token::Sort => write!(f, "Sort"),
+            Token::Prop => write!(f, "Prop"),
+            Token::Forall => write!(f, "forall"),
+            Token::Exists => write!(f, "exists"),
+            Token::True => write!(f, "true"),
+            Token::False => write!(f, "false"),
+            Token::Chase => write!(f, "chase"),
+            Token::Ident(s) => write!(f, "{}", s),
+            Token::LBrace => write!(f, "{{"),
+            Token::RBrace => write!(f, "}}"),
+            Token::LParen => write!(f, "("),
+            Token::RParen => write!(f, ")"),
+            Token::LBracket => write!(f, "["),
+            Token::RBracket => write!(f, "]"),
+            Token::Colon => write!(f, ":"),
+            Token::Semicolon => write!(f, ";"),
+            Token::Comma => write!(f, ","),
+            Token::Dot => write!(f, "."),
+            Token::Slash => write!(f, "/"),
+            Token::Arrow => write!(f, "->"),
+            Token::Eq => write!(f, "="),
+            Token::Turnstile => write!(f, "|-"),
+            Token::And => write!(f, r"/\"),
+            Token::Or => write!(f, r"\/"),
+            Token::Question => write!(f, "?"),
+        }
+    }
+}
+
+/// Type alias for spans
+pub type Span = Range<usize>;
+
+/// Create a lexer for Geolog
+pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = Simple<char>> {
+    let keyword_or_ident = text::ident().map(|s: String| match s.as_str() {
+        "namespace" => Token::Namespace,
+        "theory" => Token::Theory,
+        "instance" => Token::Instance,
+        "query" => Token::Query,
+        "Sort" => Token::Sort,
+        "Prop" => Token::Prop,
+        "forall" => Token::Forall,
+        "exists" => Token::Exists,
+        "true" => Token::True,
+        "false" => Token::False,
+        "chase" => Token::Chase,
+        _ => Token::Ident(s),
+    });
+
+    let punctuation = choice((
+        just("->").to(Token::Arrow),
+        just("|-").to(Token::Turnstile),
+        just(r"/\").to(Token::And),
+        just(r"\/").to(Token::Or),
+        just('{').to(Token::LBrace),
+        just('}').to(Token::RBrace),
+        just('(').to(Token::LParen),
+        just(')').to(Token::RParen),
+        just('[').to(Token::LBracket),
+        just(']').to(Token::RBracket),
+        just(':').to(Token::Colon),
+        just(';').to(Token::Semicolon),
+        just(',').to(Token::Comma),
+        just('.').to(Token::Dot),
+        just('/').to(Token::Slash),
+        just('=').to(Token::Eq),
+        just('?').to(Token::Question),
+    ));
+
+    // Comments: // to end of line (handles both mid-file and end-of-file)
+    // IMPORTANT: Must check for // BEFORE single / to avoid tokenizing as two Slash tokens
+    let line_comment = just("//")
+        .then(none_of('\n').repeated())
+        .then(just('\n').or_not()) // Either newline or EOF
+        .ignored();
+
+    // Token OR comment - comments produce None, tokens produce Some
+    let token_or_skip = line_comment
+        .to(None)
+        .or(keyword_or_ident.or(punctuation).map(Some));
+
+    token_or_skip
+        .map_with_span(|opt_tok, span| opt_tok.map(|tok| (tok, span)))
+        .padded()
+        .repeated()
+        .then_ignore(end())
+        .map(|items| items.into_iter().flatten().collect())
+}
+
+// Unit tests moved to tests/unit_parsing.rs
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..e5f8c89
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,51 @@
+//! Geolog: A language for geometric logic
+//!
+//! Geolog is a type theory with semantics in topoi and geometric morphisms,
+//! designed as a unified language for database schemas, queries, and migrations.
+
+pub mod ast;
+pub mod cc;
+pub mod core;
+pub mod elaborate;
+pub mod error;
+pub mod id;
+pub mod lexer;
+pub mod meta;
+pub mod naming;
+pub mod overlay;
+pub mod parser;
+pub mod patch;
+pub mod pretty;
+pub mod query;
+pub mod repl;
+pub mod serialize;
+pub mod solver;
+pub mod store;
+pub mod tensor;
+pub mod universe;
+pub mod version;
+pub mod zerocopy;
+
+pub use ast::*;
+pub use lexer::lexer;
+pub use parser::parser;
+pub use pretty::pretty_print;
+
+/// Parse a Geolog source string into an AST
+pub fn parse(input: &str) -> Result<ast::File, String> {
+    use chumsky::prelude::*;
+
+    let tokens = lexer::lexer()
+        .parse(input)
+        .map_err(|errs| error::format_lexer_errors(input, errs))?;
+
+    let token_stream: Vec<_> = tokens.iter().map(|(t, s)| (t.clone(), s.clone())).collect();
+    let len = input.len();
+
+    parser::parser()
+        .parse(chumsky::Stream::from_iter(
+            len..len + 1,
+            token_stream.into_iter(),
+        ))
+        .map_err(|errs| error::format_parser_errors(input, errs, &tokens))
+}
diff --git a/src/meta.rs b/src/meta.rs
new file mode 100644
index 0000000..dac4499
--- /dev/null
+++ b/src/meta.rs
@@ -0,0 +1,1106 @@
+//! Conversion between ElaboratedTheory and GeologMeta instances
+//!
+//! This module provides homoiconic serialization: geolog theories can be
+//! represented as instances of the GeologMeta theory, enabling persistence
+//! and meta-programming.
+//!
+//! Note: Human-readable names are stored separately in a NamingIndex (keyed by UUID),
+//! not in the Structure itself. This module populates both the Structure and NamingIndex.
+
+use std::collections::HashMap;
+use std::sync::{Arc, OnceLock};
+
+use crate::core::{
+    Context, DerivedSort, ElaboratedTheory, Formula, FuncId, RelId, Sequent, Signature, SortId,
+    Structure, Term, TheoryParam,
+};
+use crate::elaborate::{Env, elaborate_theory};
+use crate::id::{NumericId, Slid};
+use crate::naming::NamingIndex;
+use crate::universe::Universe;
+
+/// GeologMeta source, embedded at compile time
+const GEOLOG_META_SOURCE: &str = include_str!("../theories/GeologMeta.geolog");
+
+/// Cached elaborated GeologMeta theory
+static GEOLOG_META: OnceLock<Arc<ElaboratedTheory>> = OnceLock::new();
+
+/// Get the elaborated GeologMeta theory, parsing and elaborating on first access
+pub fn geolog_meta() -> Arc<ElaboratedTheory> {
+    GEOLOG_META
+        .get_or_init(|| {
+            let file = crate::parse(GEOLOG_META_SOURCE).expect("GeologMeta.geolog should parse");
+
+            let mut env = Env::new();
+            for decl in &file.declarations {
+                if let crate::ast::Declaration::Theory(t) = &decl.node {
+                    let elab = elaborate_theory(&mut env, t).expect("GeologMeta should elaborate");
+                    return Arc::new(elab);
+                }
+            }
+            panic!("GeologMeta.geolog should contain a theory declaration");
+        })
+        .clone()
+}
+
+/// A builder for constructing GeologMeta instances
+///
+/// This manages the mapping from theory components to element IDs (Slids)
+/// in the target structure.
+pub struct MetaBuilder {
+    /// The theory element (there's exactly one per ElaboratedTheory)
+    pub theory_slid: u32,
+
+    /// Maps SortId -> Srt element slid
+    pub sort_map: HashMap<SortId, u32>,
+
+    /// Maps function name -> Func element slid
+    pub func_map: HashMap<String, u32>,
+
+    /// Maps relation name -> Rel element slid
+    pub rel_map: HashMap<String, u32>,
+
+    /// Maps field name -> Field element slid (for RecEntry/field and ProjT/field)
+    /// Note: This is a simplification; properly would need to track by product type
+    pub field_map: HashMap<String, u32>,
+
+    /// Counter for generating fresh element IDs
+    next_slid: u32,
+
+    /// Accumulated elements by sort (sort_name -> [(elem_name, slid)])
+    /// Names are stored here for NamingIndex population, not in Structure
+    elements: HashMap<String, Vec<(String, u32)>>,
+
+    /// Accumulated function values (func_name -> [(domain_slid, codomain_slid)])
+    functions: HashMap<String, Vec<(u32, u32)>>,
+}
+
+impl Default for MetaBuilder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl MetaBuilder {
+    pub fn new() -> Self {
+        Self {
+            theory_slid: 0,
+            sort_map: HashMap::new(),
+            func_map: HashMap::new(),
+            rel_map: HashMap::new(),
+            field_map: HashMap::new(),
+            next_slid: 0,
+            elements: HashMap::new(),
+            functions: HashMap::new(),
+        }
+    }
+
+    /// Allocate a fresh element in a given sort
+    fn alloc(&mut self, sort: &str, name: String) -> u32 {
+        let slid = self.next_slid;
+        self.next_slid += 1;
+        self.elements
+            .entry(sort.to_string())
+            .or_default()
+            .push((name, slid));
+        slid
+    }
+
+    /// Record a function value: domain_slid maps to codomain_slid via func_name
+    fn set_func(&mut self, func_name: &str, domain_slid: u32, codomain_slid: u32) {
+        self.functions
+            .entry(func_name.to_string())
+            .or_default()
+            .push((domain_slid, codomain_slid));
+    }
+}
+
+/// Convert an ElaboratedTheory to a GeologMeta instance description
+///
+/// Returns a MetaBuilder containing all the elements and function values
+/// needed to construct the Structure.
+pub fn theory_to_meta(theory: &ElaboratedTheory, _universe: &mut Universe) -> MetaBuilder {
+    let mut builder = MetaBuilder::new();
+
+    // Create the Theory element
+    let theory_name = &theory.theory.name;
+    builder.theory_slid = builder.alloc("Theory", theory_name.clone());
+
+    // Convert signature
+    convert_signature(&mut builder, &theory.theory.signature);
+
+    // Convert params
+    for param in &theory.params {
+        convert_param(&mut builder, param);
+    }
+
+    // Convert axioms (using axiom_names if available, otherwise fallback to ax_N)
+    for (i, axiom) in theory.theory.axioms.iter().enumerate() {
+        let axiom_name = theory
+            .theory
+            .axiom_names
+            .get(i)
+            .cloned()
+            .unwrap_or_else(|| format!("ax_{}", i));
+        convert_sequent(&mut builder, axiom, &axiom_name);
+    }
+
+    builder
+}
+
+fn convert_signature(builder: &mut MetaBuilder, sig: &Signature) {
+    // Convert sorts
+    // Note: Human-readable names are stored in NamingIndex, not here
+    for (sort_id, sort_name) in sig.sorts.iter().enumerate() {
+        let srt_slid = builder.alloc("Srt", sort_name.clone());
+        builder.sort_map.insert(sort_id, srt_slid);
+
+        // Srt/theory points to the theory
+        builder.set_func("Srt/theory", srt_slid, builder.theory_slid);
+    }
+
+    // Convert functions
+    for func in &sig.functions {
+        let func_slid = builder.alloc("Func", func.name.clone());
+        builder.func_map.insert(func.name.clone(), func_slid);
+
+        // Func/theory
+        builder.set_func("Func/theory", func_slid, builder.theory_slid);
+
+        // Func/dom and Func/cod
+        let dom_slid = convert_dsort(builder, &func.domain);
+        let cod_slid = convert_dsort(builder, &func.codomain);
+        builder.set_func("Func/dom", func_slid, dom_slid);
+        builder.set_func("Func/cod", func_slid, cod_slid);
+    }
+
+    // Convert relations
+    for rel in &sig.relations {
+        let rel_slid = builder.alloc("Rel", rel.name.clone());
+        builder.rel_map.insert(rel.name.clone(), rel_slid);
+
+        // Rel/theory
+        builder.set_func("Rel/theory", rel_slid, builder.theory_slid);
+
+        // Rel/dom
+        let dom_slid = convert_dsort(builder, &rel.domain);
+        builder.set_func("Rel/dom", rel_slid, dom_slid);
+    }
+}
+
+fn convert_dsort(builder: &mut MetaBuilder, dsort: &DerivedSort) -> u32 {
+    match dsort {
+        DerivedSort::Base(sort_id) => {
+            // Create a BaseDS element
+            let base_slid = builder.alloc("BaseDS", format!("base_{}", sort_id));
+
+            // BaseDS/dsort - embed into DSort
+            let dsort_slid = builder.alloc("DSort", format!("dsort_base_{}", sort_id));
+            builder.set_func("BaseDS/dsort", base_slid, dsort_slid);
+
+            // BaseDS/srt - point to the Srt element
+            if let Some(&srt_slid) = builder.sort_map.get(sort_id) {
+                builder.set_func("BaseDS/srt", base_slid, srt_slid);
+            }
+
+            dsort_slid
+        }
+        DerivedSort::Product(fields) => {
+            // Create a ProdDS element
+            let prod_slid = builder.alloc("ProdDS", "prod".to_string());
+
+            // ProdDS/dsort - embed into DSort
+            let dsort_slid = builder.alloc("DSort", "dsort_prod".to_string());
+            builder.set_func("ProdDS/dsort", prod_slid, dsort_slid);
+
+            // Create Field elements for each field
+            for (field_name, field_type) in fields {
+                let field_slid = builder.alloc("Field", field_name.clone());
+
+                // Track field for later use in RecEntry/field and ProjT/field
+                builder.field_map.insert(field_name.clone(), field_slid);
+
+                // Field/prod
+                builder.set_func("Field/prod", field_slid, prod_slid);
+
+                // Field/type (recursive)
+                let type_slid = convert_dsort(builder, field_type);
+                builder.set_func("Field/type", field_slid, type_slid);
+            }
+
+            dsort_slid
+        }
+    }
+}
+
+fn convert_param(builder: &mut MetaBuilder, param: &TheoryParam) {
+    let param_slid = builder.alloc("Param", param.name.clone());
+
+    // Param/theory (which theory has this param)
+    builder.set_func("Param/theory", param_slid, builder.theory_slid);
+
+    // Param/type - we'd need to look up the theory by name
+    // For now, create a placeholder Theory element
+    let type_theory_slid = builder.alloc("Theory", param.theory_name.clone());
+    builder.set_func("Param/type", param_slid, type_theory_slid);
+}
+
+fn convert_sequent(builder: &mut MetaBuilder, sequent: &Sequent, name: &str) -> u32 {
+    let seq_slid = builder.alloc("Sequent", name.to_string());
+
+    // Sequent/theory
+    builder.set_func("Sequent/theory", seq_slid, builder.theory_slid);
+
+    // Create binders for context variables
+    let mut binder_map = HashMap::new();
+    for (var_name, var_type) in &sequent.context.vars {
+        let binder_slid = builder.alloc("Binder", var_name.clone());
+
+        // Binder/type
+        let type_slid = convert_dsort(builder, var_type);
+        builder.set_func("Binder/type", binder_slid, type_slid);
+
+        // CtxVar linking binder to sequent
+        let ctxvar_slid = builder.alloc("CtxVar", format!("cv_{}", var_name));
+        builder.set_func("CtxVar/sequent", ctxvar_slid, seq_slid);
+        builder.set_func("CtxVar/binder", ctxvar_slid, binder_slid);
+
+        binder_map.insert(var_name.clone(), binder_slid);
+    }
+
+    // Sequent/premise
+    let premise_slid = convert_formula(builder, &sequent.premise, &binder_map);
+    builder.set_func("Sequent/premise", seq_slid, premise_slid);
+
+    // Sequent/conclusion
+    let conclusion_slid = convert_formula(builder, &sequent.conclusion, &binder_map);
+    builder.set_func("Sequent/conclusion", seq_slid, conclusion_slid);
+
+    seq_slid
+}
+
+fn convert_formula(
+    builder: &mut MetaBuilder,
+    formula: &Formula,
+    binder_map: &HashMap<String, u32>,
+) -> u32 {
+    match formula {
+        Formula::True => {
+            let truef_slid = builder.alloc("TrueF", "true".to_string());
+            let formula_slid = builder.alloc("Formula", "true".to_string());
+            builder.set_func("TrueF/formula", truef_slid, formula_slid);
+            formula_slid
+        }
+        Formula::False => {
+            let falsef_slid = builder.alloc("FalseF", "false".to_string());
+            let formula_slid = builder.alloc("Formula", "false".to_string());
+            builder.set_func("FalseF/formula", falsef_slid, formula_slid);
+            formula_slid
+        }
+        Formula::Eq(lhs, rhs) => {
+            let eqf_slid = builder.alloc("EqF", "eq".to_string());
+            let formula_slid = builder.alloc("Formula", "eq".to_string());
+            builder.set_func("EqF/formula", eqf_slid, formula_slid);
+
+            let lhs_slid = convert_term(builder, lhs, binder_map);
+            let rhs_slid = convert_term(builder, rhs, binder_map);
+            builder.set_func("EqF/lhs", eqf_slid, lhs_slid);
+            builder.set_func("EqF/rhs", eqf_slid, rhs_slid);
+
+            formula_slid
+        }
+        Formula::Conj(conjuncts) => {
+            let conjf_slid = builder.alloc("ConjF", "conj".to_string());
+            let formula_slid = builder.alloc("Formula", "conj".to_string());
+            builder.set_func("ConjF/formula", conjf_slid, formula_slid);
+
+            for (i, conjunct) in conjuncts.iter().enumerate() {
+                let arm_slid = builder.alloc("ConjArm", format!("arm_{}", i));
+                builder.set_func("ConjArm/conj", arm_slid, conjf_slid);
+
+                let child_slid = convert_formula(builder, conjunct, binder_map);
+                builder.set_func("ConjArm/child", arm_slid, child_slid);
+            }
+
+            formula_slid
+        }
+        Formula::Disj(disjuncts) => {
+            let disjf_slid = builder.alloc("DisjF", "disj".to_string());
+            let formula_slid = builder.alloc("Formula", "disj".to_string());
+            builder.set_func("DisjF/formula", disjf_slid, formula_slid);
+
+            for (i, disjunct) in disjuncts.iter().enumerate() {
+                let arm_slid = builder.alloc("DisjArm", format!("arm_{}", i));
+                builder.set_func("DisjArm/disj", arm_slid, disjf_slid);
+
+                let child_slid = convert_formula(builder, disjunct, binder_map);
+                builder.set_func("DisjArm/child", arm_slid, child_slid);
+            }
+
+            formula_slid
+        }
+        Formula::Exists(var_name, var_type, body) => {
+            let existsf_slid = builder.alloc("ExistsF", format!("exists_{}", var_name));
+            let formula_slid = builder.alloc("Formula", format!("exists_{}", var_name));
+            builder.set_func("ExistsF/formula", existsf_slid, formula_slid);
+
+            // Create a new binder for this existential
+            let binder_slid = builder.alloc("Binder", var_name.clone());
+
+            let type_slid = convert_dsort(builder, var_type);
+            builder.set_func("Binder/type", binder_slid, type_slid);
+
+            builder.set_func("ExistsF/binder", existsf_slid, binder_slid);
+
+            // Extend binder map for body
+            let mut extended_map = binder_map.clone();
+            extended_map.insert(var_name.clone(), binder_slid);
+
+            let body_slid = convert_formula(builder, body, &extended_map);
+            builder.set_func("ExistsF/body", existsf_slid, body_slid);
+
+            formula_slid
+        }
+        Formula::Rel(rel_id, arg) => {
+            let relf_slid = builder.alloc("RelF", format!("rel_{}", rel_id));
+            let formula_slid = builder.alloc("Formula", format!("rel_{}", rel_id));
+            builder.set_func("RelF/formula", relf_slid, formula_slid);
+
+            // RelF/rel - need to look up the Rel element by ID
+            // For now, just use the ID directly (we'd need the rel_map to be indexed by ID)
+            // This is a simplification - in practice we'd track rel_id -> slid
+
+            let arg_slid = convert_term(builder, arg, binder_map);
+            builder.set_func("RelF/arg", relf_slid, arg_slid);
+
+            formula_slid
+        }
+    }
+}
+
+fn convert_term(builder: &mut MetaBuilder, term: &Term, binder_map: &HashMap<String, u32>) -> u32 {
+    match term {
+        Term::Var(name, _sort) => {
+            let vart_slid = builder.alloc("VarT", name.clone());
+            let term_slid = builder.alloc("Term", name.clone());
+            builder.set_func("VarT/term", vart_slid, term_slid);
+
+            // VarT/binder - look up in binder map
+            if let Some(&binder_slid) = binder_map.get(name) {
+                builder.set_func("VarT/binder", vart_slid, binder_slid);
+            }
+
+            term_slid
+        }
+        Term::App(func_id, arg) => {
+            let appt_slid = builder.alloc("AppT", format!("app_{}", func_id));
+            let term_slid = builder.alloc("Term", format!("app_{}", func_id));
+            builder.set_func("AppT/term", appt_slid, term_slid);
+
+            // AppT/func - need to look up Func element by ID
+            // Similar simplification as with relations
+
+            let arg_slid = convert_term(builder, arg, binder_map);
+            builder.set_func("AppT/arg", appt_slid, arg_slid);
+
+            term_slid
+        }
+        Term::Record(fields) => {
+            let recordt_slid = builder.alloc("RecordT", "record".to_string());
+            let term_slid = builder.alloc("Term", "record".to_string());
+            builder.set_func("RecordT/term", recordt_slid, term_slid);
+
+            for (field_name, field_val) in fields {
+                let entry_slid = builder.alloc("RecEntry", field_name.clone());
+                builder.set_func("RecEntry/record", entry_slid, recordt_slid);
+
+                // RecEntry/field points to the Field element (if known)
+                if let Some(&field_slid) = builder.field_map.get(field_name) {
+                    builder.set_func("RecEntry/field", entry_slid, field_slid);
+                }
+
+                let val_slid = convert_term(builder, field_val, binder_map);
+                builder.set_func("RecEntry/val", entry_slid, val_slid);
+            }
+
+            term_slid
+        }
+        Term::Project(base, field) => {
+            let projt_slid = builder.alloc("ProjT", format!("proj_{}", field));
+            let term_slid = builder.alloc("Term", format!("proj_{}", field));
+            builder.set_func("ProjT/term", projt_slid, term_slid);
+
+            let base_slid = convert_term(builder, base, binder_map);
+            builder.set_func("ProjT/base", projt_slid, base_slid);
+
+            // ProjT/field points to the Field element (if known)
+            if let Some(&field_slid) = builder.field_map.get(field) {
+                builder.set_func("ProjT/field", projt_slid, field_slid);
+            }
+
+            term_slid
+        }
+    }
+}
+
+/// Convert a MetaBuilder into an actual Structure (GeologMeta instance)
+///
+/// This is the final step in theory serialization:
+/// ElaboratedTheory → MetaBuilder → Structure
+///
+/// Also populates the NamingIndex with human-readable names for all elements.
+pub fn builder_to_structure(
+    builder: &MetaBuilder,
+    universe: &mut Universe,
+    naming: &mut NamingIndex,
+    theory_name: &str,
+) -> Structure {
+    let meta_theory = geolog_meta();
+    let sig = &meta_theory.theory.signature;
+
+    let num_sorts = sig.sorts.len();
+    let mut structure = Structure::new(num_sorts);
+
+    // Map MetaBuilder internal slids → Structure Slids
+    let mut slid_map: HashMap<u32, Slid> = HashMap::new();
+
+    // Phase 1: Add all elements
+    // Iterate through MetaBuilder's elements by sort, adding them to Structure
+    for (sort_name, elems) in &builder.elements {
+        let sort_id = sig
+            .lookup_sort(sort_name)
+            .unwrap_or_else(|| panic!("Sort '{}' not found in GeologMeta", sort_name));
+
+        for (elem_name, internal_slid) in elems {
+            let (struct_slid, luid) = structure.add_element(universe, sort_id);
+            slid_map.insert(*internal_slid, struct_slid);
+
+            // Register name in NamingIndex (qualified by theory name)
+            let uuid = universe
+                .get(luid)
+                .expect("freshly created luid should have uuid");
+            naming.insert(uuid, vec![theory_name.to_string(), elem_name.clone()]);
+        }
+    }
+
+    // Phase 2: Initialize function storage
+    // Build domain sort mapping for each function
+    let domain_sort_ids: Vec<Option<SortId>> = sig
+        .functions
+        .iter()
+        .map(|f| {
+            match &f.domain {
+                DerivedSort::Base(sort_id) => Some(*sort_id),
+                DerivedSort::Product(_) => None, // Product domains deferred
+            }
+        })
+        .collect();
+
+    structure.init_functions(&domain_sort_ids);
+
+    // Phase 3: Define function values
+    for (func_name, values) in &builder.functions {
+        let func_id = sig
+            .lookup_func(func_name)
+            .unwrap_or_else(|| panic!("Function '{}' not found in GeologMeta", func_name));
+
+        for (internal_dom, internal_cod) in values {
+            let dom_slid = slid_map
+                .get(internal_dom)
+                .unwrap_or_else(|| panic!("Domain slid {} not mapped", internal_dom));
+            let cod_slid = slid_map
+                .get(internal_cod)
+                .unwrap_or_else(|| panic!("Codomain slid {} not mapped", internal_cod));
+
+            structure
+                .define_function(func_id, *dom_slid, *cod_slid)
+                .unwrap_or_else(|e| panic!("Function definition failed: {}", e));
+        }
+    }
+
+    structure
+}
+
+/// Full conversion: ElaboratedTheory → Structure (GeologMeta instance)
+///
+/// This is the main entry point for theory serialization.
+/// Names are registered in the provided NamingIndex.
+pub fn theory_to_structure(
+    theory: &ElaboratedTheory,
+    universe: &mut Universe,
+    naming: &mut NamingIndex,
+) -> Structure {
+    let builder = theory_to_meta(theory, universe);
+    builder_to_structure(&builder, universe, naming, &theory.theory.name)
+}
+
+// ============================================================================
+// REVERSE CONVERSION: Structure → ElaboratedTheory
+// ============================================================================
+
+/// A reader for navigating GeologMeta structures
+///
+/// Provides convenient access to follow function pointers and collect elements.
+/// Uses NamingIndex and Universe to look up human-readable names.
+pub struct MetaReader<'a> {
+    structure: &'a Structure,
+    universe: &'a Universe,
+    naming: &'a NamingIndex,
+    /// The GeologMeta theory (Arc keeps signature alive)
+    meta: Arc<ElaboratedTheory>,
+    // Cached function IDs for quick lookup
+    func_ids: HashMap<&'static str, FuncId>,
+    // Cached sort IDs
+    sort_ids: HashMap<&'static str, SortId>,
+}
+
+impl<'a> MetaReader<'a> {
+    pub fn new(structure: &'a Structure, universe: &'a Universe, naming: &'a NamingIndex) -> Self {
+        let meta = geolog_meta();
+        let sig = &meta.theory.signature;
+
+        // Pre-cache commonly used function IDs
+        // Note: No */name functions - names are in NamingIndex
+        let func_names = [
+            "Srt/theory",
+            "Func/theory",
+            "Func/dom",
+            "Func/cod",
+            "Rel/theory",
+            "Rel/dom",
+            "Param/theory",
+            "Param/type",
+            "BaseDS/dsort",
+            "BaseDS/srt",
+            "ProdDS/dsort",
+            "Field/prod",
+            "Field/type",
+            "Sequent/theory",
+            "Sequent/premise",
+            "Sequent/conclusion",
+            "CtxVar/sequent",
+            "CtxVar/binder",
+            "Binder/type",
+            "VarT/term",
+            "VarT/binder",
+            "AppT/term",
+            "AppT/func",
+            "AppT/arg",
+            "RecordT/term",
+            "RecEntry/record",
+            "RecEntry/val",
+            "RecEntry/field",
+            "ProjT/term",
+            "ProjT/base",
+            "ProjT/field",
+            "TrueF/formula",
+            "FalseF/formula",
+            "EqF/formula",
+            "EqF/lhs",
+            "EqF/rhs",
+            "ConjF/formula",
+            "ConjArm/conj",
+            "ConjArm/child",
+            "DisjF/formula",
+            "DisjArm/disj",
+            "DisjArm/child",
+            "ExistsF/formula",
+            "ExistsF/binder",
+            "ExistsF/body",
+            "RelF/formula",
+            "RelF/rel",
+            "RelF/arg",
+            "Term/node",
+            "Formula/node",
+        ];
+
+        let mut func_ids = HashMap::new();
+        for name in func_names {
+            if let Some(id) = sig.lookup_func(name) {
+                func_ids.insert(name, id);
+            }
+        }
+
+        // Note: No "Name" sort - names are in NamingIndex
+        let sort_names = [
+            "Theory", "Param", "Srt", "DSort", "BaseDS", "ProdDS", "Field", "Func", "Rel",
+            "Binder", "Term", "VarT", "AppT", "RecordT", "RecEntry", "ProjT", "Formula", "RelF",
+            "TrueF", "FalseF", "EqF", "ConjF", "ConjArm", "DisjF", "DisjArm", "ExistsF", "Sequent",
+            "CtxVar", "Node",
+        ];
+
+        let mut sort_ids = HashMap::new();
+        for name in sort_names {
+            if let Some(id) = sig.lookup_sort(name) {
+                sort_ids.insert(name, id);
+            }
+        }
+
+        Self {
+            structure,
+            universe,
+            naming,
+            meta,
+            func_ids,
+            sort_ids,
+        }
+    }
+
+    /// Get all elements of a given sort
+    fn elements_of_sort(&self, sort_name: &str) -> Vec<Slid> {
+        let sort_id = self.sort_ids.get(sort_name).copied().unwrap_or(usize::MAX);
+        if sort_id == usize::MAX {
+            return vec![];
+        }
+        self.structure.carriers[sort_id]
+            .iter()
+            .map(|x| Slid::from_usize(x as usize))
+            .collect()
+    }
+
+    /// Follow a function from an element, returning the target Slid if defined
+    fn follow(&self, func_name: &str, elem: Slid) -> Option<Slid> {
+        let func_id = *self.func_ids.get(func_name)?;
+        let sort_slid = self.structure.sort_local_id(elem);
+        self.structure.get_function(func_id, sort_slid)
+    }
+
+    /// Get the name of an element (from NamingIndex via UUID lookup)
+    fn name(&self, elem: Slid) -> String {
+        let luid = self.structure.get_luid(elem);
+        if let Some(uuid) = self.universe.get(luid) {
+            self.naming.display_name(&uuid)
+        } else {
+            format!("slid_{}", elem)
+        }
+    }
+
+    /// Find elements where a given function points to target
+    fn find_by_func(&self, func_name: &str, target: Slid) -> Vec<Slid> {
+        let Some(&func_id) = self.func_ids.get(func_name) else {
+            return vec![];
+        };
+
+        // Get the domain sort for this function
+        let func = &self.meta.theory.signature.functions[func_id];
+        let DerivedSort::Base(domain_sort) = &func.domain else {
+            return vec![]; // Product domains not supported yet
+        };
+
+        // Iterate through all elements of the domain sort
+        let mut results = vec![];
+        for elem in self.structure.carriers[*domain_sort].iter() {
+            let elem = Slid::from_usize(elem as usize);
+            if self.follow(func_name, elem) == Some(target) {
+                results.push(elem);
+            }
+        }
+        results
+    }
+}
+
+/// Reconstruct a DerivedSort from its GeologMeta representation
+fn reconstruct_dsort(
+    reader: &MetaReader,
+    dsort_elem: Slid,
+    slid_to_sort_id: &HashMap<Slid, SortId>,
+) -> DerivedSort {
+    // Check if it's a BaseDS (find BaseDS where BaseDS/dsort = dsort_elem)
+    let base_elems = reader.find_by_func("BaseDS/dsort", dsort_elem);
+    if !base_elems.is_empty() {
+        let base_elem = base_elems[0];
+        if let Some(srt_elem) = reader.follow("BaseDS/srt", base_elem)
+            && let Some(&sort_id) = slid_to_sort_id.get(&srt_elem)
+        {
+            return DerivedSort::Base(sort_id);
+        }
+    }
+
+    // Check if it's a ProdDS
+    let prod_elems = reader.find_by_func("ProdDS/dsort", dsort_elem);
+    if !prod_elems.is_empty() {
+        let prod_elem = prod_elems[0];
+        let field_elems = reader.find_by_func("Field/prod", prod_elem);
+
+        let mut fields = vec![];
+        for field_elem in field_elems {
+            let field_name = reader.name(field_elem);
+            // Recursively reconstruct field type
+            if let Some(type_dsort) = reader.follow("Field/type", field_elem) {
+                let field_type = reconstruct_dsort(reader, type_dsort, slid_to_sort_id);
+                fields.push((field_name, field_type));
+            }
+        }
+        return DerivedSort::Product(fields);
+    }
+
+    // Default to unit
+    DerivedSort::unit()
+}
+
+/// Recursively reconstruct a Term from its GeologMeta representation
+fn reconstruct_term_inner(
+    reader: &MetaReader,
+    term_elem: Slid,
+    binder_map: &HashMap<Slid, (String, DerivedSort)>,
+    slid_to_func_id: &HashMap<Slid, FuncId>,
+) -> Option<Term> {
+    // Check VarT
+    let var_elems = reader.find_by_func("VarT/term", term_elem);
+    if !var_elems.is_empty() {
+        let var_t = var_elems[0];
+        if let Some(binder) = reader.follow("VarT/binder", var_t)
+            && let Some((name, sort)) = binder_map.get(&binder) {
+            return Some(Term::Var(name.clone(), sort.clone()));
+        }
+        return None;
+    }
+
+    // Check AppT
+    let app_elems = reader.find_by_func("AppT/term", term_elem);
+    if !app_elems.is_empty() {
+        let app_t = app_elems[0];
+        if let Some(func_elem) = reader.follow("AppT/func", app_t)
+            && let Some(&func_id) = slid_to_func_id.get(&func_elem)
+            && let Some(arg_term) = reader.follow("AppT/arg", app_t)
+        {
+            // Recursively reconstruct argument
term + if let Some(arg) = reconstruct_term_inner(reader, arg_term, binder_map, slid_to_func_id) { + return Some(Term::App(func_id, Box::new(arg))); + } + } + return None; + } + + // Check ProjT + let proj_elems = reader.find_by_func("ProjT/term", term_elem); + if !proj_elems.is_empty() { + let proj_t = proj_elems[0]; + let field_name = reader + .follow("ProjT/field", proj_t) + .map(|f| reader.name(f)) + .unwrap_or_default(); + if let Some(base_term) = reader.follow("ProjT/base", proj_t) { + // Recursively reconstruct base term + if let Some(base) = reconstruct_term_inner(reader, base_term, binder_map, slid_to_func_id) { + return Some(Term::Project(Box::new(base), field_name)); + } + } + return None; + } + + // Check RecordT + let rec_elems = reader.find_by_func("RecordT/term", term_elem); + if !rec_elems.is_empty() { + let rec_t = rec_elems[0]; + let entry_elems = reader.find_by_func("RecEntry/record", rec_t); + let mut fields = vec![]; + for entry_elem in entry_elems { + let field_name = reader + .follow("RecEntry/field", entry_elem) + .map(|f| reader.name(f)) + .unwrap_or_default(); + if let Some(val_term) = reader.follow("RecEntry/val", entry_elem) { + // Recursively reconstruct value term + if let Some(val) = reconstruct_term_inner(reader, val_term, binder_map, slid_to_func_id) { + fields.push((field_name, val)); + } + } + } + return Some(Term::Record(fields)); + } + + None +} + +/// Recursively reconstruct a Formula from its GeologMeta representation +fn reconstruct_formula_inner( + reader: &MetaReader, + formula_elem: Slid, + binder_map: &HashMap, + slid_to_sort_id: &HashMap, + slid_to_func_id: &HashMap, + slid_to_rel_id: &HashMap, +) -> Option { + // Check TrueF + let true_elems = reader.find_by_func("TrueF/formula", formula_elem); + if !true_elems.is_empty() { + return Some(Formula::True); + } + + // Check FalseF + let false_elems = reader.find_by_func("FalseF/formula", formula_elem); + if !false_elems.is_empty() { + return Some(Formula::False); + } + + // 
Check EqF + let eq_elems = reader.find_by_func("EqF/formula", formula_elem); + if !eq_elems.is_empty() { + let eq_f = eq_elems[0]; + if let Some(lhs_term) = reader.follow("EqF/lhs", eq_f) + && let Some(rhs_term) = reader.follow("EqF/rhs", eq_f) + && let Some(lhs) = reconstruct_term_inner(reader, lhs_term, binder_map, slid_to_func_id) + && let Some(rhs) = reconstruct_term_inner(reader, rhs_term, binder_map, slid_to_func_id) + { + return Some(Formula::Eq(lhs, rhs)); + } + return None; + } + + // Check RelF + let rel_elems = reader.find_by_func("RelF/formula", formula_elem); + if !rel_elems.is_empty() { + let rel_f = rel_elems[0]; + if let Some(rel_elem) = reader.follow("RelF/rel", rel_f) + && let Some(&rel_id) = slid_to_rel_id.get(&rel_elem) + && let Some(arg_term) = reader.follow("RelF/arg", rel_f) + && let Some(arg) = reconstruct_term_inner(reader, arg_term, binder_map, slid_to_func_id) + { + return Some(Formula::Rel(rel_id, arg)); + } + return None; + } + + // Check ConjF + let conj_elems = reader.find_by_func("ConjF/formula", formula_elem); + if !conj_elems.is_empty() { + let conj_f = conj_elems[0]; + let arm_elems = reader.find_by_func("ConjArm/conj", conj_f); + let mut children = vec![]; + for arm_elem in arm_elems { + if let Some(child_formula) = reader.follow("ConjArm/child", arm_elem) + && let Some(child) = reconstruct_formula_inner( + reader, + child_formula, + binder_map, + slid_to_sort_id, + slid_to_func_id, + slid_to_rel_id, + ) { + children.push(child); + } + } + return Some(Formula::Conj(children)); + } + + // Check DisjF + let disj_elems = reader.find_by_func("DisjF/formula", formula_elem); + if !disj_elems.is_empty() { + let disj_f = disj_elems[0]; + let arm_elems = reader.find_by_func("DisjArm/disj", disj_f); + let mut children = vec![]; + for arm_elem in arm_elems { + if let Some(child_formula) = reader.follow("DisjArm/child", arm_elem) + && let Some(child) = reconstruct_formula_inner( + reader, + child_formula, + binder_map, + slid_to_sort_id, + 
slid_to_func_id, + slid_to_rel_id, + ) { + children.push(child); + } + } + return Some(Formula::Disj(children)); + } + + // Check ExistsF + let exists_elems = reader.find_by_func("ExistsF/formula", formula_elem); + if !exists_elems.is_empty() { + let exists_f = exists_elems[0]; + // Get the binder for this existential + if let Some(binder_elem) = reader.follow("ExistsF/binder", exists_f) { + let var_name = reader.name(binder_elem); + let var_sort = reader + .follow("Binder/type", binder_elem) + .map(|d| reconstruct_dsort(reader, d, slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + + // Create new binder map with this binder + let mut new_binder_map = binder_map.clone(); + new_binder_map.insert(binder_elem, (var_name.clone(), var_sort.clone())); + + // Recursively reconstruct body + if let Some(body_formula) = reader.follow("ExistsF/body", exists_f) + && let Some(body) = reconstruct_formula_inner( + reader, + body_formula, + &new_binder_map, + slid_to_sort_id, + slid_to_func_id, + slid_to_rel_id, + ) { + return Some(Formula::Exists(var_name, var_sort, Box::new(body))); + } + } + return None; + } + + None +} + +/// Convert a GeologMeta Structure back to an ElaboratedTheory +/// +/// This is the reverse of theory_to_structure, used for loading saved theories. +/// Requires Universe and NamingIndex to look up human-readable names. 
+pub fn structure_to_theory( + structure: &Structure, + universe: &Universe, + naming: &NamingIndex, +) -> Result { + let reader = MetaReader::new(structure, universe, naming); + + // Find the Theory element (assume exactly one for now) + let theory_elems = reader.elements_of_sort("Theory"); + if theory_elems.is_empty() { + return Err("No Theory element found".to_string()); + } + let theory_elem = theory_elems[0]; + let theory_name = reader.name(theory_elem); + + // Build signature + let mut sig = Signature::new(); + + // Reconstruct sorts: find all Srt elements pointing to this theory + let srt_elems = reader.find_by_func("Srt/theory", theory_elem); + let mut slid_to_sort_id: HashMap = HashMap::new(); + + for srt_elem in &srt_elems { + let name = reader.name(*srt_elem); + let sort_id = sig.add_sort(name); + slid_to_sort_id.insert(*srt_elem, sort_id); + } + + // Reconstruct functions (using standalone reconstruct_dsort helper) + let func_elems = reader.find_by_func("Func/theory", theory_elem); + let mut slid_to_func_id: HashMap = HashMap::new(); + + for func_elem in &func_elems { + let name = reader.name(*func_elem); + + let domain = reader + .follow("Func/dom", *func_elem) + .map(|d| reconstruct_dsort(&reader, d, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + + let codomain = reader + .follow("Func/cod", *func_elem) + .map(|c| reconstruct_dsort(&reader, c, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + + let func_id = sig.add_function(name, domain, codomain); + slid_to_func_id.insert(*func_elem, func_id); + } + + // Reconstruct relations + let rel_elems = reader.find_by_func("Rel/theory", theory_elem); + let mut slid_to_rel_id: HashMap = HashMap::new(); + + for rel_elem in &rel_elems { + let name = reader.name(*rel_elem); + + let domain = reader + .follow("Rel/dom", *rel_elem) + .map(|d| reconstruct_dsort(&reader, d, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + + let rel_id = sig.add_relation(name, domain); + 
slid_to_rel_id.insert(*rel_elem, rel_id); + } + + // Reconstruct params + let param_elems = reader.find_by_func("Param/theory", theory_elem); + let mut params = vec![]; + + for param_elem in param_elems { + let name = reader.name(param_elem); + let type_theory = reader + .follow("Param/type", param_elem) + .map(|t| reader.name(t)) + .unwrap_or_default(); + + params.push(TheoryParam { + name, + theory_name: type_theory, + }); + } + + // Reconstruct axioms (sequents) + let sequent_elems = reader.find_by_func("Sequent/theory", theory_elem); + let mut axioms = vec![]; + let mut axiom_names = vec![]; + + for sequent_elem in sequent_elems { + // Collect the axiom name from the sequent element + axiom_names.push(reader.name(sequent_elem)); + // Build binder map: Slid -> (name, DerivedSort) + let mut binder_map: HashMap = HashMap::new(); + + // Get context variables (CtxVar elements for this sequent) + let ctx_var_elems = reader.find_by_func("CtxVar/sequent", sequent_elem); + let mut context_vars = vec![]; + + for ctx_var_elem in ctx_var_elems { + let var_name = reader.name(ctx_var_elem); + if let Some(binder_elem) = reader.follow("CtxVar/binder", ctx_var_elem) { + let var_sort = reader + .follow("Binder/type", binder_elem) + .map(|d| reconstruct_dsort(&reader, d, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + binder_map.insert(binder_elem, (var_name.clone(), var_sort.clone())); + context_vars.push((var_name, var_sort)); + } + } + + let context = Context { vars: context_vars }; + + // Get premise and conclusion using standalone recursive helpers + let premise = reader + .follow("Sequent/premise", sequent_elem) + .and_then(|f| { + reconstruct_formula_inner( + &reader, + f, + &binder_map, + &slid_to_sort_id, + &slid_to_func_id, + &slid_to_rel_id, + ) + }) + .unwrap_or(Formula::True); + + let conclusion = reader + .follow("Sequent/conclusion", sequent_elem) + .and_then(|f| { + reconstruct_formula_inner( + &reader, + f, + &binder_map, + &slid_to_sort_id, + 
&slid_to_func_id, + &slid_to_rel_id, + ) + }) + .unwrap_or(Formula::True); + + axioms.push(Sequent { + context, + premise, + conclusion, + }); + } + + Ok(ElaboratedTheory { + params, + theory: crate::core::Theory { + name: theory_name, + signature: sig, + axioms, + axiom_names, + }, + }) +} + +// Unit tests moved to tests/unit_meta.rs diff --git a/src/naming.rs b/src/naming.rs new file mode 100644 index 0000000..880e4a2 --- /dev/null +++ b/src/naming.rs @@ -0,0 +1,355 @@ +//! Global naming index for human-readable names +//! +//! Names are purely a UI concern - all data in structures is identified by UUID. +//! This index maps UUIDs to human-readable names for display and provides +//! reverse lookup for parsing. +//! +//! Following chit's design: "namings are purely a user interface (input/output +//! for humans and large language models)" +//! +//! ## Suffix-based lookup via ReversedPath +//! +//! To efficiently look up names by suffix (e.g., find all `*/A` when given just `A`), +//! we store paths reversed in a BTreeMap. For example: +//! - `["PetriNet", "P"]` is stored as `ReversedPath(["P", "PetriNet"])` +//! - A prefix scan for `["A"]` finds all paths ending in `A` +//! +//! This enables O(log n + k) suffix lookups where k is the number of matches. + +use crate::id::Uuid; +use indexmap::IndexMap; +use memmap2::Mmap; +use rkyv::ser::Serializer; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::{Archive, Deserialize, Serialize, check_archived_root}; +use std::collections::BTreeMap; +use std::fs::{self, File}; +use std::io::Write; +use std::path::PathBuf; + +/// A qualified name path (e.g., ["PetriNet", "P"] for sort P in theory PetriNet) +pub type QualifiedName = Vec; + +/// A path stored with segments reversed for efficient suffix-based lookup. +/// +/// `["PetriNet", "P"]` becomes `ReversedPath(["P", "PetriNet"])`. +/// This allows BTreeMap range queries to find all paths with a given suffix. 
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ReversedPath(Vec); + +impl ReversedPath { + /// Create a reversed path from a qualified name. + pub fn from_qualified(segments: &[String]) -> Self { + Self(segments.iter().rev().cloned().collect()) + } + + /// Convert back to a qualified name (forward order). + pub fn to_qualified(&self) -> QualifiedName { + self.0.iter().rev().cloned().collect() + } + + /// Create a prefix for range queries (just the suffix segments, reversed). + /// For looking up all paths ending in `["A"]`, create `ReversedPath(["A"])`. + pub fn from_suffix(suffix: &[String]) -> Self { + // Suffix is already in forward order, just reverse it + Self(suffix.iter().rev().cloned().collect()) + } + + /// Check if this path starts with the given prefix (for range iteration). + pub fn starts_with(&self, prefix: &ReversedPath) -> bool { + self.0.len() >= prefix.0.len() && self.0[..prefix.0.len()] == prefix.0[..] + } + + /// Get the inner segments (reversed order). + pub fn segments(&self) -> &[String] { + &self.0 + } +} + +/// Serializable form of the naming index +#[derive(Archive, Deserialize, Serialize, Default)] +#[archive(check_bytes)] +struct NamingData { + /// UUID → qualified name mapping + entries: Vec<(Uuid, QualifiedName)>, +} + +/// Global naming index +/// +/// Provides bidirectional mapping between UUIDs and human-readable names. +/// Names are qualified paths like ["PetriNet", "P"] for sort P in theory PetriNet. +/// +/// ## Lookup modes +/// - **By UUID**: O(1) via `uuid_to_name` +/// - **By exact path**: O(log n) via `path_to_uuid` +/// - **By suffix**: O(log n + k) via BTreeMap range query on reversed paths +#[derive(Debug, Default)] +pub struct NamingIndex { + /// UUID → qualified name (for display) + uuid_to_name: IndexMap, + /// Reversed path → UUIDs (for suffix-based lookup) + /// Paths are stored reversed so that suffix queries become prefix scans. 
+ /// Multiple UUIDs can share the same path (ambiguous names). + path_to_uuid: BTreeMap>, + /// Persistence path + path: Option, + /// Dirty flag + dirty: bool, +} + +impl NamingIndex { + /// Create a new empty naming index + pub fn new() -> Self { + Self::default() + } + + /// Create a naming index with a persistence path + pub fn with_path(path: impl Into) -> Self { + Self { + uuid_to_name: IndexMap::new(), + path_to_uuid: BTreeMap::new(), + path: Some(path.into()), + dirty: false, + } + } + + /// Load a naming index from disk + pub fn load(path: impl Into) -> Result { + let path = path.into(); + + if !path.exists() { + return Ok(Self::with_path(path)); + } + + let file = File::open(&path).map_err(|e| format!("Failed to open naming index: {}", e))?; + + let mmap = unsafe { Mmap::map(&file) } + .map_err(|e| format!("Failed to mmap naming index: {}", e))?; + + if mmap.is_empty() { + return Ok(Self::with_path(path)); + } + + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Failed to validate naming index: {}", e))?; + + let data: NamingData = archived + .deserialize(&mut rkyv::Infallible) + .map_err(|_| "Failed to deserialize naming index")?; + + let mut index = Self::with_path(path); + for (uuid, name) in data.entries { + index.insert_internal(uuid, name); + } + + Ok(index) + } + + /// Save the naming index to disk + pub fn save(&mut self) -> Result<(), String> { + let path = self + .path + .as_ref() + .ok_or("Naming index has no persistence path")?; + + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create naming directory: {}", e))?; + } + + let data = NamingData { + entries: self + .uuid_to_name + .iter() + .map(|(k, v)| (*k, v.clone())) + .collect(), + }; + + let mut serializer = AllocSerializer::<4096>::default(); + serializer + .serialize_value(&data) + .map_err(|e| format!("Failed to serialize naming index: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + let 
temp_path = path.with_extension("tmp"); + { + let mut file = File::create(&temp_path) + .map_err(|e| format!("Failed to create temp file: {}", e))?; + file.write_all(&bytes) + .map_err(|e| format!("Failed to write naming index: {}", e))?; + file.sync_all() + .map_err(|e| format!("Failed to sync naming index: {}", e))?; + } + + fs::rename(&temp_path, path) + .map_err(|e| format!("Failed to rename naming index: {}", e))?; + + self.dirty = false; + Ok(()) + } + + /// Internal insert without setting dirty flag + fn insert_internal(&mut self, uuid: Uuid, name: QualifiedName) { + // Add to reverse index (reversed path → UUIDs) + let reversed = ReversedPath::from_qualified(&name); + self.path_to_uuid + .entry(reversed) + .or_default() + .push(uuid); + self.uuid_to_name.insert(uuid, name); + } + + /// Register a name for a UUID + pub fn insert(&mut self, uuid: Uuid, name: QualifiedName) { + self.insert_internal(uuid, name); + self.dirty = true; + } + + /// Register a simple (unqualified) name for a UUID + pub fn insert_simple(&mut self, uuid: Uuid, name: String) { + self.insert(uuid, vec![name]); + } + + /// Get the qualified name for a UUID + pub fn get(&self, uuid: &Uuid) -> Option<&QualifiedName> { + self.uuid_to_name.get(uuid) + } + + /// Get the simple (last component) name for a UUID + pub fn get_simple(&self, uuid: &Uuid) -> Option<&str> { + self.uuid_to_name + .get(uuid) + .and_then(|name| name.last()) + .map(|s| s.as_str()) + } + + /// Get the display name for a UUID (simple name, or UUID if unnamed) + pub fn display_name(&self, uuid: &Uuid) -> String { + self.get_simple(uuid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("{}", uuid)) + } + + /// Look up all UUIDs whose qualified name ends with the given suffix. + /// + /// Examples: + /// - `lookup_suffix(&["A"])` returns UUIDs for "ExampleNet/A", "OtherNet/A", etc. + /// - `lookup_suffix(&["ExampleNet", "A"])` returns just "ExampleNet/A" + /// + /// Returns an iterator over matching UUIDs. 
+ pub fn lookup_suffix<'a>(&'a self, suffix: &[String]) -> impl Iterator + 'a { + let prefix = ReversedPath::from_suffix(suffix); + self.path_to_uuid + .range(prefix.clone()..) + .take_while(move |(k, _)| k.starts_with(&prefix)) + .flat_map(|(_, uuids)| uuids.iter().copied()) + } + + /// Look up UUID by exact qualified path. + /// Returns None if ambiguous (multiple UUIDs share the exact path). + pub fn lookup_exact(&self, path: &[String]) -> Option { + let reversed = ReversedPath::from_qualified(path); + match self.path_to_uuid.get(&reversed) { + Some(uuids) if uuids.len() == 1 => Some(uuids[0]), + _ => None, + } + } + + /// Resolve a path to a UUID. + /// - If exact match exists, return it. + /// - If suffix matches exactly one UUID, return it. + /// - Otherwise return Err with all candidates (empty if not found, multiple if ambiguous). + pub fn resolve(&self, path: &[String]) -> Result> { + // First try exact match + if let Some(uuid) = self.lookup_exact(path) { + return Ok(uuid); + } + + // Fall back to suffix match + let candidates: Vec = self.lookup_suffix(path).collect(); + match candidates.len() { + 1 => Ok(candidates[0]), + _ => Err(candidates), + } + } + + /// Look up UUIDs by simple (single-segment) name. + /// This is a convenience wrapper around `lookup_suffix` for single names. 
+ pub fn lookup(&self, name: &str) -> Vec { + self.lookup_suffix(&[name.to_string()]).collect() + } + + /// Look up a unique UUID by simple name (returns None if ambiguous or not found) + pub fn lookup_unique(&self, name: &str) -> Option { + let results: Vec = self.lookup_suffix(&[name.to_string()]).collect(); + if results.len() == 1 { + Some(results[0]) + } else { + None + } + } + + /// Check if dirty + pub fn is_dirty(&self) -> bool { + self.dirty + } + + /// Number of entries + pub fn len(&self) -> usize { + self.uuid_to_name.len() + } + + /// Check if empty + pub fn is_empty(&self) -> bool { + self.uuid_to_name.is_empty() + } + + /// Iterate over all (UUID, name) pairs + pub fn iter(&self) -> impl Iterator { + self.uuid_to_name.iter() + } +} + +impl Drop for NamingIndex { + fn drop(&mut self) { + if self.dirty && self.path.is_some() { + let _ = self.save(); + } + } +} + +/// Get the global naming index path +pub fn global_naming_path() -> Option { + #[cfg(unix)] + { + std::env::var("HOME").ok().map(|h| { + let mut p = PathBuf::from(h); + p.push(".config"); + p.push("geolog"); + p.push("names.bin"); + p + }) + } + #[cfg(windows)] + { + std::env::var("APPDATA").ok().map(|mut p| { + p.push("geolog"); + p.push("names.bin"); + p + }) + } + #[cfg(not(any(unix, windows)))] + { + None + } +} + +/// Load or create the global naming index +pub fn global_naming_index() -> NamingIndex { + match global_naming_path() { + Some(path) => NamingIndex::load(&path).unwrap_or_else(|_| NamingIndex::with_path(path)), + None => NamingIndex::new(), + } +} + +// Unit tests moved to tests/proptest_naming.rs diff --git a/src/overlay.rs b/src/overlay.rs new file mode 100644 index 0000000..8c6eae7 --- /dev/null +++ b/src/overlay.rs @@ -0,0 +1,574 @@ +//! Overlay structures: patch-on-write semantics for efficient mutations. +//! +//! Instead of copying a structure to mutate it, we layer changes on top of an +//! immutable base. 
The base is memory-mapped (zero-copy), and mutations accumulate +//! in a thin delta layer. Cost of mutation is O(Δ), never O(base). +//! +//! # Architecture +//! +//! ```text +//! ┌────────────────────────────────────────────────────────┐ +//! │ MappedStructure (immutable, mmap'd, potentially huge) │ +//! └────────────────────────────────────────────────────────┘ +//! ↑ read fallthrough +//! ┌────────────────────────────────────────────────────────┐ +//! │ StructureDelta (tiny: just the changes) │ +//! └────────────────────────────────────────────────────────┘ +//! ``` +//! +//! # Slid Addressing +//! +//! Base elements have Slids `0..base_len`. New overlay elements get Slids +//! `base_len..base_len+delta_len`, so the address space is contiguous. +//! +//! # Usage +//! +//! ```ignore +//! // Load base (fast, zero-copy) +//! let base = MappedStructure::open(path)?; +//! +//! // Create overlay for mutations +//! let mut overlay = OverlayStructure::new(Arc::new(base)); +//! +//! // Mutate (changes go to delta) +//! let elem = overlay.add_element(luid, sort_id); +//! overlay.assert_relation(rel_id, vec![elem, other]); +//! +//! // Read (checks delta first, falls back to base) +//! let sort = overlay.get_sort(elem); +//! +//! // Commit (materialize to new immutable structure) +//! let new_base = overlay.commit(new_path)?; +//! +//! // Or rollback (instant - just clears delta) +//! overlay.rollback(); +//! ``` + +use std::collections::{BTreeSet, HashMap}; +use std::path::Path; +use std::sync::Arc; + +use crate::core::{SortId, Structure}; +use crate::id::{Luid, NumericId, Slid}; +use crate::serialize::save_structure; +use crate::zerocopy::{MappedRelation, MappedStructure}; + +// ============================================================================ +// DELTA TYPES +// ============================================================================ + +/// A delta/patch representing changes to a structure. 
+/// +/// This is the runtime-efficient analog of `Patch` (which uses UUIDs for +/// persistence). `StructureDelta` uses Slids for fast in-memory operations. +#[derive(Clone, Debug, Default)] +pub struct StructureDelta { + /// New elements: (Luid, SortId). Slids start at base.len(). + pub new_elements: Vec<(Luid, SortId)>, + + /// Per-relation deltas (indexed by rel_id) + pub relations: Vec, + + /// Per-function deltas (indexed by func_id) + pub functions: Vec, +} + +impl StructureDelta { + /// Create a new empty delta with the given number of relations and functions. + pub fn new(num_relations: usize, num_functions: usize) -> Self { + Self { + new_elements: Vec::new(), + relations: vec![RelationDelta::default(); num_relations], + functions: vec![FunctionDelta::default(); num_functions], + } + } + + /// Check if the delta is empty (no changes). + pub fn is_empty(&self) -> bool { + self.new_elements.is_empty() + && self.relations.iter().all(|r| r.is_empty()) + && self.functions.iter().all(|f| f.is_empty()) + } +} + +/// Delta for a single relation: assertions and retractions. +#[derive(Clone, Debug, Default)] +pub struct RelationDelta { + /// New tuples to assert (by content) + pub assertions: BTreeSet>, + + /// Tuples to retract (by content, not by ID) + pub retractions: BTreeSet>, +} + +impl RelationDelta { + /// Check if empty. + pub fn is_empty(&self) -> bool { + self.assertions.is_empty() && self.retractions.is_empty() + } +} + +/// Delta for a single function: updated mappings. +#[derive(Clone, Debug, Default)] +pub struct FunctionDelta { + /// Updated mappings: domain Slid -> codomain Slid. + /// Only supports local functions in this version. + pub updates: HashMap, +} + +impl FunctionDelta { + /// Check if empty. 
+ pub fn is_empty(&self) -> bool { + self.updates.is_empty() + } +} + +// ============================================================================ +// OVERLAY STRUCTURE +// ============================================================================ + +/// A mutable overlay on top of an immutable base structure. +/// +/// All reads check the delta first, then fall back to the base. +/// All writes go to the delta. The base is never modified. +pub struct OverlayStructure { + /// The immutable base (memory-mapped, zero-copy) + base: Arc, + + /// Accumulated changes + delta: StructureDelta, +} + +impl OverlayStructure { + /// Create a new overlay on top of a base structure. + pub fn new(base: Arc) -> Self { + let num_relations = base.num_relations(); + let num_functions = base.num_functions(); + Self { + base, + delta: StructureDelta::new(num_relations, num_functions), + } + } + + /// Get the immutable base. + pub fn base(&self) -> &MappedStructure { + &self.base + } + + /// Get the accumulated delta. + pub fn delta(&self) -> &StructureDelta { + &self.delta + } + + /// Check if clean (no changes from base). + pub fn is_clean(&self) -> bool { + self.delta.is_empty() + } + + /// Discard all changes, returning to base state. + pub fn rollback(&mut self) { + self.delta = StructureDelta::new( + self.base.num_relations(), + self.base.num_functions(), + ); + } + + // ======================================================================== + // ELEMENT OPERATIONS + // ======================================================================== + + /// Total number of elements (base + overlay). + pub fn len(&self) -> usize { + self.base.len() + self.delta.new_elements.len() + } + + /// Check if empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Number of sorts. + pub fn num_sorts(&self) -> usize { + self.base.num_sorts() + } + + /// Number of relations. + pub fn num_relations(&self) -> usize { + self.base.num_relations() + } + + /// Number of functions. 
+ pub fn num_functions(&self) -> usize { + self.base.num_functions() + } + + /// Add a new element. Returns its Slid (starts at base.len()). + pub fn add_element(&mut self, luid: Luid, sort_id: SortId) -> Slid { + let slid = Slid::from_usize(self.base.len() + self.delta.new_elements.len()); + self.delta.new_elements.push((luid, sort_id)); + slid + } + + /// Get the Luid for an element. + pub fn get_luid(&self, slid: Slid) -> Option { + let idx = slid.index(); + let base_len = self.base.len(); + if idx < base_len { + self.base.get_luid(slid) + } else { + self.delta + .new_elements + .get(idx - base_len) + .map(|(luid, _)| *luid) + } + } + + /// Get the sort for an element. + pub fn get_sort(&self, slid: Slid) -> Option { + let idx = slid.index(); + let base_len = self.base.len(); + if idx < base_len { + self.base.get_sort(slid) + } else { + self.delta + .new_elements + .get(idx - base_len) + .map(|(_, sort)| *sort) + } + } + + /// Iterate over all elements (base + overlay). + pub fn elements(&self) -> impl Iterator + '_ { + let base_iter = self.base.elements(); + let base_len = self.base.len(); + let overlay_iter = self + .delta + .new_elements + .iter() + .enumerate() + .map(move |(i, (luid, sort))| { + (Slid::from_usize(base_len + i), *luid, *sort) + }); + base_iter.chain(overlay_iter) + } + + /// Iterate over elements of a specific sort. + pub fn elements_of_sort(&self, sort_id: SortId) -> impl Iterator + '_ { + let base_iter = self.base.elements_of_sort(sort_id); + let base_len = self.base.len(); + let overlay_iter = self + .delta + .new_elements + .iter() + .enumerate() + .filter(move |(_, (_, s))| *s == sort_id) + .map(move |(i, _)| Slid::from_usize(base_len + i)); + base_iter.chain(overlay_iter) + } + + // ======================================================================== + // RELATION OPERATIONS + // ======================================================================== + + /// Assert a relation tuple. 
+ pub fn assert_relation(&mut self, rel_id: usize, tuple: Vec) { + // If this tuple was previously retracted, un-retract it + self.delta.relations[rel_id].retractions.remove(&tuple); + // Add to assertions + self.delta.relations[rel_id].assertions.insert(tuple); + } + + /// Retract a relation tuple (by content). + pub fn retract_relation(&mut self, rel_id: usize, tuple: Vec) { + // If this tuple was asserted in the overlay, just remove it + if self.delta.relations[rel_id].assertions.remove(&tuple) { + return; + } + // Otherwise, mark it as retracted from base + self.delta.relations[rel_id].retractions.insert(tuple); + } + + /// Get an overlay view of a relation. + pub fn relation(&self, rel_id: usize) -> Option> { + let base_rel = self.base.relation(rel_id)?; + let delta = self.delta.relations.get(rel_id)?; + Some(OverlayRelation { + base: base_rel, + delta, + }) + } + + // ======================================================================== + // FUNCTION OPERATIONS + // ======================================================================== + + /// Set a function value. + pub fn set_function(&mut self, func_id: usize, domain: Slid, value: Slid) { + self.delta.functions[func_id].updates.insert(domain, value); + } + + /// Get a function value. 
+ pub fn get_function(&self, func_id: usize, domain: Slid) -> Option { + // Check delta first + if let Some(&value) = self.delta.functions[func_id].updates.get(&domain) { + return Some(value); + } + // Fall back to base (only for base elements) + if domain.index() < self.base.len() { + // Need to convert Slid to sort-local index for base lookup + // This requires knowing the sort of the domain element + if let Some(sort_id) = self.base.get_sort(domain) { + // Count how many elements of this sort come before this one + let sort_local_idx = self + .base + .elements_of_sort(sort_id) + .take_while(|&s| s.index() < domain.index()) + .count(); + return self.base.function(func_id)?.get_local(sort_local_idx); + } + } + None + } + + // ======================================================================== + // COMMIT / MATERIALIZE + // ======================================================================== + + /// Materialize the overlay into an owned Structure. + /// + /// This combines the base and delta into a single Structure that can be + /// saved to disk. + pub fn materialize(&self) -> Structure { + // Start with a fresh structure + let mut structure = Structure::new(self.num_sorts()); + + // Copy base elements (we need to create them fresh since Structure wants to own them) + // For now, we'll iterate and add. In production, we'd want a more efficient bulk copy. + let mut slid_map: HashMap = HashMap::new(); + + // We need a universe to add elements, but we're materializing so we'll + // reuse the Luids from the overlay. Create elements with existing Luids. 
+ for (old_slid, luid, sort_id) in self.elements() { + let new_slid = structure.add_element_with_luid(luid, sort_id); + slid_map.insert(old_slid, new_slid); + } + + // Initialize relations with correct arities + let arities: Vec = (0..self.num_relations()) + .map(|rel_id| { + self.base + .relation(rel_id) + .map(|r| r.arity()) + .unwrap_or(0) + }) + .collect(); + structure.init_relations(&arities); + + // Copy relation tuples (applying the slid remapping) + for rel_id in 0..self.num_relations() { + if let Some(rel) = self.relation(rel_id) { + for tuple in rel.live_tuples() { + let remapped: Vec = tuple + .iter() + .map(|&old_slid| slid_map.get(&old_slid).copied().unwrap_or(old_slid)) + .collect(); + structure.assert_relation(rel_id, remapped); + } + } + } + + // TODO: Copy functions (more complex, skip for now) + + structure + } + + /// Commit the overlay: materialize and save to a new file, returning the new MappedStructure. + pub fn commit(&self, path: &Path) -> Result { + let structure = self.materialize(); + save_structure(&structure, path)?; + MappedStructure::open(path) + } +} + +// ============================================================================ +// OVERLAY RELATION VIEW +// ============================================================================ + +/// A read-only view of a relation through an overlay. +pub struct OverlayRelation<'a> { + base: MappedRelation<'a>, + delta: &'a RelationDelta, +} + +impl<'a> OverlayRelation<'a> { + /// Relation arity. + pub fn arity(&self) -> usize { + self.base.arity() + } + + /// Approximate count of live tuples. + /// + /// This is approximate because checking retractions against base tuples + /// would require iterating. For exact count, iterate `live_tuples()`. 
+ pub fn live_count_approx(&self) -> usize { + // Base count + assertions - retractions (approximate) + self.base.live_count() + self.delta.assertions.len() + - self.delta.retractions.len().min(self.base.live_count()) + } + + /// Check if a tuple is live (in base or assertions, not retracted). + pub fn contains(&self, tuple: &[Slid]) -> bool { + // Check if retracted + if self.delta.retractions.contains(tuple) { + return false; + } + // Check assertions + if self.delta.assertions.contains(tuple) { + return true; + } + // Check base - need to iterate base tuples to check + // This is O(n) which is unfortunate, but we don't have a hash index + for base_tuple in self.base.live_tuples() { + let base_vec: Vec = base_tuple.collect(); + if base_vec.as_slice() == tuple { + return true; + } + } + false + } + + /// Iterate over live tuples (base filtered by retractions, plus assertions). + /// + /// Returns tuples as `Vec` for simplicity. Each vec is one tuple. + pub fn live_tuples(&self) -> impl Iterator> + '_ { + // Collect base tuples, filtering out retracted ones + let base_filtered = self + .base + .live_tuples() + .map(|t| t.collect::>()) + .filter(|tuple| !self.delta.retractions.contains(tuple)); + + // Chain with assertions + let assertions = self.delta.assertions.iter().cloned(); + + base_filtered.chain(assertions) + } +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::universe::Universe; + use crate::serialize::save_structure; + use tempfile::tempdir; + + #[test] + fn test_overlay_add_elements() { + let dir = tempdir().unwrap(); + let path = dir.path().join("base.structure"); + + // Create and save a base structure + let mut universe = Universe::new(); + let mut base_structure = Structure::new(2); + let (a, _) = base_structure.add_element(&mut universe, 0); + let (b, _) = 
base_structure.add_element(&mut universe, 1); + save_structure(&base_structure, &path).unwrap(); + + // Load as mapped and create overlay + let mapped = MappedStructure::open(&path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + assert_eq!(overlay.len(), 2); + assert!(overlay.is_clean()); + + // Add elements through overlay + let luid_c = universe.intern(crate::id::Uuid::now_v7()); + let c = overlay.add_element(luid_c, 0); + + assert_eq!(overlay.len(), 3); + assert!(!overlay.is_clean()); + assert_eq!(c.index(), 2); // New element gets Slid after base + + // Check element lookups + assert_eq!(overlay.get_sort(a), Some(0)); + assert_eq!(overlay.get_sort(b), Some(1)); + assert_eq!(overlay.get_sort(c), Some(0)); + + // Rollback + overlay.rollback(); + assert_eq!(overlay.len(), 2); + assert!(overlay.is_clean()); + } + + #[test] + fn test_overlay_relations() { + let dir = tempdir().unwrap(); + let path = dir.path().join("base.structure"); + + // Create base with a relation + let mut universe = Universe::new(); + let mut base_structure = Structure::new(1); + let (a, _) = base_structure.add_element(&mut universe, 0); + let (b, _) = base_structure.add_element(&mut universe, 0); + base_structure.init_relations(&[2]); // binary relation + base_structure.assert_relation(0, vec![a, b]); + save_structure(&base_structure, &path).unwrap(); + + // Load and overlay + let mapped = MappedStructure::open(&path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Check base relation + let rel = overlay.relation(0).unwrap(); + assert_eq!(rel.arity(), 2); + assert!(rel.contains(&[a, b])); + assert!(!rel.contains(&[b, a])); + + // Assert new tuple + overlay.assert_relation(0, vec![b, a]); + let rel = overlay.relation(0).unwrap(); + assert!(rel.contains(&[a, b])); + assert!(rel.contains(&[b, a])); + + // Retract original tuple + overlay.retract_relation(0, vec![a, b]); + let rel = overlay.relation(0).unwrap(); + 
assert!(!rel.contains(&[a, b])); + assert!(rel.contains(&[b, a])); + } + + #[test] + fn test_overlay_materialize() { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let new_path = dir.path().join("new.structure"); + + // Create base + let mut universe = Universe::new(); + let mut base_structure = Structure::new(1); + let (a, _) = base_structure.add_element(&mut universe, 0); + base_structure.init_relations(&[1]); // unary relation + base_structure.assert_relation(0, vec![a]); + save_structure(&base_structure, &base_path).unwrap(); + + // Load, modify, commit + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + let luid_b = universe.intern(crate::id::Uuid::now_v7()); + let b = overlay.add_element(luid_b, 0); + overlay.assert_relation(0, vec![b]); + + let new_mapped = overlay.commit(&new_path).unwrap(); + + // Verify new structure + assert_eq!(new_mapped.len(), 2); + assert_eq!(new_mapped.num_relations(), 1); + let rel = new_mapped.relation(0).unwrap(); + assert_eq!(rel.live_count(), 2); + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..b540ff2 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,761 @@ +//! Parser for Geolog +//! +//! Parses token streams into AST. + +use chumsky::prelude::*; + +use crate::ast::*; +use crate::lexer::{Span, Token}; + +/// Create a parser for a complete Geolog file +pub fn parser() -> impl Parser> + Clone { + declaration() + .map_with_span(|decl, span| Spanned::new(decl, to_span(span))) + .repeated() + .then_ignore(end()) + .map(|declarations| File { declarations }) +} + +fn to_span(span: Span) -> crate::ast::Span { + crate::ast::Span::new(span.start, span.end) +} + +/// Assign positional names ("0", "1", ...) 
to unnamed fields in a record +/// Only unnamed fields consume positional indices, so named fields can be reordered freely: +/// `[a, on: b, c]` → `[("0", a), ("on", b), ("1", c)]` +/// `[on: b, a, c]` → `[("on", b), ("0", a), ("1", c)]` +/// +/// Returns Err with the duplicate field name if duplicates are found. +fn assign_positional_names_checked( + fields: Vec<(Option, T)>, +) -> Result, String> { + let mut positional_idx = 0usize; + let mut seen = std::collections::HashSet::new(); + let mut result = Vec::with_capacity(fields.len()); + + for (name, val) in fields { + let field_name = match name { + Some(n) => n, + None => { + let n = positional_idx.to_string(); + positional_idx += 1; + n + } + }; + + if !seen.insert(field_name.clone()) { + return Err(field_name); + } + result.push((field_name, val)); + } + + Ok(result) +} + +// ============================================================================ +// Helpers +// ============================================================================ + +fn ident() -> impl Parser> + Clone { + select! 
{ + Token::Ident(s) => s, + // Allow keywords to be used as identifiers (e.g., in paths like ax/child/exists) + Token::Namespace => "namespace".to_string(), + Token::Theory => "theory".to_string(), + Token::Instance => "instance".to_string(), + Token::Query => "query".to_string(), + Token::Sort => "Sort".to_string(), + Token::Prop => "Prop".to_string(), + Token::Forall => "forall".to_string(), + Token::Exists => "exists".to_string(), + } +} + +/// Parse a path: `foo` or `foo/bar/baz` +/// Uses `/` for namespace qualification +fn path() -> impl Parser> + Clone { + ident() + .separated_by(just(Token::Slash)) + .at_least(1) + .map(|segments| Path { segments }) +} + +// ============================================================================ +// Types (Concatenative Stack-Based Parsing) +// ============================================================================ + +/// Parse a full type expression with arrows (concatenative style) +/// +/// `A B -> C D -> E` becomes tokens: [A, B, C, D, E, Arrow, Arrow] +/// which evaluates right-to-left: A B -> (C D -> E) +/// +/// Uses a single recursive() to handle mutual recursion between type expressions +/// (for parentheses and record fields) and atomic type tokens. +fn type_expr_impl() -> impl Parser> + Clone { + recursive(|type_expr_rec| { + // === Atomic type tokens (non-recursive) === + let sort = just(Token::Sort).to(TypeToken::Sort); + let prop = just(Token::Prop).to(TypeToken::Prop); + let instance = just(Token::Instance).to(TypeToken::Instance); + let path_tok = path().map(TypeToken::Path); + + // Record type: [field: Type, ...] or [Type, ...] 
or mixed + // Named field: `name: Type` + let named_type_field = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr_rec.clone()) + .map(|(name, ty)| (Some(name), ty)); + // Positional field: `Type` + let positional_type_field = type_expr_rec.clone().map(|ty| (None, ty)); + let record_field = choice((named_type_field, positional_type_field)); + + let record = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(TypeToken::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }); + + // Single atomic token + let single_token = choice((sort, prop, instance, record, path_tok)).map(|t| vec![t]); + + // Parenthesized expression - flatten tokens into parent sequence + let paren_expr = type_expr_rec + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(|expr: TypeExpr| expr.tokens); + + // A "chunk item" is either a paren group or a single token + let chunk_item = choice((paren_expr, single_token)); + + // A "chunk" is one or more items (before an arrow or end) + let chunk = chunk_item + .repeated() + .at_least(1) + .map(|items: Vec>| items.into_iter().flatten().collect::>()); + + // Full type expression: chunks separated by arrows + chunk + .separated_by(just(Token::Arrow)) + .at_least(1) + .map(|chunks: Vec>| { + // For right-associative arrows: + // chunks: [[A, B], [C, D], [E]] + // result: [A, B, C, D, E, Arrow, Arrow] + // + // The evaluator processes Arrow tokens right-to-left: + // Stack after all tokens pushed: [A, B, C, D, E] + // Arrow 1: pop C,D -> push Arrow{C,D} -> [A, B, Arrow{C,D}, E] + // Wait, that's not right either... 
+ // + // Actually the order should be: + // [A, B, Arrow, C, D, Arrow, E] for left-to-right application + // But we want (A B) -> ((C D) -> E) for right-associative + // + // For postfix arrows: + // [A, B, C, D, E, Arrow, Arrow] means: + // - Push A, B, C, D, E + // - Arrow: pop E, pop D -> push Arrow{D,E} + // - Arrow: pop Arrow{D,E}, pop C -> push Arrow{C, Arrow{D,E}} + // Hmm, this also doesn't work well for multi-token chunks. + // + // Actually, let's just flatten all and append arrows. + // The evaluator will be responsible for parsing chunks correctly. + + let num_arrows = chunks.len() - 1; + let mut tokens: Vec = chunks.into_iter().flatten().collect(); + + // Add Arrow tokens at end + for _ in 0..num_arrows { + tokens.push(TypeToken::Arrow); + } + + TypeExpr { tokens } + }) + }) +} + +/// Parse a type expression (full, with arrows) +fn type_expr() -> impl Parser> + Clone { + type_expr_impl() +} + +/// Parse a type expression without top-level arrows (for function domain position) +/// +/// This parses a single "chunk" - type tokens without arrows at the top level. +/// Used for places like function domain where we don't want `A -> B` to be ambiguous. +fn type_expr_no_arrow() -> impl Parser> + Clone { + recursive(|_type_expr_rec| { + // Atomic type tokens + let sort = just(Token::Sort).to(TypeToken::Sort); + let prop = just(Token::Prop).to(TypeToken::Prop); + let instance = just(Token::Instance).to(TypeToken::Instance); + let path_tok = path().map(TypeToken::Path); + + // Record type: [field: Type, ...] or [Type, ...] 
or mixed + // Named field: `name: Type` + let named_type_field = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr_impl()) + .map(|(name, ty)| (Some(name), ty)); + // Positional field: `Type` + let positional_type_field = type_expr_impl().map(|ty| (None, ty)); + let record_field = choice((named_type_field, positional_type_field)); + + let record = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(TypeToken::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }); + + // Single atomic token + let single_token = choice((sort, prop, instance, record, path_tok)).map(|t| vec![t]); + + // Parenthesized expression - can contain full type expr with arrows + let paren_expr = type_expr_impl() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(|expr: TypeExpr| expr.tokens); + + // A "chunk item" is either a paren group or a single token + let chunk_item = choice((paren_expr, single_token)); + + // One or more items, no arrows + chunk_item + .repeated() + .at_least(1) + .map(|items: Vec>| { + TypeExpr { + tokens: items.into_iter().flatten().collect(), + } + }) + }) +} + +// ============================================================================ +// Terms +// ============================================================================ + +fn term() -> impl Parser> + Clone { + recursive(|term| { + let path_term = path().map(Term::Path); + + // Record literal: [field: term, ...] or [term, ...] or mixed + // Named field: `name: value` + // Positional field: `value` (gets name "0", "1", etc.) 
+ let named_field = ident() + .then_ignore(just(Token::Colon)) + .then(term.clone()) + .map(|(name, val)| (Some(name), val)); + let positional_field = term.clone().map(|val| (None, val)); + let record_field = choice((named_field, positional_field)); + + let record_term = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(Term::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }); + + // Parenthesized term + let paren_term = term + .clone() + .delimited_by(just(Token::LParen), just(Token::RParen)); + + let atom = choice((record_term, paren_term, path_term)); + + // Postfix operations: + // - Application (juxtaposition): `w W/src` means "apply W/src to w" + // - Field projection: `.field` projects a field from a record + atom.clone() + .then( + choice(( + // Field projection: .field + just(Token::Dot) + .ignore_then(ident()) + .map(TermPostfix::Project), + // Application: another atom + atom.clone().map(TermPostfix::App), + )) + .repeated(), + ) + .foldl(|acc, op| match op { + TermPostfix::Project(field) => Term::Project(Box::new(acc), field), + TermPostfix::App(arg) => Term::App(Box::new(acc), Box::new(arg)), + }) + }) +} + +#[derive(Clone)] +enum TermPostfix { + Project(String), + App(Term), +} + +/// Parse a record term specifically: [field: term, ...] or [term, ...] or mixed +/// Used for relation assertions where we need a standalone record parser. 
+fn record_term() -> impl Parser> + Clone { + recursive(|rec_term| { + let path_term = path().map(Term::Path); + let inner_term = choice((rec_term.clone(), path_term.clone())); + + // Named field: `name: value` + let named_field = ident() + .then_ignore(just(Token::Colon)) + .then(inner_term.clone()) + .map(|(name, val)| (Some(name), val)); + // Positional field: `value` + let positional_field = inner_term.map(|val| (None, val)); + let record_field = choice((named_field, positional_field)); + + record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(Term::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }) + }) +} + +// ============================================================================ +// Formulas +// ============================================================================ + +fn formula() -> impl Parser> + Clone { + recursive(|formula| { + let quantified_var = ident() + .separated_by(just(Token::Comma)) + .at_least(1) + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .map(|(names, ty)| QuantifiedVar { names, ty }); + + // Existential: exists x : T. phi1, phi2, ... + // The body is a conjunction of formulas (comma-separated). + // An empty body (exists x : X.) is interpreted as True. + // This is standard geometric logic syntax. 
+ let exists = just(Token::Exists) + .ignore_then( + quantified_var + .clone() + .separated_by(just(Token::Comma)) + .at_least(1), + ) + .then_ignore(just(Token::Dot)) + .then(formula.clone().separated_by(just(Token::Comma))) + .map(|(vars, body_conjuncts)| { + let body = match body_conjuncts.len() { + 0 => Formula::True, + 1 => body_conjuncts.into_iter().next().unwrap(), + _ => Formula::And(body_conjuncts), + }; + Formula::Exists(vars, Box::new(body)) + }); + + // Parenthesized formula + let paren_formula = formula + .clone() + .delimited_by(just(Token::LParen), just(Token::RParen)); + + // Term-based formulas: either equality (term = term) or relation application (term rel) + // Since term() greedily parses `base rel` as App(base, Path(rel)), + // we detect that pattern when not followed by `=` and convert to RelApp + let term_based = term() + .then(just(Token::Eq).ignore_then(term()).or_not()) + .try_map(|(t, opt_rhs), span| { + match opt_rhs { + Some(rhs) => Ok(Formula::Eq(t, rhs)), + None => { + // Not equality - check for relation application pattern: term rel + match t { + Term::App(base, rel_term) => { + match *rel_term { + Term::Path(path) if path.segments.len() == 1 => { + Ok(Formula::RelApp(path.segments[0].clone(), *base)) + } + _ => Err(Simple::custom(span, "expected relation name (single identifier)")) + } + } + _ => Err(Simple::custom(span, "expected relation application (term rel) or equality (term = term)")) + } + } + } + }); + + // Literals + let true_lit = just(Token::True).to(Formula::True); + let false_lit = just(Token::False).to(Formula::False); + + let atom = choice((true_lit, false_lit, exists, paren_formula, term_based)); + + // Conjunction: phi /\ psi (binds tighter than disjunction) + let conjunction = atom + .clone() + .then(just(Token::And).ignore_then(atom.clone()).repeated()) + .foldl(|a, b| { + // Flatten into a single And with multiple conjuncts + match a { + Formula::And(mut conjuncts) => { + conjuncts.push(b); + 
Formula::And(conjuncts) + } + _ => Formula::And(vec![a, b]), + } + }); + + // Disjunction: phi \/ psi + conjunction + .clone() + .then(just(Token::Or).ignore_then(conjunction.clone()).repeated()) + .foldl(|a, b| { + // Flatten into a single Or with multiple disjuncts + match a { + Formula::Or(mut disjuncts) => { + disjuncts.push(b); + Formula::Or(disjuncts) + } + _ => Formula::Or(vec![a, b]), + } + }) + }) +} + +// ============================================================================ +// Axioms +// ============================================================================ + +fn axiom_decl() -> impl Parser> + Clone { + let quantified_var = ident() + .separated_by(just(Token::Comma)) + .at_least(1) + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .map(|(names, ty)| QuantifiedVar { names, ty }); + + // Allow empty quantifier list: `forall .` means no universally quantified variables + // This is useful for "unconditional" axioms like `forall . |- exists x : X. ...` + let quantified_vars = just(Token::Forall) + .ignore_then(quantified_var.separated_by(just(Token::Comma))) + .then_ignore(just(Token::Dot)); + + // Hypotheses before |- (optional, comma separated) + let hypotheses = formula() + .separated_by(just(Token::Comma)) + .then_ignore(just(Token::Turnstile)); + + // name : forall vars. 
hyps |- conclusion + // Name can be a path like `ax/anc/base` + path() + .then_ignore(just(Token::Colon)) + .then(quantified_vars) + .then(hypotheses) + .then(formula()) + .map(|(((name, quantified), hypotheses), conclusion)| AxiomDecl { + name, + quantified, + hypotheses, + conclusion, + }) +} + +// ============================================================================ +// Theory items +// ============================================================================ + +fn theory_item() -> impl Parser> + Clone { + // Sort declaration: P : Sort; + let sort_decl = ident() + .then_ignore(just(Token::Colon)) + .then_ignore(just(Token::Sort)) + .then_ignore(just(Token::Semicolon)) + .map(TheoryItem::Sort); + + // Function declaration: name : domain -> codomain; + // Name can be a path like `in.src` + // Domain is parsed without arrows to avoid ambiguity + let function_decl = path() + .then_ignore(just(Token::Colon)) + .then(type_expr_no_arrow()) + .then_ignore(just(Token::Arrow)) + .then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .map(|((name, domain), codomain)| { + TheoryItem::Function(FunctionDecl { + name, + domain, + codomain, + }) + }); + + // Axiom: name : forall ... 
|- ...; + let axiom = axiom_decl() + .then_ignore(just(Token::Semicolon)) + .map(TheoryItem::Axiom); + + // Field declaration (catch-all for parameterized theories): name : type; + let field_decl = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .map(|(name, ty)| TheoryItem::Field(name, ty)); + + // Order matters: try more specific patterns first + // axiom starts with "ident : forall" + // function has "ident : type ->" + // sort has "ident : Sort" + // field is catch-all "ident : type" + choice((axiom, function_decl, sort_decl, field_decl)) +} + +// ============================================================================ +// Declarations +// ============================================================================ + +fn param() -> impl Parser> + Clone { + ident() + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .map(|(name, ty)| Param { name, ty }) +} + +fn theory_decl() -> impl Parser> + Clone { + // Optional `extends ParentTheory` + let extends_clause = ident() + .try_map(|s, span| { + if s == "extends" { + Ok(()) + } else { + Err(Simple::custom(span, "expected 'extends'")) + } + }) + .ignore_then(path()) + .or_not(); + + // A param group in parens: (X : Type, Y : Type) + let param_group = param() + .separated_by(just(Token::Comma)) + .at_least(1) + .delimited_by(just(Token::LParen), just(Token::RParen)); + + // After 'theory', we may have: + // 1. One or more param groups followed by an identifier: (X:T) (Y:U) Name + // 2. Just an identifier (no params): Name + // 3. 
Just '{' (missing name - ERROR) + // + // Strategy: Parse by looking at the first token after 'theory': + // - If '(' -> parse params, then expect name + // - If identifier -> that's the name, no params + // - If '{' -> error: missing name + + // Helper to parse params then name + let params_then_name = param_group + .repeated() + .at_least(1) + .map(|groups: Vec>| groups.into_iter().flatten().collect::>()) + .then(ident()) + .map(|(params, name)| (params, name)); + + // No params, just a name + let just_name = ident().map(|name| (Vec::::new(), name)); + + // Error case: '{' with no name - emit error at the '{' token's location + // Use `just` to peek at '{' and capture its position, then emit a helpful error + // We DON'T consume the '{' because we need it for the body parser + let missing_name = just(Token::LBrace) + .map_with_span(|_, span: Span| span) // Capture '{' token's span + .rewind() // Rewind to not consume '{' - we need it for the body + .validate(|brace_span, _, emit| { + emit(Simple::custom( + brace_span, + "expected theory name - anonymous theories are not allowed. \ + Use: theory MyTheoryName { ... }", + )); + // Return dummy values for error recovery + (Vec::::new(), "_anonymous_".to_string()) + }); + + // Parse theory keyword, then params+name in one of the three ways + // Order matters: try params first (if '('), then name (if ident), then error (if '{') + just(Token::Theory) + .ignore_then(choice((params_then_name, just_name, missing_name))) + .then(extends_clause) + .then( + theory_item() + .map_with_span(|item, span| Spanned::new(item, to_span(span))) + .repeated() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .map(|(((params, name), extends), body)| TheoryDecl { + params, + name, + extends, + body, + }) +} + +fn instance_item() -> impl Parser> + Clone { + recursive(|instance_item| { + // Nested instance: name = { ... 
}; + // Type is inferred from the field declaration in the theory + let nested = ident() + .then_ignore(just(Token::Eq)) + .then( + instance_item + .map_with_span(|item, span| Spanned::new(item, to_span(span))) + .repeated() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .then_ignore(just(Token::Semicolon)) + .map(|(name, body)| { + InstanceItem::NestedInstance( + name, + InstanceDecl { + // Type will be inferred during elaboration + theory: TypeExpr::single_path(Path::single("_inferred".to_string())), + name: String::new(), + body, + needs_chase: false, + }, + ) + }); + + // Element declaration: A : P; or a, b, c : P; + let element = ident() + .separated_by(just(Token::Comma)) + .at_least(1) + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .map(|(names, ty)| InstanceItem::Element(names, ty)); + + // Equation: term = term; + let equation = term() + .then_ignore(just(Token::Eq)) + .then(term()) + .then_ignore(just(Token::Semicolon)) + .map(|(l, r)| InstanceItem::Equation(l, r)); + + // Relation assertion: [field: value, ...] 
relation_name; (multi-ary) + // or: element relation_name; (unary) + // Multi-ary with explicit record + let relation_assertion_record = record_term() + .then(ident()) + .then_ignore(just(Token::Semicolon)) + .map(|(term, rel)| InstanceItem::RelationAssertion(term, rel)); + + // Unary relation: element relation_name; + // This parses as: path followed by another ident, then semicolon + // We wrap the element in a single-field record for uniform handling + let relation_assertion_unary = path() + .map(Term::Path) + .then(ident()) + .then_ignore(just(Token::Semicolon)) + .map(|(elem, rel)| InstanceItem::RelationAssertion(elem, rel)); + + // Try nested first (ident = {), then element (ident :), then record relation ([ ...), + // then unary relation (ident ident ;), then equation (fallback with =) + choice((nested, element, relation_assertion_record, relation_assertion_unary, equation)) + }) +} + +/// Parse a single type token without 'instance' (for instance declaration headers) +fn type_token_no_instance() -> impl Parser> + Clone { + let sort = just(Token::Sort).to(TypeToken::Sort); + let prop = just(Token::Prop).to(TypeToken::Prop); + // No instance token here! 
+ + let path_tok = path().map(TypeToken::Path); + + // Record type with full type expressions inside + let record_field = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr_impl()); + + let record = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .map(TypeToken::Record); + + choice((sort, prop, record, path_tok)) +} + +/// Parse a type expression without the `instance` suffix (for instance declaration headers) +fn type_expr_no_instance() -> impl Parser> + Clone { + // Parenthesized type - parse inner full type expr + let paren_expr = type_expr_impl() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(|expr| expr.tokens); + + // Single token (no instance allowed) + let single = type_token_no_instance().map(|t| vec![t]); + + // Either paren group or single token + let item = choice((paren_expr, single)); + + // Collect all tokens + item.repeated() + .at_least(1) + .map(|items| TypeExpr { + tokens: items.into_iter().flatten().collect(), + }) +} + +fn instance_decl() -> impl Parser> + Clone { + // Syntax: instance Name : Type = { ... } + // or: instance Name : Type = chase { ... 
} + just(Token::Instance) + .ignore_then(ident()) + .then_ignore(just(Token::Colon)) + .then(type_expr_no_instance()) + .then_ignore(just(Token::Eq)) + .then(just(Token::Chase).or_not()) + .then( + instance_item() + .map_with_span(|item, span| Spanned::new(item, to_span(span))) + .repeated() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .map(|(((name, theory), needs_chase), body)| InstanceDecl { + theory, + name, + body, + needs_chase: needs_chase.is_some(), + }) +} + +fn query_decl() -> impl Parser> + Clone { + just(Token::Query) + .ignore_then(ident()) + .then( + just(Token::Question) + .ignore_then(just(Token::Colon)) + .ignore_then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .map(|(name, goal)| QueryDecl { name, goal }) +} + +fn namespace_decl() -> impl Parser> + Clone { + just(Token::Namespace) + .ignore_then(ident()) + .then_ignore(just(Token::Semicolon)) +} + +fn declaration() -> impl Parser> + Clone { + choice(( + namespace_decl().map(Declaration::Namespace), + theory_decl().map(Declaration::Theory), + instance_decl().map(Declaration::Instance), + query_decl().map(Declaration::Query), + )) +} + +// Unit tests moved to tests/unit_parsing.rs diff --git a/src/patch.rs b/src/patch.rs new file mode 100644 index 0000000..5e99620 --- /dev/null +++ b/src/patch.rs @@ -0,0 +1,688 @@ +//! Patch types for version control of geolog structures +//! +//! A Patch represents the changes between two versions of a Structure. +//! Patches are the fundamental unit of version history - each commit +//! creates a new patch that can be applied to recreate the structure. + +use crate::core::SortId; +use crate::id::{NumericId, Slid, Uuid}; +use rkyv::{Archive, Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; + +/// Changes to the element universe (additions and deletions) +/// +/// Note: Element names are tracked separately in NamingPatch. 
+#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct ElementPatch { + /// Elements removed from structure (by UUID) + pub deletions: BTreeSet, + /// Elements added: Uuid → sort_id + pub additions: BTreeMap, +} + +impl ElementPatch { + pub fn is_empty(&self) -> bool { + self.deletions.is_empty() && self.additions.is_empty() + } +} + +/// Changes to element names (separate from structural changes) +/// +/// Names can change independently of structure (renames), and new elements +/// need names. This keeps patches self-contained for version control. +#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct NamingPatch { + /// Names removed (by UUID) - typically when element is deleted + pub deletions: BTreeSet, + /// Names added or changed: UUID → qualified_name path + pub additions: BTreeMap>, +} + +impl NamingPatch { + pub fn is_empty(&self) -> bool { + self.deletions.is_empty() && self.additions.is_empty() + } +} + +/// Changes to function definitions +/// +/// We track both old and new values to support inversion (for undo). +/// The structure uses UUIDs rather than Slids since Slids are unstable +/// across different structure versions. +#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct FunctionPatch { + /// func_id → (domain_uuid → old_codomain_uuid) + /// None means was undefined before + pub old_values: BTreeMap>>, + /// func_id → (domain_uuid → new_codomain_uuid) + pub new_values: BTreeMap>, +} + +impl FunctionPatch { + pub fn is_empty(&self) -> bool { + self.new_values.is_empty() + } +} + +/// Changes to relation assertions (tuples added/removed) +/// +/// Tuples are stored as `Vec` since element Slids are unstable across versions. +/// We track both assertions and retractions to support inversion. 
+#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct RelationPatch { + /// rel_id → set of tuples retracted (as UUID vectors) + pub retractions: BTreeMap>>, + /// rel_id → set of tuples asserted (as UUID vectors) + pub assertions: BTreeMap>>, +} + +impl RelationPatch { + pub fn is_empty(&self) -> bool { + self.assertions.is_empty() && self.retractions.is_empty() + } +} + +/// A complete patch between two structure versions +/// +/// Patches form a linked list via source_commit → target_commit. +/// The initial commit has source_commit = None. +/// +/// Note: Theory reference is stored as a Luid in the Structure, not here. +#[derive(Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct Patch { + /// The commit this patch is based on (None for initial commit) + pub source_commit: Option, + /// The commit this patch creates + pub target_commit: Uuid, + /// Number of sorts in the theory (needed to rebuild structure) + pub num_sorts: usize, + /// Number of functions in the theory (needed to rebuild structure) + pub num_functions: usize, + /// Number of relations in the theory (needed to rebuild structure) + pub num_relations: usize, + /// Element changes (additions/deletions) + pub elements: ElementPatch, + /// Function value changes + pub functions: FunctionPatch, + /// Relation tuple changes (assertions/retractions) + pub relations: RelationPatch, + /// Name changes (for self-contained patches) + pub names: NamingPatch, +} + +impl Patch { + /// Create a new patch + pub fn new( + source_commit: Option, + num_sorts: usize, + num_functions: usize, + num_relations: usize, + ) -> Self { + Self { + source_commit, + target_commit: Uuid::now_v7(), + num_sorts, + num_functions, + num_relations, + elements: ElementPatch::default(), + functions: FunctionPatch::default(), + relations: RelationPatch::default(), + names: NamingPatch::default(), + } + } + + /// Check if 
this patch makes any changes + pub fn is_empty(&self) -> bool { + self.elements.is_empty() + && self.functions.is_empty() + && self.relations.is_empty() + && self.names.is_empty() + } + + /// Invert this patch (swap old/new, additions/deletions) + /// + /// Note: Inversion of element additions requires knowing the sort_id of deleted elements, + /// which we don't track in deletions. This is a known limitation - sort info is lost on invert. + /// Names are fully invertible since we track the full qualified name. + /// Relations are fully invertible (assertions ↔ retractions). + pub fn invert(&self) -> Patch { + Patch { + source_commit: Some(self.target_commit), + target_commit: self.source_commit.unwrap_or_else(Uuid::now_v7), + num_sorts: self.num_sorts, + num_functions: self.num_functions, + num_relations: self.num_relations, + elements: ElementPatch { + deletions: self.elements.additions.keys().copied().collect(), + additions: self + .elements + .deletions + .iter() + .map(|uuid| (*uuid, 0)) // Note: loses sort info on invert + .collect(), + }, + functions: FunctionPatch { + old_values: self + .functions + .new_values + .iter() + .map(|(func_id, changes)| { + ( + *func_id, + changes.iter().map(|(k, v)| (*k, Some(*v))).collect(), + ) + }) + .collect(), + new_values: self + .functions + .old_values + .iter() + .filter_map(|(func_id, changes)| { + let filtered: BTreeMap<_, _> = changes + .iter() + .filter_map(|(k, v)| v.map(|v| (*k, v))) + .collect(); + if filtered.is_empty() { + None + } else { + Some((*func_id, filtered)) + } + }) + .collect(), + }, + relations: RelationPatch { + // Swap assertions ↔ retractions + retractions: self.relations.assertions.clone(), + assertions: self.relations.retractions.clone(), + }, + names: NamingPatch { + deletions: self.names.additions.keys().copied().collect(), + additions: self + .names + .deletions + .iter() + .map(|uuid| (*uuid, vec![])) // Note: loses name on invert (would need old_names tracking) + .collect(), + }, + } + } 
+} + +// ============ Diff and Apply operations ============ + +use crate::core::{RelationStorage, Structure}; +use crate::id::{Luid, get_slid, some_slid}; +use crate::naming::NamingIndex; +use crate::universe::Universe; + +/// Create a patch representing the difference from `old` to `new`. +/// +/// The resulting patch, when applied to `old`, produces `new`. +/// Requires Universe for UUID lookup and NamingIndex for name changes. +pub fn diff( + old: &Structure, + new: &Structure, + universe: &Universe, + old_naming: &NamingIndex, + new_naming: &NamingIndex, +) -> Patch { + let mut patch = Patch::new( + None, // Will be set by caller if needed + new.num_sorts(), + new.num_functions(), + new.relations.len(), + ); + + // Find element deletions: elements in old but not in new + for &luid in old.luids.iter() { + if !new.luid_to_slid.contains_key(&luid) + && let Some(uuid) = universe.get(luid) { + patch.elements.deletions.insert(uuid); + // Also mark name as deleted + patch.names.deletions.insert(uuid); + } + } + + // Find element additions: elements in new but not in old + for (slid, &luid) in new.luids.iter().enumerate() { + if !old.luid_to_slid.contains_key(&luid) + && let Some(uuid) = universe.get(luid) { + patch.elements.additions.insert(uuid, new.sorts[slid]); + // Also add name from new_naming + if let Some(name) = new_naming.get(&uuid) { + patch.names.additions.insert(uuid, name.clone()); + } + } + } + + // Find name changes for elements that exist in both + for &luid in new.luids.iter() { + if old.luid_to_slid.contains_key(&luid) { + // Element exists in both - check for name change + if let Some(uuid) = universe.get(luid) { + let old_name = old_naming.get(&uuid); + let new_name = new_naming.get(&uuid); + if old_name != new_name + && let Some(name) = new_name { + patch.names.additions.insert(uuid, name.clone()); + } + } + } + } + + // Find function value changes + // We need to compare function values for elements that exist in both + for func_id in 
0..new.num_functions() { + if func_id >= old.num_functions() { + // New function added to schema - all its values are additions + // Record each defined value with old_value = None + let Some(new_func_col) = new.functions[func_id].as_local() else { continue }; + for (sort_slid, opt_codomain) in new_func_col.iter().enumerate() { + if let Some(new_codomain_slid) = get_slid(*opt_codomain) { + // Find UUIDs for domain and codomain + let domain_uuid = find_uuid_by_sort_slid(new, universe, func_id, sort_slid); + if let Some(domain_uuid) = domain_uuid { + let new_codomain_luid = new.luids[new_codomain_slid.index()]; + if let Some(new_codomain_uuid) = universe.get(new_codomain_luid) { + // Record: this domain element now maps to this codomain element + // (was undefined before since function didn't exist) + patch.functions.old_values + .entry(func_id) + .or_default() + .insert(domain_uuid, None); + patch.functions.new_values + .entry(func_id) + .or_default() + .insert(domain_uuid, new_codomain_uuid); + } + } + } + } + continue; + } + + let mut old_vals: BTreeMap> = BTreeMap::new(); + let mut new_vals: BTreeMap = BTreeMap::new(); + + // Iterate over elements in the new structure's function domain + // Note: patches only work with local functions currently + let Some(new_func_col) = new.functions[func_id].as_local() else { continue }; + let Some(old_func_col) = old.functions[func_id].as_local() else { continue }; + + for (sort_slid, opt_codomain) in new_func_col.iter().enumerate() { + // Find the UUID for this domain element + if let Some(new_codomain_slid) = get_slid(*opt_codomain) { + let domain_uuid = find_uuid_by_sort_slid(new, universe, func_id, sort_slid); + if let Some(domain_uuid) = domain_uuid { + let new_codomain_luid = new.luids[new_codomain_slid.index()]; + let new_codomain_uuid = universe.get(new_codomain_luid); + + if let Some(new_codomain_uuid) = new_codomain_uuid { + // Check if this element existed in old (by looking up its luid) + let domain_luid = 
find_luid_by_sort_slid(new, func_id, sort_slid); + if let Some(domain_luid) = domain_luid { + if let Some(&old_domain_slid) = old.luid_to_slid.get(&domain_luid) { + let old_sort_slid = old.sort_local_id(old_domain_slid); + let old_codomain = get_slid(old_func_col[old_sort_slid.index()]); + + match old_codomain { + Some(old_codomain_slid) => { + let old_codomain_luid = old.luids[old_codomain_slid.index()]; + if let Some(old_codomain_uuid) = + universe.get(old_codomain_luid) + && old_codomain_uuid != new_codomain_uuid { + // Value changed + old_vals + .insert(domain_uuid, Some(old_codomain_uuid)); + new_vals.insert(domain_uuid, new_codomain_uuid); + } + } + None => { + // Was undefined, now defined + old_vals.insert(domain_uuid, None); + new_vals.insert(domain_uuid, new_codomain_uuid); + } + } + } else { + // Domain element is new - function value is part of the addition + new_vals.insert(domain_uuid, new_codomain_uuid); + } + } + } + } + } + } + + if !new_vals.is_empty() { + patch.functions.old_values.insert(func_id, old_vals); + patch.functions.new_values.insert(func_id, new_vals); + } + } + + // Find relation changes + // Compare tuples in each relation between old and new + let num_relations = new.relations.len().min(old.relations.len()); + for rel_id in 0..num_relations { + let old_rel = &old.relations[rel_id]; + let new_rel = &new.relations[rel_id]; + + // Helper: convert a Slid tuple to UUID tuple + let slid_tuple_to_uuids = |tuple: &[Slid], structure: &Structure| -> Option> { + tuple + .iter() + .map(|&slid| { + let luid = structure.luids[slid.index()]; + universe.get(luid) + }) + .collect() + }; + + // Find tuples in old but not in new (retractions) + let mut retractions: BTreeSet> = BTreeSet::new(); + for tuple in old_rel.iter() { + // Check if this tuple (by UUID) exists in new + if let Some(uuid_tuple) = slid_tuple_to_uuids(tuple, old) { + // See if we can find the same UUID tuple in new + let exists_in_new = new_rel.iter().any(|new_tuple| { + 
slid_tuple_to_uuids(new_tuple, new) + .map(|new_uuids| new_uuids == uuid_tuple) + .unwrap_or(false) + }); + if !exists_in_new { + retractions.insert(uuid_tuple); + } + } + } + + // Find tuples in new but not in old (assertions) + let mut assertions: BTreeSet> = BTreeSet::new(); + for tuple in new_rel.iter() { + if let Some(uuid_tuple) = slid_tuple_to_uuids(tuple, new) { + let exists_in_old = old_rel.iter().any(|old_tuple| { + slid_tuple_to_uuids(old_tuple, old) + .map(|old_uuids| old_uuids == uuid_tuple) + .unwrap_or(false) + }); + if !exists_in_old { + assertions.insert(uuid_tuple); + } + } + } + + if !retractions.is_empty() { + patch.relations.retractions.insert(rel_id, retractions); + } + if !assertions.is_empty() { + patch.relations.assertions.insert(rel_id, assertions); + } + } + + // Handle new relations in new that don't exist in old + for rel_id in num_relations..new.relations.len() { + let new_rel = &new.relations[rel_id]; + let mut assertions: BTreeSet> = BTreeSet::new(); + + for tuple in new_rel.iter() { + let uuid_tuple: Option> = tuple + .iter() + .map(|&slid| { + let luid = new.luids[slid.index()]; + universe.get(luid) + }) + .collect(); + if let Some(uuids) = uuid_tuple { + assertions.insert(uuids); + } + } + + if !assertions.is_empty() { + patch.relations.assertions.insert(rel_id, assertions); + } + } + + patch +} + +/// Helper to find the Luid of an element given its func_id and sort_slid in a structure +fn find_luid_by_sort_slid(structure: &Structure, func_id: usize, sort_slid: usize) -> Option { + let func_col_len = structure.functions[func_id].len(); + for (slid_idx, &_sort_id) in structure.sorts.iter().enumerate() { + let slid = Slid::from_usize(slid_idx); + let elem_sort_slid = structure.sort_local_id(slid); + if elem_sort_slid.index() == sort_slid && func_col_len > sort_slid { + return Some(structure.luids[slid_idx]); + } + } + None +} + +/// Helper to find the UUID of an element given its func_id and sort_slid in a structure +fn 
find_uuid_by_sort_slid( + structure: &Structure, + universe: &Universe, + func_id: usize, + sort_slid: usize, +) -> Option { + find_luid_by_sort_slid(structure, func_id, sort_slid).and_then(|luid| universe.get(luid)) +} + +/// Apply a patch to create a new structure and update naming index. +/// +/// Returns Ok(new_structure) on success, or Err with a description of what went wrong. +/// Requires a Universe to convert UUIDs from the patch to Luids. +/// The naming parameter is updated with name changes from the patch. +pub fn apply_patch( + base: &Structure, + patch: &Patch, + universe: &mut Universe, + naming: &mut NamingIndex, +) -> Result { + // Create a new structure + let mut result = Structure::new(patch.num_sorts); + + // Build a set of deleted UUIDs for quick lookup + let deleted_uuids: std::collections::HashSet = + patch.elements.deletions.iter().copied().collect(); + + // Copy elements from base that weren't deleted + for (slid, &luid) in base.luids.iter().enumerate() { + let uuid = universe.get(luid).ok_or("Unknown luid in base structure")?; + if !deleted_uuids.contains(&uuid) { + result.add_element_with_luid(luid, base.sorts[slid]); + } + } + + // Add new elements from the patch (register UUIDs in universe) + for (uuid, sort_id) in &patch.elements.additions { + result.add_element_with_uuid(universe, *uuid, *sort_id); + } + + // Apply naming changes + for uuid in &patch.names.deletions { + // Note: NamingIndex doesn't have a remove method yet, skip for now + let _ = uuid; + } + for (uuid, name) in &patch.names.additions { + naming.insert(*uuid, name.clone()); + } + + // Initialize function storage + let domain_sort_ids: Vec> = (0..patch.num_functions) + .map(|func_id| { + if func_id < base.functions.len() && !base.functions[func_id].is_empty() { + let func_len = base.functions[func_id].len(); + for (sort_id, carrier) in base.carriers.iter().enumerate() { + if carrier.len() as usize == func_len { + return Some(sort_id); + } + } + } + None + }) + 
.collect(); + + result.init_functions(&domain_sort_ids); + + // Copy function values from base (for non-deleted elements) + // Note: patches only work with local functions currently + for func_id in 0..base.num_functions().min(result.num_functions()) { + let Some(base_func_col) = base.functions[func_id].as_local() else { continue }; + if !result.functions[func_id].is_local() { continue }; + + // Collect all the updates we need to make (to avoid borrow checker issues) + let mut updates: Vec<(usize, Slid)> = Vec::new(); + + for (old_sort_slid, opt_codomain) in base_func_col.iter().enumerate() { + if let Some(old_codomain_slid) = get_slid(*opt_codomain) { + // Find the domain element's Luid + let domain_luid = find_luid_by_sort_slid(base, func_id, old_sort_slid); + if let Some(domain_luid) = domain_luid { + // Check if domain element still exists in result + if let Some(&new_domain_slid) = result.luid_to_slid.get(&domain_luid) { + // Check if codomain element still exists + let codomain_luid = base.luids[old_codomain_slid.index()]; + if let Some(&new_codomain_slid) = result.luid_to_slid.get(&codomain_luid) { + let new_sort_slid = result.sort_local_id(new_domain_slid); + updates.push((new_sort_slid.index(), new_codomain_slid)); + } + } + } + } + } + + // Apply updates + if let Some(result_func_col) = result.functions[func_id].as_local_mut() { + for (idx, codomain_slid) in updates { + if idx < result_func_col.len() { + result_func_col[idx] = some_slid(codomain_slid); + } + } + } + } + + // Apply function value changes from patch (using UUIDs → Luids) + // Note: patches only work with local functions currently + for (func_id, changes) in &patch.functions.new_values { + if *func_id < result.num_functions() && result.functions[*func_id].is_local() { + // Collect updates first to avoid borrow checker issues + let mut updates: Vec<(usize, Slid)> = Vec::new(); + for (domain_uuid, codomain_uuid) in changes { + let domain_luid = universe.lookup(domain_uuid); + let codomain_luid 
= universe.lookup(codomain_uuid); + if let (Some(domain_luid), Some(codomain_luid)) = (domain_luid, codomain_luid) + && let (Some(&domain_slid), Some(&codomain_slid)) = ( + result.luid_to_slid.get(&domain_luid), + result.luid_to_slid.get(&codomain_luid), + ) + { + let sort_slid = result.sort_local_id(domain_slid); + updates.push((sort_slid.index(), codomain_slid)); + } + } + + // Apply updates + if let Some(result_func_col) = result.functions[*func_id].as_local_mut() { + for (idx, codomain_slid) in updates { + if idx < result_func_col.len() { + result_func_col[idx] = some_slid(codomain_slid); + } + } + } + } + } + + // Initialize relation storage + // Infer arities from base if available, otherwise from patch assertions + let relation_arities: Vec = (0..patch.num_relations) + .map(|rel_id| { + // Try base first + if rel_id < base.relations.len() { + base.relations[rel_id].arity() + } else if let Some(assertions) = patch.relations.assertions.get(&rel_id) { + // Infer from first assertion + assertions.iter().next().map(|t| t.len()).unwrap_or(0) + } else { + 0 + } + }) + .collect(); + result.init_relations(&relation_arities); + + // Copy relation tuples from base (for non-deleted elements) + for rel_id in 0..base.relations.len().min(patch.num_relations) { + let base_rel = &base.relations[rel_id]; + + for tuple in base_rel.iter() { + // Convert Slid tuple to UUID tuple to check if still valid + let uuid_tuple: Option> = tuple + .iter() + .map(|&slid| { + let luid = base.luids[slid.index()]; + universe.get(luid) + }) + .collect(); + + if let Some(uuid_tuple) = uuid_tuple { + // Check if this tuple should be retracted + let should_retract = patch + .relations + .retractions + .get(&rel_id) + .map(|r| r.contains(&uuid_tuple)) + .unwrap_or(false); + + if !should_retract { + // Check all elements still exist and convert to new Slids + let new_tuple: Option> = uuid_tuple + .iter() + .map(|uuid| { + universe + .lookup(uuid) + .and_then(|luid| 
result.luid_to_slid.get(&luid).copied()) + }) + .collect(); + + if let Some(new_tuple) = new_tuple { + result.assert_relation(rel_id, new_tuple); + } + } + } + } + } + + // Apply relation assertions from patch + for (rel_id, assertions) in &patch.relations.assertions { + if *rel_id < patch.num_relations { + for uuid_tuple in assertions { + let slid_tuple: Option> = uuid_tuple + .iter() + .map(|uuid| { + universe + .lookup(uuid) + .and_then(|luid| result.luid_to_slid.get(&luid).copied()) + }) + .collect(); + + if let Some(slid_tuple) = slid_tuple { + result.assert_relation(*rel_id, slid_tuple); + } + } + } + } + + Ok(result) +} + +/// Create a patch representing a structure from empty (initial commit) +pub fn to_initial_patch(structure: &Structure, universe: &Universe, naming: &NamingIndex) -> Patch { + let empty = Structure::new(structure.num_sorts()); + let empty_naming = NamingIndex::new(); + diff(&empty, structure, universe, &empty_naming, naming) +} + +// Unit tests moved to tests/proptest_patch.rs diff --git a/src/pretty.rs b/src/pretty.rs new file mode 100644 index 0000000..98368db --- /dev/null +++ b/src/pretty.rs @@ -0,0 +1,424 @@ +//! Pretty-printer for Geolog AST +//! +//! Renders AST back to source syntax for round-trip testing. 
+ +use crate::ast::*; + +/// Pretty-print configuration +pub struct PrettyConfig { + pub indent: usize, +} + +impl Default for PrettyConfig { + fn default() -> Self { + Self { indent: 2 } + } +} + +/// A pretty-printer with indentation tracking +pub struct Pretty { + output: String, + indent_level: usize, + config: PrettyConfig, +} + +impl Default for Pretty { + fn default() -> Self { + Self::new() + } +} + +impl Pretty { + pub fn new() -> Self { + Self { + output: String::new(), + indent_level: 0, + config: PrettyConfig::default(), + } + } + + pub fn finish(self) -> String { + self.output + } + + fn indent(&mut self) { + for _ in 0..(self.indent_level * self.config.indent) { + self.output.push(' '); + } + } + + fn write(&mut self, s: &str) { + self.output.push_str(s); + } + + fn writeln(&mut self, s: &str) { + self.output.push_str(s); + self.output.push('\n'); + } + + fn newline(&mut self) { + self.output.push('\n'); + } + + fn inc_indent(&mut self) { + self.indent_level += 1; + } + + fn dec_indent(&mut self) { + self.indent_level = self.indent_level.saturating_sub(1); + } +} + +// ============ Pretty-printing implementations ============ + +impl Pretty { + pub fn file(&mut self, file: &File) { + for (i, decl) in file.declarations.iter().enumerate() { + if i > 0 { + self.newline(); + } + self.declaration(&decl.node); + } + } + + pub fn declaration(&mut self, decl: &Declaration) { + match decl { + Declaration::Namespace(name) => { + self.write("namespace "); + self.write(name); + self.writeln(";"); + } + Declaration::Theory(t) => self.theory_decl(t), + Declaration::Instance(i) => self.instance_decl(i), + Declaration::Query(q) => self.query_decl(q), + } + } + + pub fn theory_decl(&mut self, t: &TheoryDecl) { + self.write("theory "); + for param in &t.params { + self.write("("); + self.write(¶m.name); + self.write(" : "); + self.type_expr(¶m.ty); + self.write(") "); + } + self.write(&t.name); + self.writeln(" {"); + self.inc_indent(); + for item in &t.body { + 
self.indent(); + self.theory_item(&item.node); + self.newline(); + } + self.dec_indent(); + self.writeln("}"); + } + + pub fn theory_item(&mut self, item: &TheoryItem) { + match item { + TheoryItem::Sort(name) => { + self.write(name); + self.write(" : Sort;"); + } + TheoryItem::Function(f) => { + self.write(&f.name.to_string()); + self.write(" : "); + self.type_expr(&f.domain); + self.write(" -> "); + self.type_expr(&f.codomain); + self.write(";"); + } + TheoryItem::Axiom(a) => self.axiom_decl(a), + TheoryItem::Field(name, ty) => { + self.write(name); + self.write(" : "); + self.type_expr(ty); + self.write(";"); + } + } + } + + pub fn axiom_decl(&mut self, a: &AxiomDecl) { + self.write(&a.name.to_string()); + self.write(" : forall "); + for (i, qv) in a.quantified.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(&qv.names.join(", ")); + self.write(" : "); + self.type_expr(&qv.ty); + } + self.write(". "); + + // Hypotheses (if any) + if !a.hypotheses.is_empty() { + for (i, hyp) in a.hypotheses.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.formula(hyp); + } + self.write(" "); + } + + self.write("|- "); + self.formula(&a.conclusion); + self.write(";"); + } + + pub fn type_expr(&mut self, ty: &TypeExpr) { + use crate::ast::TypeToken; + + let mut need_space = false; + + for token in &ty.tokens { + match token { + TypeToken::Sort => { + if need_space { + self.write(" "); + } + self.write("Sort"); + need_space = true; + } + TypeToken::Prop => { + if need_space { + self.write(" "); + } + self.write("Prop"); + need_space = true; + } + TypeToken::Path(p) => { + if need_space { + self.write(" "); + } + self.write(&p.to_string()); + need_space = true; + } + TypeToken::Instance => { + self.write(" instance"); + need_space = true; + } + TypeToken::Arrow => { + // Arrows are inserted between chunks + // This simplistic approach just prints " -> " when we see Arrow + self.write(" -> "); + need_space = false; + } + TypeToken::Record(fields) 
=> { + if need_space { + self.write(" "); + } + self.write("["); + for (i, (name, field_ty)) in fields.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(name); + self.write(" : "); + self.type_expr(field_ty); + } + self.write("]"); + need_space = true; + } + } + } + } + + /// Print a type expression that might need parentheses + #[allow(dead_code)] + fn type_expr_atom(&mut self, ty: &TypeExpr) { + use crate::ast::TypeToken; + + // Check if this needs parentheses (has arrows or multiple paths) + let has_arrow = ty.tokens.iter().any(|t| matches!(t, TypeToken::Arrow)); + let path_count = ty + .tokens + .iter() + .filter(|t| matches!(t, TypeToken::Path(_))) + .count(); + + if has_arrow || path_count > 1 { + self.write("("); + self.type_expr(ty); + self.write(")"); + } else { + self.type_expr(ty); + } + } + + pub fn term(&mut self, t: &Term) { + match t { + Term::Path(p) => self.write(&p.to_string()), + Term::App(f, a) => { + self.term(f); + self.write(" "); + self.term_atom(a); + } + Term::Project(t, field) => { + self.term(t); + self.write(" ."); + self.write(field); + } + Term::Record(fields) => { + self.write("["); + for (i, (name, val)) in fields.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(name); + self.write(": "); + self.term(val); + } + self.write("]"); + } + } + } + + /// Print a term that might need parentheses + fn term_atom(&mut self, t: &Term) { + match t { + Term::App(_, _) | Term::Project(_, _) => { + self.write("("); + self.term(t); + self.write(")"); + } + _ => self.term(t), + } + } + + pub fn formula(&mut self, f: &Formula) { + match f { + Formula::True => self.write("true"), + Formula::False => self.write("false"), + Formula::RelApp(rel_name, arg) => { + // Postfix relation application: term rel + self.term(arg); + self.write(" "); + self.write(rel_name); + } + Formula::Eq(l, r) => { + self.term(l); + self.write(" = "); + self.term(r); + } + Formula::And(conjuncts) => { + for (i, c) in 
conjuncts.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.formula(c); + } + } + Formula::Or(disjuncts) => { + for (i, d) in disjuncts.iter().enumerate() { + if i > 0 { + self.write(" \\/ "); + } + self.formula_atom(d); + } + } + Formula::Exists(vars, body) => { + self.write("(exists "); + for (i, qv) in vars.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(&qv.names.join(", ")); + self.write(" : "); + self.type_expr(&qv.ty); + } + self.write(". "); + self.formula(body); + self.write(")"); + } + } + } + + /// Print a formula that might need parentheses + fn formula_atom(&mut self, f: &Formula) { + match f { + Formula::Or(_) | Formula::And(_) => { + self.write("("); + self.formula(f); + self.write(")"); + } + _ => self.formula(f), + } + } + + pub fn instance_decl(&mut self, i: &InstanceDecl) { + self.write("instance "); + self.write(&i.name); + self.write(" : "); + self.type_expr(&i.theory); + self.writeln(" = {"); + self.inc_indent(); + for item in &i.body { + self.indent(); + self.instance_item(&item.node); + self.newline(); + } + self.dec_indent(); + self.writeln("}"); + } + + pub fn instance_item(&mut self, item: &InstanceItem) { + match item { + InstanceItem::Element(names, ty) => { + self.write(&names.join(", ")); + self.write(" : "); + self.type_expr(ty); + self.write(";"); + } + InstanceItem::Equation(lhs, rhs) => { + self.term(lhs); + self.write(" = "); + self.term(rhs); + self.write(";"); + } + InstanceItem::NestedInstance(name, inner) => { + self.write(name); + self.writeln(" = {"); + self.inc_indent(); + for item in &inner.body { + self.indent(); + self.instance_item(&item.node); + self.newline(); + } + self.dec_indent(); + self.indent(); + self.write("};"); + } + InstanceItem::RelationAssertion(term, rel) => { + self.term(term); + self.write(" "); + self.write(rel); + self.write(";"); + } + } + } + + pub fn query_decl(&mut self, q: &QueryDecl) { + self.write("query "); + self.write(&q.name); + self.writeln(" {"); + 
self.inc_indent(); + self.indent(); + self.write("? : "); + self.type_expr(&q.goal); + self.writeln(";"); + self.dec_indent(); + self.writeln("}"); + } +} + +/// Convenience function to pretty-print a file +pub fn pretty_print(file: &File) -> String { + let mut p = Pretty::new(); + p.file(file); + p.finish() +} + +// Unit tests moved to tests/unit_pretty.rs diff --git a/src/query/backend.rs b/src/query/backend.rs new file mode 100644 index 0000000..458b67b --- /dev/null +++ b/src/query/backend.rs @@ -0,0 +1,1650 @@ +//! Naive backend for executing RelAlgIR query plans. +//! +//! This is the "obviously correct" reference implementation: +//! - No optimization +//! - No indexing +//! - Just straightforward interpretation +//! +//! Used for proptest validation against optimized backends. +//! +//! # DBSP Temporal Operators +//! +//! This backend supports DBSP-style incremental computation via three temporal operators: +//! +//! - **Delay (z⁻¹)**: Access previous timestep's value +//! - **Diff (δ = 1 - z⁻¹)**: Compute difference from previous timestep +//! - **Integrate (∫)**: Accumulate values across all timesteps +//! +//! These operators require state across timesteps, managed by `StreamContext`. + +use std::collections::HashMap; + +use crate::core::Structure; +use crate::id::{NumericId, Slid}; + +/// A tuple in a relation (bag of tuples with multiplicities). +/// For now we use positive multiplicities only (proper Z-sets would allow negatives). +pub type Tuple = Vec; + +/// A bag of tuples (multiset). Maps tuple -> multiplicity. +/// Multiplicity 0 means absent. 
+#[derive(Debug, Clone, Default)] +pub struct Bag { + pub tuples: HashMap, +} + +impl Bag { + pub fn new() -> Self { + Self::default() + } + + pub fn singleton(tuple: Tuple) -> Self { + let mut b = Self::new(); + b.insert(tuple, 1); + b + } + + pub fn insert(&mut self, tuple: Tuple, mult: i64) { + let entry = self.tuples.entry(tuple.clone()).or_insert(0); + *entry += mult; + if *entry == 0 { + self.tuples.remove(&tuple); + } + } + + pub fn iter(&self) -> impl Iterator { + self.tuples.iter().filter(|(_, m)| **m != 0) + } + + /// Union (Z-set addition) + pub fn union(&self, other: &Bag) -> Bag { + let mut result = self.clone(); + for (tuple, mult) in other.iter() { + result.insert(tuple.clone(), *mult); + } + result + } + + /// Negate (flip multiplicities) + pub fn negate(&self) -> Bag { + let mut result = Bag::new(); + for (tuple, mult) in self.iter() { + result.insert(tuple.clone(), -mult); + } + result + } + + /// Distinct (clamp multiplicities to 0 or 1) + pub fn distinct(&self) -> Bag { + let mut result = Bag::new(); + for (tuple, mult) in self.iter() { + if *mult > 0 { + result.insert(tuple.to_vec(), 1); + } + } + result + } + + pub fn is_empty(&self) -> bool { + self.tuples.is_empty() + } + + pub fn len(&self) -> usize { + self.tuples.len() + } +} + +/// Query plan operations (mirrors RelAlgIR but as Rust enums for execution) +#[derive(Debug, Clone)] +pub enum QueryOp { + /// Scan all elements of a sort + Scan { sort_idx: usize }, + + /// Scan all tuples in a relation + /// Each tuple becomes a row in the result bag + ScanRelation { rel_id: usize }, + + /// Filter by predicate + Filter { + input: Box, + pred: Predicate, + }, + + /// Project to specific columns + Project { + input: Box, + columns: Vec, + }, + + /// Join two inputs on condition + Join { + left: Box, + right: Box, + cond: JoinCond, + }, + + /// Union (bag addition) + Union { + left: Box, + right: Box, + }, + + /// Distinct (deduplicate) + Distinct { input: Box }, + + /// Negate multiplicities + 
Negate { input: Box }, + + /// Constant single tuple + Constant { tuple: Tuple }, + + /// Empty relation + Empty, + + /// Apply a function: extends tuples with `func(arg_col)` + /// `(t₁, ..., tₙ)` → `(t₁, ..., tₙ, func(t[arg_col]))` + Apply { + input: Box, + func_idx: usize, + arg_col: usize, + }, + + /// Apply a single field of a product codomain function + /// For `f: A -> [x: B, y: C]`, extends tuples with `f(arg_col).field_name` + /// `(t₁, ..., tₙ)` → `(t₁, ..., tₙ, f(t[arg_col]).field_name)` + ApplyField { + input: Box, + func_idx: usize, + arg_col: usize, + field_name: String, + }, + + // ======================================================================== + // DBSP Temporal Operators + // ======================================================================== + // These operators work on streams over time, requiring state management. + // Use `execute_stream` with a `StreamContext` instead of bare `execute`. + + /// Delay (z⁻¹): output previous timestep's input value + /// At timestep 0, outputs empty bag. 
+ Delay { + input: Box, + /// Unique identifier for this delay's state + state_id: usize, + }, + + /// Differentiate (δ = 1 - z⁻¹): compute changes since previous timestep + /// output = current_input - previous_input + Diff { + input: Box, + /// Unique identifier for this diff's state + state_id: usize, + }, + + /// Integrate (∫): accumulate inputs over all timesteps + /// output = Σ (all inputs from timestep 0 to now) + Integrate { + input: Box, + /// Unique identifier for this integrate's state + state_id: usize, + }, +} + +/// Predicate for filtering +#[derive(Debug, Clone)] +pub enum Predicate { + True, + False, + /// Column equals constant + ColEqConst { col: usize, val: Slid }, + /// Two columns equal + ColEqCol { left: usize, right: usize }, + /// Function application: func(col_arg) = col_result (both columns) + FuncEq { + func_idx: usize, + arg_col: usize, + result_col: usize, + }, + /// Function application equals constant: func(col_arg) = expected + FuncEqConst { + func_idx: usize, + arg_col: usize, + expected: Slid, + }, + And(Box, Box), + Or(Box, Box), +} + +/// Join condition +#[derive(Debug, Clone)] +pub enum JoinCond { + /// Cross product + Cross, + /// Equijoin on columns + Equi { left_col: usize, right_col: usize }, +} + +// ============================================================================ +// Pretty Printing +// ============================================================================ + +use std::fmt; + +impl fmt::Display for QueryOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.fmt_indented(f, 0) + } +} + +impl QueryOp { + /// Format with indentation for tree structure + fn fmt_indented(&self, f: &mut fmt::Formatter<'_>, indent: usize) -> fmt::Result { + let pad = " ".repeat(indent); + match self { + QueryOp::Scan { sort_idx } => { + write!(f, "{}Scan(sort={})", pad, sort_idx) + } + QueryOp::ScanRelation { rel_id } => { + write!(f, "{}ScanRelation(rel={})", pad, rel_id) + } + QueryOp::Filter { input, pred } 
=> { + writeln!(f, "{}Filter({})", pad, pred)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Project { input, columns } => { + writeln!(f, "{}Project({:?})", pad, columns)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Join { left, right, cond } => { + writeln!(f, "{}Join({})", pad, cond)?; + left.fmt_indented(f, indent + 1)?; + writeln!(f)?; + right.fmt_indented(f, indent + 1) + } + QueryOp::Union { left, right } => { + writeln!(f, "{}Union", pad)?; + left.fmt_indented(f, indent + 1)?; + writeln!(f)?; + right.fmt_indented(f, indent + 1) + } + QueryOp::Distinct { input } => { + writeln!(f, "{}Distinct", pad)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Negate { input } => { + writeln!(f, "{}Negate", pad)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Constant { tuple } => { + let vals: Vec<_> = tuple.iter().map(|s| s.index()).collect(); + write!(f, "{}Const({:?})", pad, vals) + } + QueryOp::Empty => { + write!(f, "{}Empty", pad) + } + QueryOp::Apply { input, func_idx, arg_col } => { + writeln!(f, "{}Apply(func={}, arg_col={})", pad, func_idx, arg_col)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::ApplyField { input, func_idx, arg_col, field_name } => { + writeln!(f, "{}ApplyField(func={}, arg_col={}, field={})", pad, func_idx, arg_col, field_name)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Delay { input, state_id } => { + writeln!(f, "{}z⁻¹(state={})", pad, state_id)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Diff { input, state_id } => { + writeln!(f, "{}δ(state={})", pad, state_id)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Integrate { input, state_id } => { + writeln!(f, "{}∫(state={})", pad, state_id)?; + input.fmt_indented(f, indent + 1) + } + } + } +} + +impl fmt::Display for Predicate { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Predicate::True => write!(f, "⊤"), + Predicate::False => write!(f, "⊥"), + Predicate::ColEqConst { col, val } => write!(f, "c{}={}", 
col, val.index()), + Predicate::ColEqCol { left, right } => write!(f, "c{}=c{}", left, right), + Predicate::FuncEq { func_idx, arg_col, result_col } => { + write!(f, "f{}(c{})=c{}", func_idx, arg_col, result_col) + } + Predicate::FuncEqConst { func_idx, arg_col, expected } => { + write!(f, "f{}(c{})={}", func_idx, arg_col, expected.index()) + } + Predicate::And(a, b) => write!(f, "({} ∧ {})", a, b), + Predicate::Or(a, b) => write!(f, "({} ∨ {})", a, b), + } + } +} + +impl fmt::Display for JoinCond { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + JoinCond::Cross => write!(f, "×"), + JoinCond::Equi { left_col, right_col } => { + write!(f, "c{}=c{}", left_col, right_col) + } + } + } +} + +// ============================================================================ +// DBSP Stream Context +// ============================================================================ + +/// State for DBSP temporal operators across timesteps. +/// +/// Each stateful operator (Delay, Diff, Integrate) uses a unique `state_id` +/// to store its state in this context. Call `step()` to advance time. +#[derive(Debug, Clone, Default)] +pub struct StreamContext { + /// Current timestep (starts at 0) + pub timestep: u64, + + /// State for Delay operators: state_id -> previous input + delay_state: HashMap, + + /// State for Diff operators: state_id -> previous input + diff_state: HashMap, + + /// State for Integrate operators: state_id -> accumulated sum + integrate_state: HashMap, +} + +impl StreamContext { + /// Create a new stream context at timestep 0 + pub fn new() -> Self { + Self::default() + } + + /// Advance to the next timestep. + /// + /// This should be called after processing all operators for the current step. + /// Delay state is automatically updated during execution. 
+ pub fn step(&mut self) { + self.timestep += 1; + } + + /// Reset all state (for testing or restarting computation) + pub fn reset(&mut self) { + self.timestep = 0; + self.delay_state.clear(); + self.diff_state.clear(); + self.integrate_state.clear(); + } + + /// Get delay state (previous input) + fn get_delay(&self, state_id: usize) -> Bag { + self.delay_state.get(&state_id).cloned().unwrap_or_default() + } + + /// Set delay state for next timestep + fn set_delay(&mut self, state_id: usize, bag: Bag) { + self.delay_state.insert(state_id, bag); + } + + /// Get diff state (previous input for differentiation) + fn get_diff_prev(&self, state_id: usize) -> Bag { + self.diff_state.get(&state_id).cloned().unwrap_or_default() + } + + /// Set diff state for next timestep + fn set_diff_prev(&mut self, state_id: usize, bag: Bag) { + self.diff_state.insert(state_id, bag); + } + + /// Get integrate state (accumulated sum) + fn get_integrate(&self, state_id: usize) -> Bag { + self.integrate_state.get(&state_id).cloned().unwrap_or_default() + } + + /// Update integrate state with new input + fn accumulate_integrate(&mut self, state_id: usize, delta: &Bag) { + let current = self.get_integrate(state_id); + let new_total = current.union(delta); + self.integrate_state.insert(state_id, new_total); + } +} + +/// Execute a query plan against a structure. +/// +/// This is the naive, obviously-correct implementation. 
+pub fn execute(plan: &QueryOp, structure: &Structure) -> Bag {
+    match plan {
+        // Scan a sort's carrier set: one unary tuple per element, multiplicity 1
+        QueryOp::Scan { sort_idx } => {
+            let mut result = Bag::new();
+            if let Some(carrier) = structure.carriers.get(*sort_idx) {
+                for elem in carrier.iter() {
+                    result.insert(vec![Slid::from_usize(elem as usize)], 1);
+                }
+            }
+            result
+        }
+
+        // Scan a stored relation: each tuple enters with multiplicity 1
+        QueryOp::ScanRelation { rel_id } => {
+            let mut result = Bag::new();
+            if let Some(rel) = structure.relations.get(*rel_id) {
+                for tuple in rel.iter() {
+                    result.insert(tuple.to_vec(), 1);
+                }
+            }
+            result
+        }
+
+        // Keep only tuples satisfying the predicate; multiplicities preserved
+        QueryOp::Filter { input, pred } => {
+            let input_bag = execute(input, structure);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                if eval_predicate(pred, tuple, structure) {
+                    result.insert(tuple.clone(), *mult);
+                }
+            }
+            result
+        }
+
+        // Column projection; inserting may merge duplicates (bag semantics)
+        QueryOp::Project { input, columns } => {
+            let input_bag = execute(input, structure);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                let projected: Tuple = columns.iter().map(|&c| tuple[c]).collect();
+                result.insert(projected, *mult);
+            }
+            result
+        }
+
+        // Nested-loop join; multiplicities multiply per Z-set semantics
+        QueryOp::Join { left, right, cond } => {
+            let left_bag = execute(left, structure);
+            let right_bag = execute(right, structure);
+            let mut result = Bag::new();
+
+            for (l_tuple, l_mult) in left_bag.iter() {
+                for (r_tuple, r_mult) in right_bag.iter() {
+                    if eval_join_cond(cond, l_tuple, r_tuple) {
+                        // Concatenate tuples
+                        let mut combined = l_tuple.clone();
+                        combined.extend(r_tuple.iter().cloned());
+                        result.insert(combined, l_mult * r_mult);
+                    }
+                }
+            }
+            result
+        }
+
+        // Bag union: multiplicities add
+        QueryOp::Union { left, right } => {
+            let left_bag = execute(left, structure);
+            let right_bag = execute(right, structure);
+            left_bag.union(&right_bag)
+        }
+
+        QueryOp::Distinct { input } => {
+            let input_bag = execute(input, structure);
+            input_bag.distinct()
+        }
+
+        // Z-set negation: flips multiplicity signs
+        QueryOp::Negate { input } => {
+            let input_bag = execute(input, structure);
+            input_bag.negate()
+        }
+
+        QueryOp::Constant { tuple } => Bag::singleton(tuple.clone()),
+
+        QueryOp::Empty => Bag::new(),
+
+        QueryOp::Apply { input, func_idx, arg_col } => {
+            let input_bag = execute(input, structure);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                if let Some(&arg) = tuple.get(*arg_col) {
+                    // Look up function value
+                    // Use sort_local_id to convert Slid to sort-local SortSlid
+                    let sort_slid = structure.sort_local_id(arg);
+                    if let Some(func_result) = structure.get_function(*func_idx, sort_slid) {
+                        // Extend tuple with function result
+                        let mut extended = tuple.clone();
+                        extended.push(func_result);
+                        result.insert(extended, *mult);
+                    }
+                    // If function undefined, tuple is dropped (acts as filter)
+                }
+            }
+            result
+        }
+
+        QueryOp::ApplyField { input, func_idx, arg_col, field_name } => {
+            let input_bag = execute(input, structure);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                if let Some(&arg) = tuple.get(*arg_col) {
+                    let sort_slid = structure.sort_local_id(arg);
+                    // Get product codomain and look up specific field by name
+                    if let Some(fields) = structure.get_function_product_codomain(*func_idx, sort_slid)
+                        && let Some((_, field_val)) = fields.iter().find(|(n, _)| n == field_name) {
+                        let mut extended = tuple.clone();
+                        extended.push(*field_val);
+                        result.insert(extended, *mult);
+                    }
+                    // If field undefined, tuple is dropped
+                }
+            }
+            result
+        }
+
+        // DBSP operators require StreamContext - use execute_stream() instead
+        QueryOp::Delay { .. } | QueryOp::Diff { .. } | QueryOp::Integrate { .. } => {
+            panic!("DBSP temporal operators require StreamContext - use execute_stream() instead")
+        }
+    }
+}
+
+/// Execute a query plan with DBSP temporal operator support.
+///
+/// This handles both stateless operators (scan, filter, join, etc.) and stateful
+/// DBSP operators (delay, diff, integrate). The StreamContext maintains state
+/// across timesteps.
+///
+/// # Example: Semi-naive Datalog fixpoint
+///
+/// ```ignore
+/// let mut ctx = StreamContext::new();
+/// let plan = /* query plan with Integrate for fixpoint */;
+///
+/// loop {
+///     let delta = execute_stream(&plan, &structure, &mut ctx);
+///     if delta.is_empty() {
+///         break; // fixpoint reached
+///     }
+///     ctx.step();
+/// }
+/// ```
+pub fn execute_stream(plan: &QueryOp, structure: &Structure, ctx: &mut StreamContext) -> Bag {
+    match plan {
+        // Stateless operators - delegate to execute()
+        QueryOp::Scan { .. }
+        | QueryOp::ScanRelation { .. }
+        | QueryOp::Filter { .. }
+        | QueryOp::Project { .. }
+        | QueryOp::Join { .. }
+        | QueryOp::Union { .. }
+        | QueryOp::Distinct { .. }
+        | QueryOp::Negate { .. }
+        | QueryOp::Constant { .. }
+        | QueryOp::Empty
+        | QueryOp::Apply { .. }
+        | QueryOp::ApplyField { .. } => {
+            // For stateless operators that contain DBSP subexpressions,
+            // we need to recursively handle them
+            execute_stream_stateless(plan, structure, ctx)
+        }
+
+        // DBSP: Delay (z⁻¹) - output previous timestep's input
+        // NOTE: previous state is read BEFORE recursing so that the input's own
+        // state updates cannot clobber what this operator returns.
+        QueryOp::Delay { input, state_id } => {
+            // Get previous state (empty at timestep 0)
+            let previous = ctx.get_delay(*state_id);
+
+            // Compute current input
+            let current = execute_stream(input, structure, ctx);
+
+            // Store current for next timestep
+            ctx.set_delay(*state_id, current);
+
+            // Return previous
+            previous
+        }
+
+        // DBSP: Diff (δ = 1 - z⁻¹) - compute difference from previous
+        QueryOp::Diff { input, state_id } => {
+            // Get previous input
+            let previous = ctx.get_diff_prev(*state_id);
+
+            // Compute current input
+            let current = execute_stream(input, structure, ctx);
+
+            // Store current for next timestep
+            ctx.set_diff_prev(*state_id, current.clone());
+
+            // Return current - previous (using Z-set subtraction)
+            current.union(&previous.negate())
+        }
+
+        // DBSP: Integrate (∫) - accumulate over all timesteps
+        QueryOp::Integrate { input, state_id } => {
+            // Compute current input (typically a delta/diff)
+            let delta = execute_stream(input, structure, ctx);
+
+            // Add to accumulated total
+            ctx.accumulate_integrate(*state_id, &delta);
+
+            // Return the accumulated total
+            ctx.get_integrate(*state_id)
+        }
+    }
+}
+
+/// Helper for executing stateless operators that may contain DBSP subexpressions.
+///
+/// Mirrors `execute()` arm-for-arm, but recurses through `execute_stream()` so
+/// that Delay/Diff/Integrate nodes nested below a stateless operator still see
+/// the StreamContext (plain `execute()` panics on them).
+fn execute_stream_stateless(plan: &QueryOp, structure: &Structure, ctx: &mut StreamContext) -> Bag {
+    match plan {
+        QueryOp::Scan { sort_idx } => {
+            let mut result = Bag::new();
+            if let Some(carrier) = structure.carriers.get(*sort_idx) {
+                for elem in carrier.iter() {
+                    result.insert(vec![Slid::from_usize(elem as usize)], 1);
+                }
+            }
+            result
+        }
+
+        QueryOp::ScanRelation { rel_id } => {
+            let mut result = Bag::new();
+            if let Some(rel) = structure.relations.get(*rel_id) {
+                for tuple in rel.iter() {
+                    result.insert(tuple.to_vec(), 1);
+                }
+            }
+            result
+        }
+
+        QueryOp::Filter { input, pred } => {
+            let input_bag = execute_stream(input, structure, ctx);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                if eval_predicate(pred, tuple, structure) {
+                    result.insert(tuple.clone(), *mult);
+                }
+            }
+            result
+        }
+
+        QueryOp::Project { input, columns } => {
+            let input_bag = execute_stream(input, structure, ctx);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                let projected: Tuple = columns.iter().map(|&c| tuple[c]).collect();
+                result.insert(projected, *mult);
+            }
+            result
+        }
+
+        QueryOp::Join { left, right, cond } => {
+            let left_bag = execute_stream(left, structure, ctx);
+            let right_bag = execute_stream(right, structure, ctx);
+            let mut result = Bag::new();
+
+            for (l_tuple, l_mult) in left_bag.iter() {
+                for (r_tuple, r_mult) in right_bag.iter() {
+                    if eval_join_cond(cond, l_tuple, r_tuple) {
+                        let mut combined = l_tuple.clone();
+                        combined.extend(r_tuple.iter().cloned());
+                        result.insert(combined, l_mult * r_mult);
+                    }
+                }
+            }
+            result
+        }
+
+        QueryOp::Union { left, right } => {
+            let left_bag = execute_stream(left, structure, ctx);
+            let right_bag = execute_stream(right, structure, ctx);
+            left_bag.union(&right_bag)
+        }
+
+        QueryOp::Distinct { input } => {
+            let input_bag = execute_stream(input, structure, ctx);
+            input_bag.distinct()
+        }
+
+        QueryOp::Negate { input } => {
+            let input_bag = execute_stream(input, structure, ctx);
+            input_bag.negate()
+        }
+
+        QueryOp::Constant { tuple } => Bag::singleton(tuple.clone()),
+
+        QueryOp::Empty => Bag::new(),
+
+        QueryOp::Apply { input, func_idx, arg_col } => {
+            let input_bag = execute_stream(input, structure, ctx);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                if let Some(&arg) = tuple.get(*arg_col) {
+                    // Use sort_local_id to convert Slid to sort-local SortSlid
+                    let sort_slid = structure.sort_local_id(arg);
+                    if let Some(func_result) = structure.get_function(*func_idx, sort_slid) {
+                        let mut extended = tuple.clone();
+                        extended.push(func_result);
+                        result.insert(extended, *mult);
+                    }
+                }
+            }
+            result
+        }
+
+        QueryOp::ApplyField { input, func_idx, arg_col, field_name } => {
+            let input_bag = execute_stream(input, structure, ctx);
+            let mut result = Bag::new();
+            for (tuple, mult) in input_bag.iter() {
+                if let Some(&arg) = tuple.get(*arg_col) {
+                    let sort_slid = structure.sort_local_id(arg);
+                    if let Some(fields) = structure.get_function_product_codomain(*func_idx, sort_slid)
+                        && let Some((_, field_val)) = fields.iter().find(|(n, _)| n == field_name) {
+                        let mut extended = tuple.clone();
+                        extended.push(*field_val);
+                        result.insert(extended, *mult);
+                    }
+                }
+            }
+            result
+        }
+
+        // DBSP operators handled by execute_stream directly
+        QueryOp::Delay { .. } | QueryOp::Diff { .. } | QueryOp::Integrate { .. } => {
+            execute_stream(plan, structure, ctx)
+        }
+    }
+}
+
+/// Evaluate a filter predicate against a single tuple.
+/// Out-of-range column indices make the predicate false rather than panic.
+fn eval_predicate(pred: &Predicate, tuple: &Tuple, structure: &Structure) -> bool {
+    match pred {
+        Predicate::True => true,
+        Predicate::False => false,
+
+        Predicate::ColEqConst { col, val } => tuple.get(*col) == Some(val),
+
+        Predicate::ColEqCol { left, right } => {
+            // is_some() guard: two missing columns must not compare equal
+            tuple.get(*left) == tuple.get(*right) && tuple.get(*left).is_some()
+        }
+
+        Predicate::FuncEq {
+            func_idx,
+            arg_col,
+            result_col,
+        } => {
+            if let (Some(&arg), Some(&expected)) = (tuple.get(*arg_col), tuple.get(*result_col)) {
+                // Look up function value in structure
+                // Use sort_local_id to convert Slid to sort-local SortSlid
+                let sort_slid = structure.sort_local_id(arg);
+                if let Some(actual) = structure.get_function(*func_idx, sort_slid) {
+                    return actual == expected;
+                }
+            }
+            false
+        }
+
+        Predicate::FuncEqConst {
+            func_idx,
+            arg_col,
+            expected,
+        } => {
+            if let Some(&arg) = tuple.get(*arg_col) {
+                // Look up function value in structure
+                // Use sort_local_id to convert Slid to sort-local SortSlid
+                let sort_slid = structure.sort_local_id(arg);
+                if let Some(actual) = structure.get_function(*func_idx, sort_slid) {
+                    return actual == *expected;
+                }
+            }
+            false
+        }
+
+        Predicate::And(a, b) => {
+            eval_predicate(a, tuple, structure) && eval_predicate(b, tuple, structure)
+        }
+
+        Predicate::Or(a, b) => {
+            eval_predicate(a, tuple, structure) || eval_predicate(b, tuple, structure)
+        }
+    }
+}
+
+/// Evaluate a join condition against a pair of tuples.
+fn eval_join_cond(cond: &JoinCond, left: &Tuple, right: &Tuple) -> bool {
+    match cond {
+        JoinCond::Cross => true,
+        JoinCond::Equi { left_col, right_col } => {
+            // is_some() guard: missing columns on both sides must not match
+            left.get(*left_col) == right.get(*right_col) && left.get(*left_col).is_some()
+        }
+    }
+}
+
+// ============================================================================
+// Optimized Backend with Hash Joins
+// ============================================================================
+
+/// Execute a query plan with optimizations (hash joins for equijoins).
+/// +/// This produces the same results as `execute()` but with better asymptotic +/// complexity for equijoins: O(n+m) instead of O(n*m). +/// +/// Use `execute()` as the reference implementation for testing correctness. +pub fn execute_optimized(plan: &QueryOp, structure: &Structure) -> Bag { + match plan { + QueryOp::Join { left, right, cond: JoinCond::Equi { left_col, right_col } } => { + // Hash join: O(n + m) instead of O(n * m) + let left_bag = execute_optimized(left, structure); + let right_bag = execute_optimized(right, structure); + + // Build phase: hash the smaller relation + let (build_bag, probe_bag, build_col, probe_col, is_left_build) = + if left_bag.len() <= right_bag.len() { + (&left_bag, &right_bag, *left_col, *right_col, true) + } else { + (&right_bag, &left_bag, *right_col, *left_col, false) + }; + + // Build hash table: key -> Vec<(tuple, multiplicity)> + let mut hash_table: HashMap> = HashMap::new(); + for (tuple, mult) in build_bag.iter() { + if let Some(&key) = tuple.get(build_col) { + hash_table.entry(key).or_default().push((tuple, *mult)); + } + } + + // Probe phase + let mut result = Bag::new(); + for (probe_tuple, probe_mult) in probe_bag.iter() { + if let Some(&key) = probe_tuple.get(probe_col) + && let Some(matches) = hash_table.get(&key) { + for (build_tuple, build_mult) in matches { + // Reconstruct in correct order (left, right) + let combined = if is_left_build { + let mut c = (*build_tuple).clone(); + c.extend(probe_tuple.iter().cloned()); + c + } else { + let mut c = probe_tuple.clone(); + c.extend((*build_tuple).iter().cloned()); + c + }; + + let mult = if is_left_build { + build_mult * probe_mult + } else { + probe_mult * build_mult + }; + result.insert(combined, mult); + } + } + } + result + } + + // For other operators, delegate to naive implementation but recurse optimized + QueryOp::Scan { sort_idx } => { + let mut result = Bag::new(); + if let Some(carrier) = structure.carriers.get(*sort_idx) { + for elem in 
carrier.iter() { + result.insert(vec![Slid::from_usize(elem as usize)], 1); + } + } + result + } + + QueryOp::ScanRelation { rel_id } => { + let mut result = Bag::new(); + if let Some(rel) = structure.relations.get(*rel_id) { + for tuple in rel.iter() { + result.insert(tuple.to_vec(), 1); + } + } + result + } + + QueryOp::Filter { input, pred } => { + let input_bag = execute_optimized(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if eval_predicate(pred, tuple, structure) { + result.insert(tuple.clone(), *mult); + } + } + result + } + + QueryOp::Project { input, columns } => { + let input_bag = execute_optimized(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + let projected: Tuple = columns.iter().map(|&c| tuple[c]).collect(); + result.insert(projected, *mult); + } + result + } + + QueryOp::Join { left, right, cond: JoinCond::Cross } => { + // Cross join: still O(n*m), no optimization possible + let left_bag = execute_optimized(left, structure); + let right_bag = execute_optimized(right, structure); + let mut result = Bag::new(); + + for (l_tuple, l_mult) in left_bag.iter() { + for (r_tuple, r_mult) in right_bag.iter() { + let mut combined = l_tuple.clone(); + combined.extend(r_tuple.iter().cloned()); + result.insert(combined, l_mult * r_mult); + } + } + result + } + + QueryOp::Union { left, right } => { + let left_bag = execute_optimized(left, structure); + let right_bag = execute_optimized(right, structure); + left_bag.union(&right_bag) + } + + QueryOp::Distinct { input } => { + let input_bag = execute_optimized(input, structure); + input_bag.distinct() + } + + QueryOp::Negate { input } => { + let input_bag = execute_optimized(input, structure); + input_bag.negate() + } + + QueryOp::Constant { tuple } => Bag::singleton(tuple.clone()), + + QueryOp::Empty => Bag::new(), + + QueryOp::Apply { input, func_idx, arg_col } => { + let input_bag = execute_optimized(input, 
structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + let sort_slid = structure.sort_local_id(arg); + if let Some(func_result) = structure.get_function(*func_idx, sort_slid) { + let mut extended = tuple.clone(); + extended.push(func_result); + result.insert(extended, *mult); + } + } + } + result + } + + QueryOp::ApplyField { input, func_idx, arg_col, field_name } => { + let input_bag = execute_optimized(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + let sort_slid = structure.sort_local_id(arg); + if let Some(fields) = structure.get_function_product_codomain(*func_idx, sort_slid) + && let Some((_, field_val)) = fields.iter().find(|(n, _)| n == field_name) { + let mut extended = tuple.clone(); + extended.push(*field_val); + result.insert(extended, *mult); + } + } + } + result + } + + // DBSP operators not supported in optimized path yet + QueryOp::Delay { .. } | QueryOp::Diff { .. } | QueryOp::Integrate { .. 
} => { + panic!("DBSP temporal operators require StreamContext - use execute_stream() instead") + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::id::NumericId; + + #[test] + fn test_scan_filter() { + // Create a simple structure with one sort containing elements 0, 1, 2 + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + // Scan all elements + let scan = QueryOp::Scan { sort_idx: 0 }; + let result = execute(&scan, &structure); + assert_eq!(result.len(), 3); + + // Filter to just element 1 + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }, + }; + let result = execute(&filter, &structure); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(1)])); + } + + #[test] + fn test_scan_relation() { + use crate::core::{RelationStorage, VecRelation}; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + + // Initialize a relation with arity 2 + structure.relations.push(VecRelation::new(2)); + + // Add tuples to the relation + structure.relations[0].insert(vec![Slid::from_usize(0), Slid::from_usize(1)]); + structure.relations[0].insert(vec![Slid::from_usize(1), Slid::from_usize(0)]); + + // Scan the relation + let scan_rel = QueryOp::ScanRelation { rel_id: 0 }; + let result = execute(&scan_rel, &structure); + + assert_eq!(result.len(), 2); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(0), Slid::from_usize(1)])); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(1), Slid::from_usize(0)])); + } + + #[test] + fn test_join() { + let mut structure = Structure::new(2); + // Sort 0: {a, b} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + // Sort 1: {x, y} + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + let left = 
QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }; + + let result = execute(&join, &structure); + // Cross product: 2 * 2 = 4 tuples + assert_eq!(result.len(), 4); + } + + // ======================================================================== + // DBSP Temporal Operator Tests + // ======================================================================== + + #[test] + fn test_delay_initial_empty() { + // Delay should output empty at timestep 0 + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let plan = QueryOp::Delay { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(42)], + }), + state_id: 0, + }; + + // First step: output should be empty (no previous) + let result = execute_stream(&plan, &structure, &mut ctx); + assert!(result.is_empty(), "delay should be empty at timestep 0"); + } + + #[test] + fn test_delay_outputs_previous() { + // Delay should output previous input after step() + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let plan = QueryOp::Delay { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(42)], + }), + state_id: 0, + }; + + // First step: execute to set up state + let _ = execute_stream(&plan, &structure, &mut ctx); + ctx.step(); + + // Second step: should output the previous input + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(42)])); + } + + #[test] + fn test_diff_computes_delta() { + // Diff outputs current - previous + let mut structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + // Start with elements {0, 1} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + + let plan = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + + // First 
step: diff = {0, 1} - {} = {0, 1} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 2); + ctx.step(); + + // Add element 2, so now scan = {0, 1, 2} + structure.carriers[0].insert(2); + + // Second step: diff = {0, 1, 2} - {0, 1} = {2} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(2)])); + } + + #[test] + fn test_integrate_accumulates() { + // Integrate accumulates across timesteps + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + // We'll feed constant input at each step + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(1)], + }), + state_id: 0, + }; + + // Step 0: accumulated = {1} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 1); + ctx.step(); + + // Step 1: accumulated = {1} + {1} = {1} with multiplicity 2 + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 2); + ctx.step(); + + // Step 2: multiplicity 3 + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 3); + } + + #[test] + fn test_diff_integrate_identity() { + // ∫(δ(x)) = x (for stable input) + // This is the fundamental DBSP identity + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let mut ctx = StreamContext::new(); + + // ∫(δ(scan)) + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + state_id: 1, + }; + + // Step 0: diff = {0,1,2}, integrate = {0,1,2} + let result = execute_stream(&plan, &structure, 
&mut ctx); + assert_eq!(result.len(), 3); + ctx.step(); + + // Step 1: diff = {} (no change), integrate = {0,1,2} (unchanged) + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 3); + ctx.step(); + + // Step 2: still {0,1,2} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 3); + } + + #[test] + fn test_dbsp_with_filter() { + // Test DBSP operators composed with stateless operators + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let mut ctx = StreamContext::new(); + + // Filter(Diff(scan)) - incremental filter + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }, + }; + + // Step 0: diff = {0,1,2}, filter = {1} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(1)])); + ctx.step(); + + // Add element 3 (doesn't pass filter) + structure.carriers[0].insert(3); + + // Step 1: diff = {3}, filter = {} (3 doesn't match predicate) + let result = execute_stream(&plan, &structure, &mut ctx); + assert!(result.is_empty()); + } + + #[test] + fn test_stream_context_reset() { + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(1)], + }), + state_id: 0, + }; + + // Run a few steps + let _ = execute_stream(&plan, &structure, &mut ctx); + ctx.step(); + let _ = execute_stream(&plan, &structure, &mut ctx); + ctx.step(); + + assert_eq!(ctx.timestep, 2); + + // Reset + ctx.reset(); + assert_eq!(ctx.timestep, 0); + + // Integrate should start fresh + let result = execute_stream(&plan, &structure, &mut ctx); + 
assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 1); + } + + // ======================================================================== + // Semi-Naive Datalog Example (DBSP in action) + // ======================================================================== + + /// Demonstrates DBSP for transitive closure (semi-naive style). + /// + /// This example computes reachability in a graph using the DBSP pattern: + /// - δR = new facts this iteration + /// - ∫(δR) = all facts so far + /// + /// The "semi-naive" optimization is automatic: Diff computes only changes, + /// avoiding redundant re-derivation of old facts. + #[test] + fn test_semi_naive_transitive_closure() { + // Graph: 0→1, 1→2, 2→3 + // We represent edges as tuples (src, tgt) in sort 0 + let mut structure = Structure::new(1); + + // Add edge tuples as elements: encode (a,b) as slid = a*10 + b + // 0→1: slid=1, 1→2: slid=12, 2→3: slid=23 + structure.carriers[0].insert(1); // edge 0→1 + structure.carriers[0].insert(12); // edge 1→2 + structure.carriers[0].insert(23); // edge 2→3 + + let mut ctx = StreamContext::new(); + + // Query: scan all edges (base facts) + let base_facts = QueryOp::Scan { sort_idx: 0 }; + + // In a full implementation, we'd: + // 1. Differentiate the base facts to get δR + // 2. Join δR with ∫R to derive new transitive edges + // 3. 
Integrate to accumulate all reachable pairs + // + // For this test, we just verify the DBSP operators work together: + + // Step 1: ∫(δ(scan)) should equal the scan itself for stable input + let incremental_view = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(base_facts.clone()), + state_id: 0, + }), + state_id: 1, + }; + + // First execution: should see all 3 edges + let result = execute_stream(&incremental_view, &structure, &mut ctx); + assert_eq!(result.len(), 3, "should have 3 edges initially"); + ctx.step(); + + // Add new edge: 3→4 (encoded as slid=34) + structure.carriers[0].insert(34); + + // Second execution: diff should detect +1 new edge, integrate shows all 4 + let result = execute_stream(&incremental_view, &structure, &mut ctx); + assert_eq!(result.len(), 4, "should have 4 edges after adding 3→4"); + + // Verify incrementality: diff should show just the new edge + let diff_only = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 2, // fresh state_id + }; + let mut fresh_ctx = StreamContext::new(); + + // First step: all edges are "new" + let delta = execute_stream(&diff_only, &structure, &mut fresh_ctx); + assert_eq!(delta.len(), 4); + fresh_ctx.step(); + + // Second step with no changes: delta should be empty + let delta = execute_stream(&diff_only, &structure, &mut fresh_ctx); + assert!(delta.is_empty(), "no changes, delta should be empty"); + } + + // ======================================================================== + // Hash Join Tests (execute_optimized) + // ======================================================================== + + #[test] + fn test_hash_join_basic() { + // Test that hash join produces same results as nested loop join + let mut structure = Structure::new(2); + // Sort 0: {0, 1, 2} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + // Sort 1: {0, 1, 2} (some overlap for equijoin) + 
structure.carriers[1].insert(0); + structure.carriers[1].insert(1); + structure.carriers[1].insert(2); + + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + // Results should be identical + assert_eq!(naive_result.len(), optimized_result.len()); + for (tuple, mult) in naive_result.iter() { + assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "tuple {:?} has different multiplicity", + tuple + ); + } + } + + #[test] + fn test_hash_join_no_matches() { + // Test equijoin with no matching keys + let mut structure = Structure::new(2); + // Sort 0: {0, 1} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + // Sort 1: {10, 11} (no overlap) + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + assert!(naive_result.is_empty()); + assert!(optimized_result.is_empty()); + } + + #[test] + fn test_hash_join_asymmetric() { + // Test that join order is preserved when left is larger than right + let mut structure = Structure::new(2); + // Sort 0: {0, 1, 2, 3, 4} (larger) + for i in 0..5 { + structure.carriers[0].insert(i); + } + // Sort 1: {2, 3} (smaller, will be build side) + structure.carriers[1].insert(2); + structure.carriers[1].insert(3); + + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + 
}; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + // Should have matches for 2 and 3 + assert_eq!(naive_result.len(), 2); + assert_eq!(optimized_result.len(), 2); + + // Verify tuple order is (left_val, right_val) + assert!(optimized_result.tuples.contains_key(&vec![ + Slid::from_usize(2), + Slid::from_usize(2) + ])); + assert!(optimized_result.tuples.contains_key(&vec![ + Slid::from_usize(3), + Slid::from_usize(3) + ])); + } + + #[test] + fn test_hash_join_with_duplicates() { + // Test hash join correctly handles multiplicities + let mut structure = Structure::new(2); + // Both sides have element 1 + structure.carriers[0].insert(1); + structure.carriers[1].insert(1); + + // Join constant bags with multiplicities + let left = QueryOp::Union { + left: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + right: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + }; + let right = QueryOp::Union { + left: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + right: Box::new(QueryOp::Union { + left: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + right: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + }), + }; + + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + // 2 * 3 = 6 (multiplicity multiplication) + let tuple = vec![Slid::from_usize(1), Slid::from_usize(1)]; + assert_eq!(naive_result.tuples.get(&tuple), Some(&6)); + assert_eq!(optimized_result.tuples.get(&tuple), Some(&6)); + } + + #[test] + fn test_optimized_matches_naive_cross_join() { + // Cross join should work the same in optimized backend + let mut structure = Structure::new(2); + structure.carriers[0].insert(0); + 
structure.carriers[0].insert(1); + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + assert_eq!(naive_result.len(), 4); // 2 * 2 = 4 + assert_eq!(optimized_result.len(), 4); + + for (tuple, mult) in naive_result.iter() { + assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "tuple {:?} mismatch", + tuple + ); + } + } + + #[test] + fn test_optimized_nested_joins() { + // Test optimized backend with nested joins + let mut structure = Structure::new(3); + structure.carriers[0].insert(1); + structure.carriers[1].insert(1); + structure.carriers[2].insert(1); + + // (A ⋈ B) ⋈ C + let join_ab = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let join_abc = QueryOp::Join { + left: Box::new(join_ab), + right: Box::new(QueryOp::Scan { sort_idx: 2 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join_abc, &structure); + let optimized_result = super::execute_optimized(&join_abc, &structure); + + assert_eq!(naive_result.len(), optimized_result.len()); + // Result should be (1, 1, 1) + let expected = vec![Slid::from_usize(1), Slid::from_usize(1), Slid::from_usize(1)]; + assert!(optimized_result.tuples.contains_key(&expected)); + } + + // ======================================================================== + // Display / Pretty Printing Tests + // ======================================================================== + + #[test] + fn test_display_scan() { + let plan = QueryOp::Scan { sort_idx: 0 }; + let display = format!("{}", plan); + assert_eq!(display, "Scan(sort=0)"); + } + + #[test] 
+ fn test_display_filter() { + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 1 }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(42), + }, + }; + let display = format!("{}", plan); + assert!(display.contains("Filter(c0=42)")); + assert!(display.contains("Scan(sort=1)")); + } + + #[test] + fn test_display_join() { + let plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + let display = format!("{}", plan); + assert!(display.contains("Join(c0=c0)")); + assert!(display.contains("Scan(sort=0)")); + assert!(display.contains("Scan(sort=1)")); + } + + #[test] + fn test_display_cross_join() { + let plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + let display = format!("{}", plan); + assert!(display.contains("Join(×)")); + } + + #[test] + fn test_display_dbsp_operators() { + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + state_id: 1, + }; + let display = format!("{}", plan); + assert!(display.contains("∫(state=1)")); + assert!(display.contains("δ(state=0)")); + assert!(display.contains("Scan(sort=0)")); + } + + #[test] + fn test_display_nested_plan() { + // Filter(Join(×) + // Scan(0) + // Scan(1)) + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }), + pred: Predicate::ColEqCol { left: 0, right: 1 }, + }; + let display = format!("{}", plan); + // Verify structure is maintained + assert!(display.contains("Filter(c0=c1)")); + assert!(display.contains("Join(×)")); + // Verify indentation is present (child ops should be indented) + assert!(display.contains(" 
Scan(sort=0)")); + assert!(display.contains(" Scan(sort=1)")); + } + + #[test] + fn test_display_predicate_compound() { + let pred = Predicate::And( + Box::new(Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }), + Box::new(Predicate::Or( + Box::new(Predicate::True), + Box::new(Predicate::False), + )), + ); + let display = format!("{}", pred); + assert_eq!(display, "(c0=1 ∧ (⊤ ∨ ⊥))"); + } +} diff --git a/src/query/chase.rs b/src/query/chase.rs new file mode 100644 index 0000000..aa3ee5d --- /dev/null +++ b/src/query/chase.rs @@ -0,0 +1,710 @@ +//! Chase algorithm for computing derived relations. +//! +//! The chase takes a structure and a set of axioms (sequents) and repeatedly +//! applies the axioms until a fixpoint is reached. This is the standard database +//! chase algorithm adapted for geometric logic. +//! +//! # Implementation +//! +//! This implementation uses the tensor subsystem to evaluate premises: +//! 1. Compile premise to TensorExpr (handles existentials, conjunctions, etc.) +//! 2. Materialize to get all satisfying variable assignments +//! 3. For each assignment, fire the conclusion (add relations, create elements) +//! +//! This approach is strictly more powerful than query-based chase because +//! the tensor system naturally handles existential quantification in premises +//! via tensor contraction. +//! +//! # Supported Axiom Patterns +//! +//! **Premises** (anything the tensor system can compile): +//! - Relations: `R(x,y)` +//! - Conjunctions: `R(x,y), S(y,z)` +//! - Existentials: `∃e. f(e) = x ∧ g(e) = y` +//! - Equalities: `f(x) = y`, `f(x) = g(y)` +//! - Disjunctions: `R(x) ∨ S(x)` +//! +//! **Conclusions**: +//! - Relations: `⊢ R(x,y)` — add tuple to relation +//! - Existentials: `⊢ ∃b. f(b) = y` — create element with function binding +//! - Conjunctions: `⊢ R(x,y), f(x) = z` — multiple effects +//! +//! # Usage +//! +//! ```ignore +//! use geolog::query::chase::chase_fixpoint; +//! +//! // Run chase to fixpoint +//! 
let iterations = chase_fixpoint( +//! &theory.theory.axioms, +//! &mut structure, +//! &mut universe, +//! &theory.theory.signature, +//! 100, +//! )?; +//! ``` + +use std::collections::HashMap; + +use crate::cc::{CongruenceClosure, EquationReason}; +use crate::core::{DerivedSort, Formula, RelationStorage, Sequent, Signature, Structure, Term}; +use crate::id::{NumericId, Slid}; +use crate::tensor::{check_sequent, CheckResult}; +use crate::universe::Universe; + +/// Error type for chase operations +#[derive(Debug, Clone)] +pub enum ChaseError { + /// Unsupported formula in conclusion + UnsupportedConclusion(String), + /// Variable not bound + UnboundVariable(String), + /// Function conflict (different values for same input) + FunctionConflict(String), + /// Chase did not converge + MaxIterationsExceeded(usize), + /// Tensor compilation failed + TensorCompilationFailed(String), +} + +impl std::fmt::Display for ChaseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::UnsupportedConclusion(s) => write!(f, "Unsupported conclusion: {s}"), + Self::UnboundVariable(s) => write!(f, "Unbound variable: {s}"), + Self::FunctionConflict(s) => write!(f, "Function conflict: {s}"), + Self::MaxIterationsExceeded(n) => write!(f, "Chase did not converge after {n} iterations"), + Self::TensorCompilationFailed(s) => write!(f, "Tensor compilation failed: {s}"), + } + } +} + +impl std::error::Error for ChaseError {} + +/// Variable binding: maps variable names to Slids +pub type Binding = HashMap<String, Slid>; + +/// Execute one step of the chase algorithm. +/// +/// Iterates over all axioms, evaluates premises using the tensor system, +/// and fires conclusions for each satisfying assignment. +/// +/// Returns `true` if any changes were made. 
+pub fn chase_step( + axioms: &[Sequent], + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result<bool, ChaseError> { + let mut changed = false; + + for axiom in axioms { + changed |= fire_axiom(axiom, structure, cc, universe, sig)?; + } + + Ok(changed) +} + +/// Fire a single axiom: find violations using tensor system, fire conclusion only for violations. +/// +/// This is the key to correct chase semantics: we only create fresh elements when +/// the tensor system confirms there is NO existing witness for the conclusion. +fn fire_axiom( + axiom: &Sequent, + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result<bool, ChaseError> { + // Check the axiom - if compilation fails due to unsupported patterns, skip silently + let violations = match check_sequent(axiom, structure, sig) { + Ok(CheckResult::Satisfied) => { + // Axiom is already satisfied - nothing to fire + return Ok(false); + } + Ok(CheckResult::Violated(vs)) => vs, + Err(_) => { + // Tensor compilation failed (unsupported pattern) + // Skip this axiom silently + return Ok(false); + } + }; + + if violations.is_empty() { + return Ok(false); + } + + // Build index→Slid lookup for each context variable + let index_to_slid: Vec<Vec<Slid>> = axiom.context.vars.iter() + .map(|(_, sort)| carrier_to_slid_vec(structure, sort)) + .collect(); + + // Map from variable name to its position in the context + let var_to_ctx_idx: HashMap<&str, usize> = axiom.context.vars.iter() + .enumerate() + .map(|(i, (name, _))| (name.as_str(), i)) + .collect(); + + let mut changed = false; + + // Fire conclusion ONLY for violations (where premise holds but conclusion doesn't) + for violation in violations { + // Build binding from violation assignment + let binding: Binding = violation.variable_names.iter() + .enumerate() + .filter_map(|(tensor_idx, var_name)| { + let ctx_idx = var_to_ctx_idx.get(var_name.as_str())?; + let slid_vec = &index_to_slid[*ctx_idx]; 
+ let tensor_val = violation.assignment.get(tensor_idx)?; + let slid = slid_vec.get(*tensor_val)?; + Some((var_name.clone(), *slid)) + }) + .collect(); + + // Fire conclusion with this binding + match fire_conclusion(&axiom.conclusion, &binding, structure, cc, universe, sig) { + Ok(c) => changed |= c, + Err(_) => { + // Unsupported conclusion pattern - skip this axiom silently + return Ok(false); + } + } + } + + Ok(changed) +} + +/// Convert a carrier to a Vec of Slids for index→Slid lookup +fn carrier_to_slid_vec(structure: &Structure, sort: &DerivedSort) -> Vec { + match sort { + DerivedSort::Base(sort_id) => { + structure.carriers[*sort_id] + .iter() + .map(|u| Slid::from_usize(u as usize)) + .collect() + } + DerivedSort::Product(_) => { + // Product sorts: would need to enumerate all combinations + // For now, return empty (these are rare in practice) + vec![] + } + } +} + +/// Fire a conclusion formula given a variable binding. +/// Returns true if any changes were made. +fn fire_conclusion( + formula: &Formula, + binding: &Binding, + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result { + match formula { + Formula::True => Ok(false), + + Formula::False => { + // Contradiction - this shouldn't happen in valid chase + Err(ChaseError::UnsupportedConclusion("False in conclusion".to_string())) + } + + Formula::Rel(rel_id, term) => { + // Add tuple to relation + let tuple = eval_term_to_tuple(term, binding, structure)?; + + // Check if already present (using canonical representatives) + let canonical_tuple: Vec = tuple.iter() + .map(|&s| cc.canonical(s)) + .collect(); + + // Check if a canonically-equivalent tuple exists + let exists = structure.relations[*rel_id].iter().any(|existing| { + if existing.len() != canonical_tuple.len() { + return false; + } + existing.iter().zip(canonical_tuple.iter()).all(|(e, c)| { + cc.canonical(*e) == *c + }) + }); + + if exists { + return Ok(false); + } + + 
structure.relations[*rel_id].insert(tuple); + Ok(true) + } + + Formula::Conj(formulas) => { + let mut changed = false; + for f in formulas { + changed |= fire_conclusion(f, binding, structure, cc, universe, sig)?; + } + Ok(changed) + } + + Formula::Disj(formulas) => { + // Naive parallel chase: fire all disjuncts + // (sound but potentially adds more facts than necessary) + let mut changed = false; + for f in formulas { + changed |= fire_conclusion(f, binding, structure, cc, universe, sig)?; + } + Ok(changed) + } + + Formula::Eq(left, right) => { + fire_equality(left, right, binding, structure, cc, sig) + } + + Formula::Exists(var_name, sort, body) => { + fire_existential(var_name, sort, body, binding, structure, cc, universe, sig) + } + } +} + +/// Evaluate a term to a tuple of Slids (for relation arguments) +fn eval_term_to_tuple( + term: &Term, + binding: &Binding, + structure: &Structure, +) -> Result<Vec<Slid>, ChaseError> { + match term { + Term::Var(name, _) => { + let slid = binding.get(name) + .ok_or_else(|| ChaseError::UnboundVariable(name.clone()))?; + Ok(vec![*slid]) + } + Term::Record(fields) => { + let mut tuple = Vec::new(); + for (_, field_term) in fields { + tuple.extend(eval_term_to_tuple(field_term, binding, structure)?); + } + Ok(tuple) + } + Term::App(_, _) => { + // Delegate to eval_term_to_slid which handles function application + let result = eval_term_to_slid(term, binding, structure)?; + Ok(vec![result]) + } + Term::Project(_, _) => { + Err(ChaseError::UnsupportedConclusion( + "Projection in relation argument".to_string() + )) + } + } +} + +/// Evaluate a term to a single Slid +fn eval_term_to_slid( + term: &Term, + binding: &Binding, + structure: &Structure, +) -> Result<Slid, ChaseError> { + match term { + Term::Var(name, _) => { + binding.get(name) + .copied() + .ok_or_else(|| ChaseError::UnboundVariable(name.clone())) + } + Term::App(func_idx, arg) => { + let arg_slid = eval_term_to_slid(arg, binding, structure)?; + let local_id = 
structure.sort_local_id(arg_slid); + + structure.get_function(*func_idx, local_id) + .ok_or_else(|| ChaseError::UnboundVariable( + format!("Function {} undefined at {:?}", func_idx, arg_slid) + )) + } + Term::Project(base, field) => { + let _base_slid = eval_term_to_slid(base, binding, structure)?; + // Product projection - would need more structure info + Err(ChaseError::UnsupportedConclusion( + format!("Projection .{} not yet supported in chase", field) + )) + } + Term::Record(_) => { + Err(ChaseError::UnsupportedConclusion( + "Record term in scalar position".to_string() + )) + } + } +} + +/// Fire an equality in conclusion: f(x) = y, x = y, etc. +fn fire_equality( + left: &Term, + right: &Term, + binding: &Binding, + structure: &mut Structure, + cc: &mut CongruenceClosure, + sig: &Signature, +) -> Result { + match (left, right) { + // f(arg) = value + (Term::App(func_idx, arg), value) | (value, Term::App(func_idx, arg)) => { + let arg_slid = eval_term_to_slid(arg, binding, structure)?; + let local_id = structure.sort_local_id(arg_slid); + + // Check if dealing with product codomain + let func_info = &sig.functions[*func_idx]; + match &func_info.codomain { + DerivedSort::Base(_) => { + // Simple codomain + let value_slid = eval_term_to_slid(value, binding, structure)?; + + // Check if already defined + if let Some(existing) = structure.get_function(*func_idx, local_id) { + // Check if values are equal (using CC) + if cc.are_equal(existing, value_slid) { + return Ok(false); // Already set to equivalent value + } + // Function conflict: add equation to CC instead of error + // (this is how we propagate equalities through functions) + cc.add_equation(existing, value_slid, EquationReason::FunctionConflict { + func_id: *func_idx, + domain: arg_slid, + }); + return Ok(true); // Changed (added equation) + } + + structure.define_function(*func_idx, arg_slid, value_slid) + .map_err(|e| ChaseError::FunctionConflict(format!("{:?}", e)))?; + Ok(true) + } + 
DerivedSort::Product(_fields) => { + // Product codomain: f(x) = [field1: v1, ...] + if let Term::Record(value_fields) = value { + let codomain_values: Vec<(&str, Slid)> = value_fields.iter() + .map(|(name, term)| { + let slid = eval_term_to_slid(term, binding, structure)?; + Ok((name.as_str(), slid)) + }) + .collect::, ChaseError>>()?; + + // Check if already defined + if let Some(existing) = structure.get_function_product_codomain(*func_idx, local_id) { + let all_match = codomain_values.iter().all(|(name, expected)| { + existing.iter().any(|(n, v)| n == name && cc.are_equal(*v, *expected)) + }); + if all_match { + return Ok(false); + } + return Err(ChaseError::FunctionConflict( + format!("Function {} already defined at {:?} with different values", func_idx, arg_slid) + )); + } + + structure.define_function_product_codomain(*func_idx, arg_slid, &codomain_values) + .map_err(|e| ChaseError::FunctionConflict(format!("{:?}", e)))?; + Ok(true) + } else { + Err(ChaseError::UnsupportedConclusion( + format!("Expected record for product codomain function, got {:?}", value) + )) + } + } + } + } + + // x = y (variable equality) - add to congruence closure! + (Term::Var(name1, _), Term::Var(name2, _)) => { + let slid1 = binding.get(name1) + .ok_or_else(|| ChaseError::UnboundVariable(name1.clone()))?; + let slid2 = binding.get(name2) + .ok_or_else(|| ChaseError::UnboundVariable(name2.clone()))?; + + // Check if already equal in CC + if cc.are_equal(*slid1, *slid2) { + Ok(false) // Already equivalent + } else { + // Add equation to congruence closure + cc.add_equation(*slid1, *slid2, EquationReason::ChaseConclusion); + Ok(true) // Changed! + } + } + + _ => Err(ChaseError::UnsupportedConclusion( + format!("Unsupported equality pattern: {:?} = {:?}", left, right) + )) + } +} + +/// Check if a formula is satisfied given a variable binding. +/// This is used for witness search in existential conclusions. +/// Uses CC for canonical relation lookups and equality checks. 
+fn check_formula_satisfied( + formula: &Formula, + binding: &Binding, + structure: &Structure, + cc: &mut CongruenceClosure, +) -> bool { + match formula { + Formula::True => true, + Formula::False => false, + + Formula::Rel(rel_id, term) => { + // Check if the tuple is in the relation (using canonical representatives) + if let Ok(tuple) = eval_term_to_tuple(term, binding, structure) { + let canonical_tuple: Vec = tuple.iter() + .map(|&s| cc.canonical(s)) + .collect(); + + // Check if a canonically-equivalent tuple exists + structure.relations[*rel_id].iter().any(|existing| { + if existing.len() != canonical_tuple.len() { + return false; + } + existing.iter().zip(canonical_tuple.iter()).all(|(e, c)| { + cc.canonical(*e) == *c + }) + }) + } else { + false // Couldn't evaluate term (unbound variable) + } + } + + Formula::Conj(fs) => { + fs.iter().all(|f| check_formula_satisfied(f, binding, structure, cc)) + } + + Formula::Disj(fs) => { + fs.iter().any(|f| check_formula_satisfied(f, binding, structure, cc)) + } + + Formula::Eq(t1, t2) => { + // Check if both terms evaluate to equivalent values (using CC) + match (eval_term_to_slid(t1, binding, structure), eval_term_to_slid(t2, binding, structure)) { + (Ok(s1), Ok(s2)) => cc.are_equal(s1, s2), + _ => false // Couldn't evaluate (unbound variable or undefined function) + } + } + + Formula::Exists(inner_var, inner_sort, inner_body) => { + // Check if any witness exists in the carrier + let DerivedSort::Base(sort_idx) = inner_sort else { + return false; // Product sorts not supported + }; + + structure.carriers[*sort_idx].iter().any(|w_u64| { + let witness = Slid::from_usize(w_u64 as usize); + let mut extended = binding.clone(); + extended.insert(inner_var.clone(), witness); + check_formula_satisfied(inner_body, &extended, structure, cc) + }) + } + } +} + +/// Fire an existential in conclusion: ∃x:S. body +/// This creates a new element if no witness exists. +/// +/// The algorithm: +/// 1. 
Search the carrier of S for an existing witness w where body[x↦w] holds +/// 2. If found, do nothing (witness exists) +/// 3. If not found, create a fresh element w and fire body as conclusion with x↦w +fn fire_existential( + var_name: &str, + sort: &DerivedSort, + body: &Formula, + binding: &Binding, + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result { + let DerivedSort::Base(sort_idx) = sort else { + return Err(ChaseError::UnsupportedConclusion( + "Existential with product sort not yet supported".to_string() + )); + }; + + // Search for existing witness by checking if body is satisfied (using CC for canonical lookups) + let carrier = &structure.carriers[*sort_idx]; + let witness_found = carrier.iter().any(|elem_u64| { + let elem_slid = Slid::from_usize(elem_u64 as usize); + let mut extended_binding = binding.clone(); + extended_binding.insert(var_name.to_string(), elem_slid); + check_formula_satisfied(body, &extended_binding, structure, cc) + }); + + if witness_found { + return Ok(false); // Witness already exists, nothing to do + } + + // No witness exists - create a fresh element + let (new_elem, _) = structure.add_element(universe, *sort_idx); + + // Fire body as conclusion with the new element bound to var_name + let mut extended_binding = binding.clone(); + extended_binding.insert(var_name.to_string(), new_elem); + + // Use fire_conclusion to make the body true + // This handles relations, equalities, conjunctions uniformly + fire_conclusion(body, &extended_binding, structure, cc, universe, sig)?; + + Ok(true) +} + +/// Run the chase algorithm until a fixpoint is reached, with congruence closure. +/// +/// Repeatedly applies [`chase_step`] and propagates equations until no more changes occur. 
+/// +/// # Arguments +/// +/// * `axioms` - The sequents (axioms) to apply +/// * `structure` - The structure to modify +/// * `cc` - Congruence closure for equality reasoning +/// * `universe` - The universe for element creation +/// * `sig` - The signature +/// * `max_iterations` - Safety limit to prevent infinite loops +/// +/// # Returns +/// +/// The number of iterations taken to reach the fixpoint. +pub fn chase_fixpoint_with_cc( + axioms: &[Sequent], + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, + max_iterations: usize, +) -> Result { + let mut iterations = 0; + + loop { + if iterations >= max_iterations { + return Err(ChaseError::MaxIterationsExceeded(max_iterations)); + } + + // Fire axiom conclusions + let axiom_changed = chase_step(axioms, structure, cc, universe, sig)?; + + // Propagate pending equations in CC + let eq_changed = propagate_equations(structure, cc, sig); + + iterations += 1; + + if !axiom_changed && !eq_changed { + break; + } + } + + Ok(iterations) +} + +/// Propagate pending equations in the congruence closure. +/// +/// This merges equivalence classes and detects function conflicts +/// (which add new equations via congruence). 
+fn propagate_equations( + structure: &Structure, + cc: &mut CongruenceClosure, + sig: &Signature, +) -> bool { + let mut changed = false; + + while let Some(eq) = cc.pop_pending() { + // Merge the equivalence classes + if cc.merge(eq.lhs, eq.rhs) { + changed = true; + + // Check for function conflicts (congruence propagation) + // If f(a) = x and f(b) = y, and a = b (just merged), then x = y + for func_id in 0..sig.functions.len() { + if func_id >= structure.functions.len() { + continue; + } + + let lhs_local = structure.sort_local_id(eq.lhs); + let rhs_local = structure.sort_local_id(eq.rhs); + + let lhs_val = structure.get_function(func_id, lhs_local); + let rhs_val = structure.get_function(func_id, rhs_local); + + if let (Some(lv), Some(rv)) = (lhs_val, rhs_val) + && !cc.are_equal(lv, rv) { + // Congruence: f(a) = lv, f(b) = rv, a = b implies lv = rv + cc.add_equation(lv, rv, EquationReason::Congruence { func_id }); + } + } + } + } + + changed +} + +/// Canonicalize the structure based on the congruence closure. +/// +/// After the chase, some elements may have been merged in the CC but the +/// structure still contains distinct elements. This function: +/// 1. Removes non-canonical elements from carriers +/// 2. Replaces relation tuples with their canonical forms +fn canonicalize_structure(structure: &mut Structure, cc: &mut CongruenceClosure) { + use crate::core::{RelationStorage, VecRelation}; + + // 1. Canonicalize carriers: keep only canonical representatives + for carrier in &mut structure.carriers { + let elements: Vec = carrier.iter().collect(); + carrier.clear(); + for elem in elements { + let slid = Slid::from_usize(elem as usize); + let canonical = cc.canonical(slid); + // Only keep if this element is its own canonical representative + if canonical == slid { + carrier.insert(elem); + } + } + } + + // 2. 
Canonicalize relations: replace tuples with canonical forms + for rel in &mut structure.relations { + let canonical_tuples: Vec> = rel.iter() + .map(|tuple| tuple.iter().map(|&s| cc.canonical(s)).collect()) + .collect(); + + let arity = rel.arity(); + let mut new_rel = VecRelation::new(arity); + for tuple in canonical_tuples { + new_rel.insert(tuple); + } + + *rel = new_rel; + } +} + +/// Run the chase algorithm until a fixpoint is reached. +/// +/// This is a convenience wrapper that creates a fresh congruence closure. +/// Use [`chase_fixpoint_with_cc`] if you need to provide your own CC. +/// +/// # Arguments +/// +/// * `axioms` - The sequents (axioms) to apply +/// * `structure` - The structure to modify +/// * `universe` - The universe for element creation +/// * `sig` - The signature +/// * `max_iterations` - Safety limit to prevent infinite loops +/// +/// # Returns +/// +/// The number of iterations taken to reach the fixpoint. +pub fn chase_fixpoint( + axioms: &[Sequent], + structure: &mut Structure, + universe: &mut Universe, + sig: &Signature, + max_iterations: usize, +) -> Result { + let mut cc = CongruenceClosure::new(); + let iterations = chase_fixpoint_with_cc(axioms, structure, &mut cc, universe, sig, max_iterations)?; + + // Canonicalize structure to reflect CC merges before returning + canonicalize_structure(structure, &mut cc); + + Ok(iterations) +} + +// Tests are in tests/unit_chase.rs diff --git a/src/query/compile.rs b/src/query/compile.rs new file mode 100644 index 0000000..d7dfa0e --- /dev/null +++ b/src/query/compile.rs @@ -0,0 +1,702 @@ +//! Query compiler: high-level queries → QueryOp plans. +//! +//! This module compiles query specifications into executable QueryOp plans. +//! It supports: +//! - Single-sort queries (like `Pattern`) +//! - Multi-sort queries with joins +//! - Function application and projection +//! +//! # Query Styles +//! +//! **∀-style (open sorts):** Elements determined by constraints. +//! 
Compiled to relational algebra (scan, filter, join, project). +//! +//! **∃-style (closed sorts):** Elements are declared constants. +//! Compiled to constraint satisfaction (witness enumeration). +//! [Not yet implemented] +//! +//! # Design +//! +//! Query compilation is currently direct (Query → QueryOp). +//! A future homoiconic version would compile to RelAlgIR instances, +//! which would then be interpreted by the backend. + +use crate::id::Slid; +use super::backend::{JoinCond, Predicate, QueryOp}; + +/// A query specification that can involve multiple sorts and joins. +/// +/// This generalizes `Pattern` to handle: +/// - Multiple source sorts +/// - Joins between sorts +/// - Complex constraints across sorts +/// +/// # Example: Find all Func where Func/theory == target +/// +/// ```ignore +/// let query = Query::scan(func_sort) +/// .filter_eq(theory_func, 0, target_slid) +/// .build(); +/// ``` +/// +/// # Example: Find all (Srt, Func) pairs where Srt/theory == Func/theory +/// +/// ```ignore +/// let query = Query::scan(srt_sort) +/// .join_scan(func_sort) +/// .join_on_func(srt_theory_func, 0, func_theory_func, 1) +/// .build(); +/// ``` +#[derive(Debug, Clone)] +pub struct Query { + /// Sources: each is (sort_idx, alias). Alias is used in constraints. + sources: Vec, + /// Constraints to apply (filters and join conditions) + constraints: Vec, + /// Projection: which columns to return + projection: Projection, +} + +/// A source in the query (a sort to scan). +#[derive(Debug, Clone)] +struct Source { + /// Sort index to scan + sort_idx: usize, + /// Column offset in the combined tuple + /// (each source adds 1 column for its element) + #[allow(dead_code)] // Used for tracking, will be needed for complex projections + col_offset: usize, +} + +/// A constraint in the query. 
+#[derive(Debug, Clone)] +enum Constraint { + /// func(col) == constant + FuncEqConst { + func_idx: usize, + arg_col: usize, + expected: Slid, + }, + /// func1(col1) == func2(col2) + FuncEqFunc { + func1_idx: usize, + arg1_col: usize, + func2_idx: usize, + arg2_col: usize, + }, + /// col1 == col2 (direct element equality) + ColEq { + col1: usize, + col2: usize, + }, + /// col == constant + ColEqConst { + col: usize, + expected: Slid, + }, +} + +/// Projection specification. +#[derive(Debug, Clone)] +enum Projection { + /// Return all columns + All, + /// Return specific columns + Cols(Vec), + /// Return specific columns with function applications + FuncCols(Vec), +} + +/// A column in projection, possibly with function application. +#[derive(Debug, Clone)] +struct FuncCol { + /// Column to use as argument + arg_col: usize, + /// Function to apply (None = just the element) + func_idx: Option, +} + +impl Query { + /// Create a new query scanning a single sort. + pub fn scan(sort_idx: usize) -> QueryBuilder { + QueryBuilder { + sources: vec![Source { sort_idx, col_offset: 0 }], + constraints: vec![], + projection: Projection::All, + next_col: 1, + } + } +} + +/// Builder for constructing queries fluently. +#[derive(Debug, Clone)] +pub struct QueryBuilder { + sources: Vec, + constraints: Vec, + projection: Projection, + next_col: usize, +} + +impl QueryBuilder { + /// Add another sort to scan (creates a cross join, to be constrained). + pub fn join_scan(mut self, sort_idx: usize) -> Self { + let col_offset = self.next_col; + self.sources.push(Source { sort_idx, col_offset }); + self.next_col += 1; + self + } + + /// Add a filter: func(col) == expected. + /// + /// `col` is 0-indexed, referring to which source's element. + pub fn filter_eq(mut self, func_idx: usize, arg_col: usize, expected: Slid) -> Self { + self.constraints.push(Constraint::FuncEqConst { + func_idx, + arg_col, + expected, + }); + self + } + + /// Add a join condition: func1(col1) == func2(col2). 
+ /// + /// Used to join two scanned sorts by comparing function values. + pub fn join_on_func( + mut self, + func1_idx: usize, + arg1_col: usize, + func2_idx: usize, + arg2_col: usize, + ) -> Self { + self.constraints.push(Constraint::FuncEqFunc { + func1_idx, + arg1_col, + func2_idx, + arg2_col, + }); + self + } + + /// Add an element equality constraint: col1 == col2. + pub fn where_eq(mut self, col1: usize, col2: usize) -> Self { + self.constraints.push(Constraint::ColEq { col1, col2 }); + self + } + + /// Add a constant equality constraint: col == expected. + pub fn where_const(mut self, col: usize, expected: Slid) -> Self { + self.constraints.push(Constraint::ColEqConst { col, expected }); + self + } + + /// Project to specific columns. + pub fn project(mut self, cols: Vec) -> Self { + self.projection = Projection::Cols(cols); + self + } + + /// Project with function applications. + pub fn project_funcs(mut self, func_cols: Vec<(usize, Option)>) -> Self { + self.projection = Projection::FuncCols( + func_cols + .into_iter() + .map(|(arg_col, func_idx)| FuncCol { arg_col, func_idx }) + .collect(), + ); + self + } + + /// Build the final Query. + pub fn build(self) -> Query { + Query { + sources: self.sources, + constraints: self.constraints, + projection: self.projection, + } + } + + /// Compile directly to QueryOp (skipping Query intermediate). + pub fn compile(self) -> QueryOp { + self.build().compile() + } +} + +impl Query { + /// Compile the query to a QueryOp plan. + /// + /// The compilation strategy: + /// 1. Scan each source sort + /// 2. Join scans together (cross join if >1) + /// 3. Handle FuncEqFunc constraints by applying functions, then filtering + /// 4. Apply other constraints as filters + /// 5. 
Apply projection + pub fn compile(&self) -> QueryOp { + if self.sources.is_empty() { + return QueryOp::Empty; + } + + // Step 1: Build base plan from sources + let mut plan = QueryOp::Scan { + sort_idx: self.sources[0].sort_idx, + }; + + // Track current column count (each source adds 1 column) + let mut current_cols = 1; + + // If multiple sources, join them + for source in &self.sources[1..] { + let right = QueryOp::Scan { + sort_idx: source.sort_idx, + }; + plan = QueryOp::Join { + left: Box::new(plan), + right: Box::new(right), + cond: JoinCond::Cross, // Start with cross join, constraints will filter + }; + current_cols += 1; + } + + // Step 2: Separate FuncEqFunc constraints (need Apply) from others + let mut func_eq_func_constraints = Vec::new(); + let mut simple_constraints = Vec::new(); + + for constraint in &self.constraints { + match constraint { + Constraint::FuncEqFunc { .. } => func_eq_func_constraints.push(constraint), + _ => simple_constraints.push(constraint), + } + } + + // Step 3: Handle FuncEqFunc constraints + // For each, apply both functions, track the added columns, then filter on equality + for constraint in func_eq_func_constraints { + if let Constraint::FuncEqFunc { + func1_idx, + arg1_col, + func2_idx, + arg2_col, + } = constraint + { + // Apply func1 to arg1_col, result goes in current_cols + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func1_idx, + arg_col: *arg1_col, + }; + let col1_result = current_cols; + current_cols += 1; + + // Apply func2 to arg2_col, result goes in current_cols + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func2_idx, + arg_col: *arg2_col, + }; + let col2_result = current_cols; + current_cols += 1; + + // Filter where the two result columns are equal + plan = QueryOp::Filter { + input: Box::new(plan), + pred: Predicate::ColEqCol { + left: col1_result, + right: col2_result, + }, + }; + } + } + + // Step 4: Apply simple constraints as filters + for constraint in 
simple_constraints { + let pred = match constraint { + Constraint::FuncEqConst { + func_idx, + arg_col, + expected, + } => Predicate::FuncEqConst { + func_idx: *func_idx, + arg_col: *arg_col, + expected: *expected, + }, + Constraint::FuncEqFunc { .. } => { + unreachable!("FuncEqFunc already handled") + } + Constraint::ColEq { col1, col2 } => Predicate::ColEqCol { + left: *col1, + right: *col2, + }, + Constraint::ColEqConst { col, expected } => Predicate::ColEqConst { + col: *col, + val: *expected, + }, + }; + plan = QueryOp::Filter { + input: Box::new(plan), + pred, + }; + } + + // Step 5: Apply projection + match &self.projection { + Projection::All => { + // No projection needed, return all columns + } + Projection::Cols(cols) => { + plan = QueryOp::Project { + input: Box::new(plan), + columns: cols.clone(), + }; + } + Projection::FuncCols(func_cols) => { + // Apply each function, then project + let base_col = current_cols; // Start adding func results here + for fc in func_cols.iter() { + if let Some(func_idx) = fc.func_idx { + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx, + arg_col: fc.arg_col, + }; + current_cols += 1; + } + } + // Project to the added columns + if current_cols > base_col { + let columns: Vec = (base_col..current_cols).collect(); + plan = QueryOp::Project { + input: Box::new(plan), + columns, + }; + } + } + } + + plan + } +} + +// ============================================================================ +// Convenience functions for common query patterns +// ============================================================================ + +/// Compile a simple single-sort query: scan sort, filter by func == value. +/// +/// This is equivalent to `Pattern::new(sort).filter(func, value).compile()` +/// but uses the new Query API. 
+pub fn compile_simple_filter(sort_idx: usize, func_idx: usize, expected: Slid) -> QueryOp { + Query::scan(sort_idx) + .filter_eq(func_idx, 0, expected) + .compile() +} + +/// Compile a query that returns func(elem) for matching elements. +/// +/// scan(sort) |> filter(filter_func(elem) == expected) |> project(project_func(elem)) +pub fn compile_filter_project( + sort_idx: usize, + filter_func: usize, + expected: Slid, + project_func: usize, +) -> QueryOp { + // scan → filter → apply → project + let scan = QueryOp::Scan { sort_idx }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::FuncEqConst { + func_idx: filter_func, + arg_col: 0, + expected, + }, + }; + let apply = QueryOp::Apply { + input: Box::new(filter), + func_idx: project_func, + arg_col: 0, + }; + // Now we have (elem, func(elem)), project to just column 1 + QueryOp::Project { + input: Box::new(apply), + columns: vec![1], + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::id::NumericId; + + #[test] + fn test_simple_scan_compiles() { + let plan = Query::scan(0).compile(); + assert!(matches!(plan, QueryOp::Scan { sort_idx: 0 })); + } + + /// Test that Query-compiled plans produce same results as Pattern. + /// + /// This validates that the new Query API is equivalent to the + /// existing Pattern API for simple queries. 
+ #[test] + fn test_query_matches_pattern() { + use crate::core::Structure; + use crate::query::backend::execute; + use crate::query::{Pattern, Projection as PatternProjection}; + + // Create a structure with some data + let mut structure = Structure::new(2); + // Sort 0: elements 0, 1, 2 + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + // Sort 1: elements 10, 11 + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + // Test 1: Simple scan + let pattern_plan = Pattern { + source_sort: 0, + constraints: vec![], + projection: PatternProjection::Element, + } + .compile(); + + let query_plan = Query::scan(0).compile(); + + let pattern_result = execute(&pattern_plan, &structure); + let query_result = execute(&query_plan, &structure); + + assert_eq!( + pattern_result.len(), + query_result.len(), + "Scan should return same number of results" + ); + + // Test 2: Scan with filter (using ColEqConst since we don't have functions) + let pattern_plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }, + }; + + let query_plan = Query::scan(0) + .where_const(0, Slid::from_usize(1)) + .compile(); + + let pattern_result = execute(&pattern_plan, &structure); + let query_result = execute(&query_plan, &structure); + + assert_eq!(pattern_result.len(), 1); + assert_eq!(query_result.len(), 1); + } + + /// Test FuncEqFunc constraint: func1(col1) == func2(col2) + #[test] + fn test_func_eq_func_join() { + use crate::core::Structure; + use crate::query::backend::execute; + use crate::universe::Universe; + + // Create a structure with two sorts + let mut structure = Structure::new(2); + let mut universe = Universe::new(); + + // Sort 0: elements a, b + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + + // Sort 1: elements x, y, z + let (x, _) = 
structure.add_element(&mut universe, 1); + let (y, _) = structure.add_element(&mut universe, 1); + let (z, _) = structure.add_element(&mut universe, 1); + + // Common target for function results + let target1 = Slid::from_usize(100); + let target2 = Slid::from_usize(200); + + // Initialize functions + // func0: Sort0 -> targets (a→100, b→200) + // func1: Sort1 -> targets (x→100, y→200, z→100) + structure.init_functions(&[Some(0), Some(1)]); + + structure.define_function(0, a, target1).unwrap(); + structure.define_function(0, b, target2).unwrap(); + structure.define_function(1, x, target1).unwrap(); + structure.define_function(1, y, target2).unwrap(); + structure.define_function(1, z, target1).unwrap(); + + // Query: Find all (s0, s1) where func0(s0) == func1(s1) + // Expected matches: + // - (a, x) because func0(a)=100 == func1(x)=100 + // - (a, z) because func0(a)=100 == func1(z)=100 + // - (b, y) because func0(b)=200 == func1(y)=200 + + let plan = Query::scan(0) + .join_scan(1) + .join_on_func(0, 0, 1, 1) // func0(col0) == func1(col1) + .compile(); + + let result = execute(&plan, &structure); + + // Should have exactly 3 matching pairs + assert_eq!( + result.len(), + 3, + "Expected 3 matching pairs, got {}", + result.len() + ); + } + + /// Integration test: validate compiled queries against bootstrap_queries. + /// + /// This test creates a real theory using the REPL, then verifies that + /// queries compiled with the Query API produce the same results as + /// the handcoded bootstrap_queries methods. 
+ #[test] + fn test_query_matches_bootstrap_queries() { + use crate::repl::ReplState; + + // Create a theory via REPL + let source = r#" + theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + // Get the theory slid + let theory_slid = match repl.store.resolve_name("Graph") { + Some((slid, _)) => slid, + None => panic!("Theory 'Graph' not found"), + }; + + // Get bootstrap_queries result + let bootstrap_sorts = repl.store.query_theory_sorts(theory_slid); + + // Now compile a Query that does the same thing: + // "Find all Srt where Srt/theory == theory_slid" + let srt_sort = repl.store.sort_ids.srt.expect("Srt sort not found"); + let theory_func = repl + .store + .func_ids + .srt_theory + .expect("Srt/theory func not found"); + + // Compile the query + let plan = compile_simple_filter(srt_sort, theory_func, theory_slid); + + // Execute against the store's meta structure + let result = crate::query::backend::execute(&plan, &repl.store.meta); + + // Compare: should have same number of sorts + assert_eq!( + bootstrap_sorts.len(), + result.len(), + "Query should return same number of sorts as bootstrap_queries.\n\ + Bootstrap returned {} sorts: {:?}\n\ + Compiled query returned {} tuples", + bootstrap_sorts.len(), + bootstrap_sorts.iter().map(|s| &s.name).collect::>(), + result.len() + ); + + // Verify we got V and E + assert!( + bootstrap_sorts.len() >= 2, + "Graph theory should have at least V and E sorts" + ); + } + + #[test] + fn test_filter_compiles() { + let plan = Query::scan(0) + .filter_eq(1, 0, Slid::from_usize(42)) + .compile(); + + // Should be Filter(Scan) + if let QueryOp::Filter { input, pred } = plan { + assert!(matches!(*input, QueryOp::Scan { sort_idx: 0 })); + assert!(matches!( + pred, + Predicate::FuncEqConst { + func_idx: 1, + arg_col: 0, + .. 
+ } + )); + } else { + panic!("Expected Filter, got {:?}", plan); + } + } + + #[test] + fn test_join_compiles() { + let plan = Query::scan(0) + .join_scan(1) + .compile(); + + // Should be Join(Scan, Scan) + if let QueryOp::Join { left, right, .. } = plan { + assert!(matches!(*left, QueryOp::Scan { sort_idx: 0 })); + assert!(matches!(*right, QueryOp::Scan { sort_idx: 1 })); + } else { + panic!("Expected Join, got {:?}", plan); + } + } + + #[test] + fn test_compile_simple_filter() { + let plan = compile_simple_filter(5, 3, Slid::from_usize(100)); + + if let QueryOp::Filter { input, pred } = plan { + assert!(matches!(*input, QueryOp::Scan { sort_idx: 5 })); + if let Predicate::FuncEqConst { + func_idx, + arg_col, + expected, + } = pred + { + assert_eq!(func_idx, 3); + assert_eq!(arg_col, 0); + assert_eq!(expected, Slid::from_usize(100)); + } else { + panic!("Expected FuncEqConst predicate"); + } + } else { + panic!("Expected Filter"); + } + } + + #[test] + fn test_compile_filter_project() { + let plan = compile_filter_project(0, 1, Slid::from_usize(42), 2); + + // Should be Project(Apply(Filter(Scan))) + if let QueryOp::Project { input, columns } = plan { + assert_eq!(columns, vec![1]); + if let QueryOp::Apply { + input, + func_idx, + arg_col, + } = *input + { + assert_eq!(func_idx, 2); + assert_eq!(arg_col, 0); + if let QueryOp::Filter { input, .. } = *input { + assert!(matches!(*input, QueryOp::Scan { sort_idx: 0 })); + } else { + panic!("Expected Filter inside Apply"); + } + } else { + panic!("Expected Apply inside Project"); + } + } else { + panic!("Expected Project"); + } + } +} diff --git a/src/query/exec.rs b/src/query/exec.rs new file mode 100644 index 0000000..10172d2 --- /dev/null +++ b/src/query/exec.rs @@ -0,0 +1,243 @@ +//! Query execution against a Store. +//! +//! This module executes Pattern queries against the GeologMeta store, +//! computing the unique maximal element (cofree model) for ∀-style queries. 
+ +use crate::id::Slid; +use crate::store::Store; +use crate::store::append::AppendOps; + +use super::{Pattern, Projection}; + +/// Result of a pattern query. +/// +/// For ∀-style queries (open sorts), this is the cofree model: +/// all elements satisfying the constraints. +#[derive(Debug, Clone)] +pub enum QueryResult { + /// List of matching elements + Elements(Vec), + /// List of projected values + Values(Vec), + /// List of projected tuples + Tuples(Vec>), +} + +impl QueryResult { + /// Get as elements (panics if not Elements variant). + pub fn into_elements(self) -> Vec { + match self { + QueryResult::Elements(e) => e, + _ => panic!("QueryResult is not Elements"), + } + } + + /// Get as values (panics if not Values variant). + pub fn into_values(self) -> Vec { + match self { + QueryResult::Values(v) => v, + _ => panic!("QueryResult is not Values"), + } + } + + /// Get as tuples (panics if not Tuples variant). + pub fn into_tuples(self) -> Vec> { + match self { + QueryResult::Tuples(t) => t, + _ => panic!("QueryResult is not Tuples"), + } + } + + /// Check if the result is empty. + pub fn is_empty(&self) -> bool { + match self { + QueryResult::Elements(e) => e.is_empty(), + QueryResult::Values(v) => v.is_empty(), + QueryResult::Tuples(t) => t.is_empty(), + } + } + + /// Get the number of results. + pub fn len(&self) -> usize { + match self { + QueryResult::Elements(e) => e.len(), + QueryResult::Values(v) => v.len(), + QueryResult::Tuples(t) => t.len(), + } + } +} + +/// Execute a pattern query against a store. +/// +/// This is the ∀-style query executor: scans all elements of source_sort, +/// filters by constraints, and projects the result. +/// +/// In terms of query semantics: computes the unique maximal element +/// (cofree model) of the theory extension. 
+pub fn execute_pattern(store: &Store, pattern: &Pattern) -> QueryResult { + // Scan all elements of source sort + let candidates = store.elements_of_sort(pattern.source_sort); + + // Filter by constraints + let matching: Vec = candidates + .into_iter() + .filter(|&elem| { + pattern.constraints.iter().all(|c| { + store.get_func(c.func, elem) == Some(c.expected) + }) + }) + .collect(); + + // Project + match &pattern.projection { + Projection::Element => QueryResult::Elements(matching), + Projection::Func(func) => { + let values: Vec = matching + .into_iter() + .filter_map(|elem| store.get_func(*func, elem)) + .collect(); + QueryResult::Values(values) + } + Projection::Tuple(funcs) => { + let tuples: Vec> = matching + .into_iter() + .filter_map(|elem| { + let tuple: Vec = funcs + .iter() + .filter_map(|f| store.get_func(*f, elem)) + .collect(); + // Only include if all projections succeeded + if tuple.len() == funcs.len() { + Some(tuple) + } else { + None + } + }) + .collect(); + QueryResult::Tuples(tuples) + } + } +} + +/// Convenience methods on Store for pattern queries. +impl Store { + /// Execute a pattern query. + /// + /// # Example + /// + /// ```ignore + /// // Find all Srt where Srt.theory == theory_slid + /// let result = store.query( + /// Pattern::new(store.sort_ids.srt.unwrap()) + /// .filter(store.func_ids.srt_theory.unwrap(), theory_slid) + /// ); + /// ``` + pub fn query(&self, pattern: &Pattern) -> QueryResult { + execute_pattern(self, pattern) + } + + /// Execute a pattern query and return just the matching elements. 
+ pub fn query_elements(&self, pattern: &Pattern) -> Vec { + execute_pattern(self, pattern).into_elements() + } +} + +// ============================================================================ +// Typed query helpers that replace bootstrap_queries +// ============================================================================ + +/// Information about a sort (mirrors bootstrap_queries::SortInfo) +#[derive(Debug, Clone)] +pub struct SortInfo { + pub name: String, + pub slid: Slid, +} + +impl Store { + /// Query all sorts belonging to a theory using Pattern API. + /// + /// This is the Pattern-based equivalent of bootstrap_queries::query_theory_sorts. + /// Both should return identical results. + pub fn query_sorts_of_theory(&self, theory_slid: Slid) -> Vec { + let Some(srt_sort) = self.sort_ids.srt else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.srt_theory else { + return vec![]; + }; + + // The core pattern: find all Srt where Srt.theory == theory_slid + let pattern = Pattern::new(srt_sort) + .filter(theory_func, theory_slid); + + // Execute and post-process + self.query_elements(&pattern) + .into_iter() + .map(|slid| { + let name = self.get_element_name(slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + SortInfo { name: short_name, slid } + }) + .collect() + } +} + +#[cfg(test)] +mod tests { + /// Test that Pattern-based query matches bootstrap_queries. + /// + /// This is a sanity test to ensure the new query engine gives + /// identical results to the hand-coded queries. 
+ #[test] + fn test_query_sorts_matches_bootstrap() { + // Parse and elaborate a theory via REPL + let source = r#" + theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + } + "#; + + // Use ReplState to execute + let mut repl = crate::repl::ReplState::new(); + let _ = repl.execute_geolog(source); + + // Get the theory slid + if let Some((theory_slid, _)) = repl.store.resolve_name("Graph") { + // Query using bootstrap method + let bootstrap_result = repl.store.query_theory_sorts(theory_slid); + + // Query using Pattern method + let pattern_result = repl.store.query_sorts_of_theory(theory_slid); + + // Should have same number of results + assert_eq!( + bootstrap_result.len(), + pattern_result.len(), + "Different number of sorts returned: bootstrap={}, pattern={}", + bootstrap_result.len(), + pattern_result.len() + ); + + // Should have same sort names (V and E) + let bootstrap_names: std::collections::HashSet<_> = + bootstrap_result.iter().map(|s| &s.name).collect(); + let pattern_names: std::collections::HashSet<_> = + pattern_result.iter().map(|s| &s.name).collect(); + + assert_eq!( + bootstrap_names, + pattern_names, + "Different sort names returned" + ); + + // Verify we got the expected sorts + assert!(bootstrap_names.contains(&"V".to_string()), "Missing sort V"); + assert!(bootstrap_names.contains(&"E".to_string()), "Missing sort E"); + } else { + panic!("Theory 'Graph' not found after execution"); + } + } +} diff --git a/src/query/from_relalg.rs b/src/query/from_relalg.rs new file mode 100644 index 0000000..808f253 --- /dev/null +++ b/src/query/from_relalg.rs @@ -0,0 +1,1239 @@ +//! RelAlgIR Interpreter: Execute query plans represented as geolog instances. +//! +//! This module provides a CPU backend that interprets RelAlgIR instances. +//! It reads the string diagram structure from a geolog Structure and executes +//! the query operations to produce results. +//! +//! # Architecture +//! +//! 
A RelAlgIR instance encodes a query plan as a string diagram: +//! - Wire elements are edges carrying data streams (Z-sets of tuples) +//! - Op elements are boxes transforming data +//! - Composition is encoded by wire sharing (same Wire as output of one Op and input of another) +//! +//! The interpreter: +//! 1. Parses the instance structure to extract operations and wires +//! 2. Builds a dependency graph from wire connections +//! 3. Topologically sorts operations (respecting DBSP delay semantics) +//! 4. Executes each operation in order +//! 5. Returns the result on the designated output wire +//! +//! # Example +//! +//! ```ignore +//! use geolog::query::from_relalg::execute_relalg; +//! +//! let result = execute_relalg( +//! &relalg_instance, // The compiled query plan +//! &relalg_theory, // RelAlgIR theory +//! &target_structure, // Data to query +//! )?; +//! ``` + +use std::collections::{HashMap, VecDeque}; + +use crate::core::{ElaboratedTheory, Structure}; +use crate::id::{NumericId, Slid, get_slid}; +use crate::query::backend::Bag; +use crate::query::to_relalg::RelAlgInstance; + +/// Error type for RelAlgIR execution +#[derive(Debug, Clone)] +pub enum RelAlgError { + /// Missing required sort in RelAlgIR theory + MissingSortId(String), + /// Missing required function in RelAlgIR theory + MissingFuncId(String), + /// No output wire found + NoOutputWire, + /// Invalid operation structure + InvalidOp(String), + /// Cycle detected without delay + InstantaneousCycle, + /// Unsupported operation + Unsupported(String), +} + +impl std::fmt::Display for RelAlgError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MissingSortId(s) => write!(f, "Missing sort: {s}"), + Self::MissingFuncId(s) => write!(f, "Missing function: {s}"), + Self::NoOutputWire => write!(f, "No output wire found in plan"), + Self::InvalidOp(s) => write!(f, "Invalid operation: {s}"), + Self::InstantaneousCycle => write!(f, "Cycle detected without 
delay"), + Self::Unsupported(s) => write!(f, "Unsupported: {s}"), + } + } +} + +impl std::error::Error for RelAlgError {} + +/// Cached sort/function IDs from RelAlgIR theory +#[allow(dead_code)] // Some IDs are for future use +struct RelAlgIds { + // Core sorts + wire: usize, + op: usize, + + // Operation sorts + scan_op: usize, + filter_op: usize, + distinct_op: usize, + negate_op: usize, + join_op: usize, + union_op: usize, + empty_op: usize, + delay_op: usize, + diff_op: usize, + integrate_op: usize, + + // Predicate sorts + pred: usize, + true_pred: usize, + false_pred: usize, + col_eq_pred: usize, + const_eq_pred: usize, + and_pred: usize, + or_pred: usize, + + // Join condition sorts + join_cond: usize, + equi_join_cond: usize, + cross_join_cond: usize, + + // Column reference sorts + col_ref: usize, + col_path: usize, + here_path: usize, + left_path: usize, + right_path: usize, + + // GeologMeta sorts (for references to target structure) + srt: usize, + elem: usize, + func: usize, +} + +impl RelAlgIds { + fn from_theory(theory: &ElaboratedTheory) -> Result { + let sig = &theory.theory.signature; + + let get_sort = |name: &str| -> Result { + sig.sorts + .iter() + .position(|s| s == name) + .ok_or_else(|| RelAlgError::MissingSortId(name.to_string())) + }; + + Ok(Self { + wire: get_sort("Wire")?, + op: get_sort("Op")?, + + scan_op: get_sort("ScanOp")?, + filter_op: get_sort("FilterOp")?, + distinct_op: get_sort("DistinctOp")?, + negate_op: get_sort("NegateOp")?, + join_op: get_sort("JoinOp")?, + union_op: get_sort("UnionOp")?, + empty_op: get_sort("EmptyOp")?, + delay_op: get_sort("DelayOp")?, + diff_op: get_sort("DiffOp")?, + integrate_op: get_sort("IntegrateOp")?, + + pred: get_sort("Pred")?, + true_pred: get_sort("TruePred")?, + false_pred: get_sort("FalsePred")?, + col_eq_pred: get_sort("ColEqPred")?, + const_eq_pred: get_sort("ConstEqPred")?, + and_pred: get_sort("AndPred")?, + or_pred: get_sort("OrPred")?, + + join_cond: get_sort("JoinCond")?, + 
equi_join_cond: get_sort("EquiJoinCond")?, + cross_join_cond: get_sort("CrossJoinCond")?, + + col_ref: get_sort("ColRef")?, + col_path: get_sort("ColPath")?, + here_path: get_sort("HerePath")?, + left_path: get_sort("LeftPath")?, + right_path: get_sort("RightPath")?, + + srt: get_sort("GeologMeta/Srt")?, + elem: get_sort("GeologMeta/Elem")?, + func: get_sort("GeologMeta/Func")?, + }) + } +} + +/// Function IDs for navigating RelAlgIR structure +#[allow(dead_code)] // Some IDs are for future use +struct RelAlgFuncs { + // ScanOp accessors + scan_op_srt: usize, + scan_op_out: usize, + + // FilterOp accessors + filter_op_in: usize, + filter_op_out: usize, + filter_op_pred: usize, + + // DistinctOp accessors + distinct_op_in: usize, + distinct_op_out: usize, + + // NegateOp accessors + negate_op_in: usize, + negate_op_out: usize, + + // JoinOp accessors + join_op_left_in: usize, + join_op_right_in: usize, + join_op_out: usize, + join_op_cond: usize, + + // UnionOp accessors + union_op_left_in: usize, + union_op_right_in: usize, + union_op_out: usize, + + // EmptyOp accessors + empty_op_out: usize, + + // DelayOp accessors + delay_op_in: usize, + delay_op_out: usize, + + // DiffOp accessors + diff_op_in: usize, + diff_op_out: usize, + + // IntegrateOp accessors + integrate_op_in: usize, + integrate_op_out: usize, + + // Predicate accessors + true_pred_pred: usize, + false_pred_pred: usize, + col_eq_pred_pred: usize, + col_eq_pred_left: usize, + col_eq_pred_right: usize, + const_eq_pred_pred: usize, + const_eq_pred_col: usize, + const_eq_pred_val: usize, + and_pred_pred: usize, + and_pred_left: usize, + and_pred_right: usize, + or_pred_pred: usize, + or_pred_left: usize, + or_pred_right: usize, + + // Join condition accessors + equi_join_cond_cond: usize, + equi_join_cond_left_col: usize, + equi_join_cond_right_col: usize, + cross_join_cond_cond: usize, + + // ColRef accessors + col_ref_wire: usize, + col_ref_path: usize, + + // ColPath accessors + here_path_path: usize, 
+ left_path_path: usize, + left_path_rest: usize, + right_path_path: usize, + right_path_rest: usize, +} + +impl RelAlgFuncs { + fn from_theory(theory: &ElaboratedTheory) -> Result { + let sig = &theory.theory.signature; + + let get_func = |name: &str| -> Result { + sig.func_names + .get(name) + .copied() + .ok_or_else(|| RelAlgError::MissingFuncId(name.to_string())) + }; + + Ok(Self { + scan_op_srt: get_func("ScanOp/srt")?, + scan_op_out: get_func("ScanOp/out")?, + + filter_op_in: get_func("FilterOp/in")?, + filter_op_out: get_func("FilterOp/out")?, + filter_op_pred: get_func("FilterOp/pred")?, + + distinct_op_in: get_func("DistinctOp/in")?, + distinct_op_out: get_func("DistinctOp/out")?, + + negate_op_in: get_func("NegateOp/in")?, + negate_op_out: get_func("NegateOp/out")?, + + join_op_left_in: get_func("JoinOp/left_in")?, + join_op_right_in: get_func("JoinOp/right_in")?, + join_op_out: get_func("JoinOp/out")?, + join_op_cond: get_func("JoinOp/cond")?, + + union_op_left_in: get_func("UnionOp/left_in")?, + union_op_right_in: get_func("UnionOp/right_in")?, + union_op_out: get_func("UnionOp/out")?, + + empty_op_out: get_func("EmptyOp/out")?, + + delay_op_in: get_func("DelayOp/in")?, + delay_op_out: get_func("DelayOp/out")?, + + diff_op_in: get_func("DiffOp/in")?, + diff_op_out: get_func("DiffOp/out")?, + + integrate_op_in: get_func("IntegrateOp/in")?, + integrate_op_out: get_func("IntegrateOp/out")?, + + true_pred_pred: get_func("TruePred/pred")?, + false_pred_pred: get_func("FalsePred/pred")?, + col_eq_pred_pred: get_func("ColEqPred/pred")?, + col_eq_pred_left: get_func("ColEqPred/left")?, + col_eq_pred_right: get_func("ColEqPred/right")?, + const_eq_pred_pred: get_func("ConstEqPred/pred")?, + const_eq_pred_col: get_func("ConstEqPred/col")?, + const_eq_pred_val: get_func("ConstEqPred/val")?, + and_pred_pred: get_func("AndPred/pred")?, + and_pred_left: get_func("AndPred/left")?, + and_pred_right: get_func("AndPred/right")?, + or_pred_pred: get_func("OrPred/pred")?, 
+ or_pred_left: get_func("OrPred/left")?, + or_pred_right: get_func("OrPred/right")?, + + equi_join_cond_cond: get_func("EquiJoinCond/cond")?, + equi_join_cond_left_col: get_func("EquiJoinCond/left_col")?, + equi_join_cond_right_col: get_func("EquiJoinCond/right_col")?, + cross_join_cond_cond: get_func("CrossJoinCond/cond")?, + + col_ref_wire: get_func("ColRef/wire")?, + col_ref_path: get_func("ColRef/path")?, + + here_path_path: get_func("HerePath/path")?, + left_path_path: get_func("LeftPath/path")?, + left_path_rest: get_func("LeftPath/rest")?, + right_path_path: get_func("RightPath/path")?, + right_path_rest: get_func("RightPath/rest")?, + }) + } +} + +/// Parsed operation from a RelAlgIR instance +#[derive(Debug, Clone)] +enum ParsedOp { + Scan { + sort_idx: usize, + out_wire: Slid, + }, + Filter { + in_wire: Slid, + out_wire: Slid, + pred: Slid, + }, + Distinct { + in_wire: Slid, + out_wire: Slid, + }, + Negate { + in_wire: Slid, + out_wire: Slid, + }, + Join { + left_wire: Slid, + right_wire: Slid, + out_wire: Slid, + cond: Slid, + }, + Union { + left_wire: Slid, + right_wire: Slid, + out_wire: Slid, + }, + Empty { + out_wire: Slid, + }, + Delay { + in_wire: Slid, + out_wire: Slid, + }, + Diff { + in_wire: Slid, + out_wire: Slid, + }, + Integrate { + in_wire: Slid, + out_wire: Slid, + }, +} + +impl ParsedOp { + fn out_wire(&self) -> Slid { + match self { + Self::Scan { out_wire, .. } + | Self::Filter { out_wire, .. } + | Self::Distinct { out_wire, .. } + | Self::Negate { out_wire, .. } + | Self::Join { out_wire, .. } + | Self::Union { out_wire, .. } + | Self::Empty { out_wire, .. } + | Self::Delay { out_wire, .. } + | Self::Diff { out_wire, .. } + | Self::Integrate { out_wire, .. } => *out_wire, + } + } + + fn in_wires(&self) -> Vec { + match self { + Self::Scan { .. } | Self::Empty { .. } => vec![], + Self::Filter { in_wire, .. } + | Self::Distinct { in_wire, .. } + | Self::Negate { in_wire, .. } + | Self::Delay { in_wire, .. } + | Self::Diff { in_wire, .. 
} + | Self::Integrate { in_wire, .. } => vec![*in_wire], + Self::Join { + left_wire, + right_wire, + .. + } + | Self::Union { + left_wire, + right_wire, + .. + } => vec![*left_wire, *right_wire], + } + } + + /// Returns true if this operation breaks instantaneous cycles + fn breaks_cycle(&self) -> bool { + matches!(self, Self::Delay { .. } | Self::Integrate { .. }) + } +} + +/// Parsed predicate from a RelAlgIR instance +#[derive(Debug, Clone)] +pub enum ParsedPred { + True, + False, + ColEq { left: usize, right: usize }, + ConstEq { col: usize, val: Slid }, + And(Box, Box), + Or(Box, Box), +} + +/// Parsed join condition +#[derive(Debug, Clone)] +pub enum ParsedJoinCond { + Cross, + Equi { left_col: usize, right_col: usize }, +} + +/// Context for interpreting RelAlgIR instances +struct InterpretContext<'a> { + /// The RelAlgIR instance being interpreted + instance: &'a RelAlgInstance, + /// RelAlgIR theory sort IDs + ids: RelAlgIds, + /// RelAlgIR theory function IDs + funcs: RelAlgFuncs, + /// Wire values during execution + wire_values: HashMap, + /// Target structure being queried + target: &'a Structure, +} + +impl<'a> InterpretContext<'a> { + fn new( + instance: &'a RelAlgInstance, + theory: &ElaboratedTheory, + target: &'a Structure, + ) -> Result { + Ok(Self { + instance, + ids: RelAlgIds::from_theory(theory)?, + funcs: RelAlgFuncs::from_theory(theory)?, + wire_values: HashMap::new(), + target, + }) + } + + /// Get function value for an element + fn get_func_value(&self, func_id: usize, elem: Slid) -> Option { + let structure = &self.instance.structure; + // Convert global Slid to sort-local index + let local_idx = structure.sort_local_id(elem).index(); + structure + .functions + .get(func_id) + .and_then(|f| get_slid(f.get_local(local_idx))) + } + + /// Get sort index from a GeologMeta/Srt element using the sort_mapping + fn get_srt_sort_idx(&self, srt_elem: Slid) -> Result { + self.instance + .sort_mapping + .get(&srt_elem) + .copied() + .ok_or_else(|| 
RelAlgError::InvalidOp(format!( + "Unknown Srt element {:?} - not in sort_mapping", + srt_elem + ))) + } + + /// Parse all operations from the instance + fn parse_operations(&self) -> Result, RelAlgError> { + let mut ops = Vec::new(); + let structure = &self.instance.structure; + + // Find all ScanOp elements + for elem_idx in structure.carriers[self.ids.scan_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let srt = self + .get_func_value(self.funcs.scan_op_srt, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ScanOp missing srt".into()))?; + let out_wire = self + .get_func_value(self.funcs.scan_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ScanOp missing out".into()))?; + + let sort_idx = self.get_srt_sort_idx(srt)?; + ops.push(ParsedOp::Scan { sort_idx, out_wire }); + } + + // Find all FilterOp elements + for elem_idx in structure.carriers[self.ids.filter_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.filter_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("FilterOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.filter_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("FilterOp missing out".into()))?; + let pred = self + .get_func_value(self.funcs.filter_op_pred, elem) + .ok_or_else(|| RelAlgError::InvalidOp("FilterOp missing pred".into()))?; + + ops.push(ParsedOp::Filter { + in_wire, + out_wire, + pred, + }); + } + + // Find all DistinctOp elements + for elem_idx in structure.carriers[self.ids.distinct_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.distinct_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DistinctOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.distinct_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DistinctOp missing out".into()))?; + + ops.push(ParsedOp::Distinct { in_wire, out_wire }); + } + + // Find all NegateOp 
elements + for elem_idx in structure.carriers[self.ids.negate_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.negate_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("NegateOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.negate_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("NegateOp missing out".into()))?; + + ops.push(ParsedOp::Negate { in_wire, out_wire }); + } + + // Find all JoinOp elements + for elem_idx in structure.carriers[self.ids.join_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let left_wire = self + .get_func_value(self.funcs.join_op_left_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing left_in".into()))?; + let right_wire = self + .get_func_value(self.funcs.join_op_right_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing right_in".into()))?; + let out_wire = self + .get_func_value(self.funcs.join_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing out".into()))?; + let cond = self + .get_func_value(self.funcs.join_op_cond, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing cond".into()))?; + + ops.push(ParsedOp::Join { + left_wire, + right_wire, + out_wire, + cond, + }); + } + + // Find all UnionOp elements + for elem_idx in structure.carriers[self.ids.union_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let left_wire = self + .get_func_value(self.funcs.union_op_left_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("UnionOp missing left_in".into()))?; + let right_wire = self + .get_func_value(self.funcs.union_op_right_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("UnionOp missing right_in".into()))?; + let out_wire = self + .get_func_value(self.funcs.union_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("UnionOp missing out".into()))?; + + ops.push(ParsedOp::Union { + left_wire, + right_wire, + out_wire, + }); + } + + // Find all EmptyOp 
elements + for elem_idx in structure.carriers[self.ids.empty_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let out_wire = self + .get_func_value(self.funcs.empty_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("EmptyOp missing out".into()))?; + + ops.push(ParsedOp::Empty { out_wire }); + } + + // Find all DelayOp elements + for elem_idx in structure.carriers[self.ids.delay_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.delay_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DelayOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.delay_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DelayOp missing out".into()))?; + + ops.push(ParsedOp::Delay { in_wire, out_wire }); + } + + // Find all DiffOp elements + for elem_idx in structure.carriers[self.ids.diff_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.diff_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DiffOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.diff_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DiffOp missing out".into()))?; + + ops.push(ParsedOp::Diff { in_wire, out_wire }); + } + + // Find all IntegrateOp elements + for elem_idx in structure.carriers[self.ids.integrate_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.integrate_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("IntegrateOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.integrate_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("IntegrateOp missing out".into()))?; + + ops.push(ParsedOp::Integrate { in_wire, out_wire }); + } + + Ok(ops) + } + + /// Parse a predicate element + fn parse_predicate(&self, pred: Slid) -> Result { + // Try to find which sort the predicate element belongs to + let structure = 
&self.instance.structure; + + // Check if it's TruePred + for elem_idx in structure.carriers[self.ids.true_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.true_pred_pred, elem) + && p == pred { + return Ok(ParsedPred::True); + } + } + + // Check if it's FalsePred + for elem_idx in structure.carriers[self.ids.false_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.false_pred_pred, elem) + && p == pred { + return Ok(ParsedPred::False); + } + } + + // Check if it's ColEqPred + for elem_idx in structure.carriers[self.ids.col_eq_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.col_eq_pred_pred, elem) + && p == pred { + let left_ref = self + .get_func_value(self.funcs.col_eq_pred_left, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ColEqPred missing left".into()))?; + let right_ref = self + .get_func_value(self.funcs.col_eq_pred_right, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ColEqPred missing right".into()))?; + + let left = self.parse_col_ref(left_ref)?; + let right = self.parse_col_ref(right_ref)?; + + return Ok(ParsedPred::ColEq { left, right }); + } + } + + // Check if it's ConstEqPred + for elem_idx in structure.carriers[self.ids.const_eq_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.const_eq_pred_pred, elem) + && p == pred { + let col_ref = self + .get_func_value(self.funcs.const_eq_pred_col, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ConstEqPred missing col".into()))?; + let elem_ref = self + .get_func_value(self.funcs.const_eq_pred_val, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ConstEqPred missing val".into()))?; + + let col = self.parse_col_ref(col_ref)?; + + // Look up the original target value from the Elem element + let val = self.instance + .elem_value_mapping + .get(&elem_ref) + 
.copied() + .ok_or_else(|| RelAlgError::InvalidOp(format!( + "ConstEqPred val {:?} not in elem_value_mapping", + elem_ref + )))?; + + return Ok(ParsedPred::ConstEq { col, val }); + } + } + + // Check if it's AndPred + for elem_idx in structure.carriers[self.ids.and_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.and_pred_pred, elem) + && p == pred { + let left = self + .get_func_value(self.funcs.and_pred_left, elem) + .ok_or_else(|| RelAlgError::InvalidOp("AndPred missing left".into()))?; + let right = self + .get_func_value(self.funcs.and_pred_right, elem) + .ok_or_else(|| RelAlgError::InvalidOp("AndPred missing right".into()))?; + + let left_pred = self.parse_predicate(left)?; + let right_pred = self.parse_predicate(right)?; + + return Ok(ParsedPred::And(Box::new(left_pred), Box::new(right_pred))); + } + } + + // Check if it's OrPred + for elem_idx in structure.carriers[self.ids.or_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.or_pred_pred, elem) + && p == pred { + let left = self + .get_func_value(self.funcs.or_pred_left, elem) + .ok_or_else(|| RelAlgError::InvalidOp("OrPred missing left".into()))?; + let right = self + .get_func_value(self.funcs.or_pred_right, elem) + .ok_or_else(|| RelAlgError::InvalidOp("OrPred missing right".into()))?; + + let left_pred = self.parse_predicate(left)?; + let right_pred = self.parse_predicate(right)?; + + return Ok(ParsedPred::Or(Box::new(left_pred), Box::new(right_pred))); + } + } + + Err(RelAlgError::InvalidOp(format!( + "Unknown predicate type for {:?}", + pred + ))) + } + + /// Parse a join condition element + fn parse_join_cond(&self, cond: Slid) -> Result { + let structure = &self.instance.structure; + + // Check if it's CrossJoinCond + for elem_idx in structure.carriers[self.ids.cross_join_cond].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(c) = 
self.get_func_value(self.funcs.cross_join_cond_cond, elem) + && c == cond { + return Ok(ParsedJoinCond::Cross); + } + } + + // Check if it's EquiJoinCond + for elem_idx in structure.carriers[self.ids.equi_join_cond].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(c) = self.get_func_value(self.funcs.equi_join_cond_cond, elem) + && c == cond { + let left_col_ref = self + .get_func_value(self.funcs.equi_join_cond_left_col, elem) + .ok_or_else(|| { + RelAlgError::InvalidOp("EquiJoinCond missing left_col".into()) + })?; + let right_col_ref = self + .get_func_value(self.funcs.equi_join_cond_right_col, elem) + .ok_or_else(|| { + RelAlgError::InvalidOp("EquiJoinCond missing right_col".into()) + })?; + + let left_col = self.parse_col_ref(left_col_ref)?; + let right_col = self.parse_col_ref(right_col_ref)?; + + return Ok(ParsedJoinCond::Equi { left_col, right_col }); + } + } + + Err(RelAlgError::InvalidOp(format!( + "Unknown join condition type for {:?}", + cond + ))) + } + + /// Parse a column reference to get the column index + fn parse_col_ref(&self, col_ref: Slid) -> Result { + // Get the path from the ColRef + let path = self + .get_func_value(self.funcs.col_ref_path, col_ref) + .ok_or_else(|| RelAlgError::InvalidOp("ColRef missing path".into()))?; + + self.parse_col_path(path) + } + + /// Parse a column path to get the column index + fn parse_col_path(&self, path: Slid) -> Result { + let structure = &self.instance.structure; + + // Check if it's HerePath (index 0) + for elem_idx in structure.carriers[self.ids.here_path].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.here_path_path, elem) + && p == path { + return Ok(0); + } + } + + // Check if it's LeftPath + for elem_idx in structure.carriers[self.ids.left_path].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.left_path_path, elem) + && p == path { + let rest = self + 
.get_func_value(self.funcs.left_path_rest, elem) + .ok_or_else(|| RelAlgError::InvalidOp("LeftPath missing rest".into()))?; + return self.parse_col_path(rest); + } + } + + // Check if it's RightPath + for elem_idx in structure.carriers[self.ids.right_path].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.right_path_path, elem) + && p == path { + let rest = self + .get_func_value(self.funcs.right_path_rest, elem) + .ok_or_else(|| RelAlgError::InvalidOp("RightPath missing rest".into()))?; + // Right path adds 1 to the column index + return Ok(1 + self.parse_col_path(rest)?); + } + } + + Err(RelAlgError::InvalidOp(format!( + "Unknown path type for {:?}", + path + ))) + } + + /// Topologically sort operations (respecting dependencies) + fn topological_sort(&self, ops: &[ParsedOp]) -> Result, RelAlgError> { + // Build output wire -> operation index map + let mut wire_to_op: HashMap = HashMap::new(); + for (idx, op) in ops.iter().enumerate() { + wire_to_op.insert(op.out_wire(), idx); + } + + // Build dependency graph + let mut in_degree: Vec = vec![0; ops.len()]; + let mut dependents: Vec> = vec![Vec::new(); ops.len()]; + + for (idx, op) in ops.iter().enumerate() { + for in_wire in op.in_wires() { + if let Some(&producer_idx) = wire_to_op.get(&in_wire) { + // Skip delay edges for cycle breaking + if !ops[producer_idx].breaks_cycle() { + in_degree[idx] += 1; + dependents[producer_idx].push(idx); + } + } + } + } + + // Kahn's algorithm + let mut queue: VecDeque = VecDeque::new(); + for (idx, °ree) in in_degree.iter().enumerate() { + if degree == 0 { + queue.push_back(idx); + } + } + + let mut sorted = Vec::new(); + while let Some(idx) = queue.pop_front() { + sorted.push(idx); + for &dep_idx in &dependents[idx] { + in_degree[dep_idx] -= 1; + if in_degree[dep_idx] == 0 { + queue.push_back(dep_idx); + } + } + } + + if sorted.len() != ops.len() { + return Err(RelAlgError::InstantaneousCycle); + } + + Ok(sorted) + } + 
+ /// Execute a single operation + fn execute_op(&mut self, op: &ParsedOp) -> Result { + match op { + ParsedOp::Scan { sort_idx, .. } => { + // Emit all elements of the sort as singleton tuples + let mut result = Bag::new(); + if let Some(carrier) = self.target.carriers.get(*sort_idx) { + for elem in carrier.iter() { + let tuple = vec![Slid::from_usize(elem as usize)]; + result.insert(tuple, 1); + } + } + Ok(result) + } + + ParsedOp::Filter { + in_wire, + pred, + .. + } => { + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| RelAlgError::InvalidOp("Filter input wire not found".into()))? + .clone(); + + let parsed_pred = self.parse_predicate(*pred)?; + + let mut result = Bag::new(); + for (tuple, mult) in input.iter() { + if self.evaluate_predicate(&parsed_pred, tuple)? { + result.insert(tuple.clone(), *mult); + } + } + Ok(result) + } + + ParsedOp::Distinct { in_wire, .. } => { + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Distinct input wire not found".into()) + })? + .clone(); + + let mut result = Bag::new(); + for (tuple, mult) in input.iter() { + if *mult > 0 { + result.insert(tuple.clone(), 1); + } + } + Ok(result) + } + + ParsedOp::Negate { in_wire, .. } => { + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Negate input wire not found".into()) + })? + .clone(); + + let mut result = Bag::new(); + for (tuple, mult) in input.iter() { + result.insert(tuple.clone(), -mult); + } + Ok(result) + } + + ParsedOp::Join { + left_wire, + right_wire, + cond, + .. + } => { + let left = self + .wire_values + .get(left_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Join left input wire not found".into()) + })? + .clone(); + let right = self + .wire_values + .get(right_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Join right input wire not found".into()) + })? 
+ .clone(); + + let parsed_cond = self.parse_join_cond(*cond)?; + + let mut result = Bag::new(); + + match parsed_cond { + ParsedJoinCond::Cross => { + // Cartesian product + for (l_tuple, l_mult) in left.iter() { + for (r_tuple, r_mult) in right.iter() { + let mut joined = l_tuple.clone(); + joined.extend(r_tuple.iter().cloned()); + result.insert(joined, l_mult * r_mult); + } + } + } + ParsedJoinCond::Equi { left_col, right_col } => { + // Hash join + let mut right_index: HashMap, i64)>> = HashMap::new(); + for (r_tuple, r_mult) in right.iter() { + if let Some(&key) = r_tuple.get(right_col) { + right_index.entry(key).or_default().push((r_tuple, *r_mult)); + } + } + + for (l_tuple, l_mult) in left.iter() { + if let Some(&key) = l_tuple.get(left_col) + && let Some(matches) = right_index.get(&key) { + for (r_tuple, r_mult) in matches { + let mut joined = l_tuple.clone(); + joined.extend(r_tuple.iter().cloned()); + result.insert(joined, l_mult * r_mult); + } + } + } + } + } + + Ok(result) + } + + ParsedOp::Union { + left_wire, + right_wire, + .. + } => { + let left = self + .wire_values + .get(left_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Union left input wire not found".into()) + })? + .clone(); + let right = self + .wire_values + .get(right_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Union right input wire not found".into()) + })? + .clone(); + + let mut result = left; + for (tuple, mult) in right.iter() { + result.insert(tuple.clone(), result.tuples.get(tuple).unwrap_or(&0) + mult); + } + Ok(result) + } + + ParsedOp::Empty { .. } => Ok(Bag::new()), + + ParsedOp::Delay { in_wire, .. } => { + // For non-streaming execution, delay is identity + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Delay input wire not found".into()) + })? + .clone(); + Ok(input) + } + + ParsedOp::Diff { in_wire, .. 
} => { + // For non-streaming execution, diff is identity + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Diff input wire not found".into()) + })? + .clone(); + Ok(input) + } + + ParsedOp::Integrate { in_wire, .. } => { + // For non-streaming execution, integrate is identity + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Integrate input wire not found".into()) + })? + .clone(); + Ok(input) + } + } + } + + /// Evaluate a predicate on a tuple + #[allow(clippy::only_used_in_recursion)] + fn evaluate_predicate(&self, pred: &ParsedPred, tuple: &[Slid]) -> Result { + match pred { + ParsedPred::True => Ok(true), + ParsedPred::False => Ok(false), + ParsedPred::ColEq { left, right } => { + let l = tuple.get(*left); + let r = tuple.get(*right); + Ok(l.is_some() && l == r) + } + ParsedPred::ConstEq { col, val } => { + let c = tuple.get(*col); + Ok(c == Some(val)) + } + ParsedPred::And(left, right) => { + Ok(self.evaluate_predicate(left, tuple)? + && self.evaluate_predicate(right, tuple)?) + } + ParsedPred::Or(left, right) => { + Ok(self.evaluate_predicate(left, tuple)? + || self.evaluate_predicate(right, tuple)?) 
+ } + } + } +} + +/// Execute a RelAlgIR instance against a target structure +/// +/// # Arguments +/// * `instance` - The RelAlgIR instance representing the query plan +/// * `relalg_theory` - The RelAlgIR theory +/// * `target` - The structure to query +/// * `output_wire_name` - Name of the output wire (defaults to "output") +/// +/// # Returns +/// The query result as a Z-set +pub fn execute_relalg( + instance: &RelAlgInstance, + relalg_theory: &ElaboratedTheory, + target: &Structure, + output_wire_name: Option<&str>, +) -> Result { + let mut ctx = InterpretContext::new(instance, relalg_theory, target)?; + + // Parse all operations + let ops = ctx.parse_operations()?; + + if ops.is_empty() { + return Ok(Bag::new()); + } + + // Find output wire - use instance.output_wire by default, or look up by name + let output_wire = if let Some(name) = output_wire_name { + instance + .names + .iter() + .find(|(_, n)| *n == name) + .map(|(slid, _)| *slid) + .ok_or(RelAlgError::NoOutputWire)? + } else { + instance.output_wire + }; + + // Topologically sort operations + let sorted = ctx.topological_sort(&ops)?; + + // Execute in order + for &idx in &sorted { + let result = ctx.execute_op(&ops[idx])?; + let out_wire = ops[idx].out_wire(); + ctx.wire_values.insert(out_wire, result); + } + + // Return output wire value + ctx.wire_values + .remove(&output_wire) + .ok_or(RelAlgError::NoOutputWire) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parsed_op_in_wires() { + let scan = ParsedOp::Scan { + sort_idx: 0, + out_wire: Slid::from_usize(0), + }; + assert!(scan.in_wires().is_empty()); + + let filter = ParsedOp::Filter { + in_wire: Slid::from_usize(0), + out_wire: Slid::from_usize(1), + pred: Slid::from_usize(2), + }; + assert_eq!(filter.in_wires(), vec![Slid::from_usize(0)]); + + let join = ParsedOp::Join { + left_wire: Slid::from_usize(0), + right_wire: Slid::from_usize(1), + out_wire: Slid::from_usize(2), + cond: Slid::from_usize(3), + }; + assert_eq!( + 
join.in_wires(), + vec![Slid::from_usize(0), Slid::from_usize(1)] + ); + } + + #[test] + fn test_parsed_op_breaks_cycle() { + let scan = ParsedOp::Scan { + sort_idx: 0, + out_wire: Slid::from_usize(0), + }; + assert!(!scan.breaks_cycle()); + + let delay = ParsedOp::Delay { + in_wire: Slid::from_usize(0), + out_wire: Slid::from_usize(1), + }; + assert!(delay.breaks_cycle()); + + let integrate = ParsedOp::Integrate { + in_wire: Slid::from_usize(0), + out_wire: Slid::from_usize(1), + }; + assert!(integrate.breaks_cycle()); + } +} diff --git a/src/query/mod.rs b/src/query/mod.rs new file mode 100644 index 0000000..563d9f2 --- /dev/null +++ b/src/query/mod.rs @@ -0,0 +1,43 @@ +//! Query engine for geolog. +//! +//! **Semantics:** Queries are theory extensions. The result is the set of maximal +//! elements in the posetal reflection of well-formed Ext_M(T') — the category +//! of T'-extensions of base model M. +//! +//! See `loose_thoughts/2026-01-19_18:15_query_semantics.md` for full design. +//! +//! # Query Styles +//! +//! - **∃-style (closed sorts):** New sorts with declared constants. +//! Well-formedness requires exactly those constants exist. +//! Maximal elements = one per valid witness assignment. +//! Implementation: constraint satisfaction. +//! +//! - **∀-style (open sorts):** New sorts with no constants, constrained by +//! universal axioms. Bounded by constraint, posetal reflection identifies +//! observationally-equivalent duplicates. +//! Unique maximal element = cofree model. +//! Implementation: relational algebra / Datalog. +//! +//! # Implementation Phases +//! +//! 1. **Open sort computation** - what bootstrap_queries does manually +//! 2. **Closed sort enumeration** - constraint satisfaction +//! 3. **Chase for derived relations** - semi-naive fixpoint +//! 4. 
**Mixed queries** - combine both + +mod pattern; +mod exec; +pub mod backend; +pub mod optimize; +pub mod compile; +mod store_queries; +pub mod to_relalg; +pub mod from_relalg; +pub mod chase; + +pub use pattern::{Pattern, Constraint, Projection}; +pub use exec::{QueryResult, execute_pattern}; +pub use backend::{Bag, QueryOp, Predicate, JoinCond, execute, execute_optimized, StreamContext, execute_stream}; +pub use optimize::optimize; +pub use compile::{Query, QueryBuilder, compile_simple_filter, compile_filter_project}; diff --git a/src/query/optimize.rs b/src/query/optimize.rs new file mode 100644 index 0000000..69b69e6 --- /dev/null +++ b/src/query/optimize.rs @@ -0,0 +1,308 @@ +//! Query optimizer using algebraic laws. +//! +//! Applies rewrite rules corresponding to the algebraic laws defined in +//! RelAlgIR.geolog to transform query plans into more efficient forms. +//! +//! This is a simple "obviously correct" optimizer: +//! - Single-pass bottom-up rewriting +//! - No cost model (just simplification) +//! - Validated by proptests against the naive backend +//! +//! Key rewrites: +//! - Filter(True, x) → x +//! - Filter(False, x) → Empty +//! - Filter(p, Filter(q, x)) → Filter(And(p, q), x) +//! - Distinct(Distinct(x)) → Distinct(x) +//! - Union(x, Empty) → x +//! - Union(Empty, x) → x +//! - Negate(Negate(x)) → x +//! - Join(x, Empty) → Empty +//! - Join(Empty, x) → Empty + +use super::backend::{Predicate, QueryOp}; + +/// Optimize a query plan by applying algebraic laws. +/// +/// Returns an equivalent plan that may be more efficient to execute. +/// The optimization is semantics-preserving: optimize(p) produces the +/// same results as p for any structure. +pub fn optimize(plan: &QueryOp) -> QueryOp { + // Bottom-up: optimize children first, then apply rules + let optimized_children = optimize_children(plan); + apply_rules(optimized_children) +} + +/// Recursively optimize all children of a plan node. 
+fn optimize_children(plan: &QueryOp) -> QueryOp { + match plan { + QueryOp::Scan { sort_idx } => QueryOp::Scan { sort_idx: *sort_idx }, + + QueryOp::ScanRelation { rel_id } => QueryOp::ScanRelation { rel_id: *rel_id }, + + QueryOp::Filter { input, pred } => QueryOp::Filter { + input: Box::new(optimize(input)), + pred: pred.clone(), + }, + + QueryOp::Project { input, columns } => QueryOp::Project { + input: Box::new(optimize(input)), + columns: columns.clone(), + }, + + QueryOp::Join { left, right, cond } => QueryOp::Join { + left: Box::new(optimize(left)), + right: Box::new(optimize(right)), + cond: cond.clone(), + }, + + QueryOp::Union { left, right } => QueryOp::Union { + left: Box::new(optimize(left)), + right: Box::new(optimize(right)), + }, + + QueryOp::Distinct { input } => QueryOp::Distinct { + input: Box::new(optimize(input)), + }, + + QueryOp::Negate { input } => QueryOp::Negate { + input: Box::new(optimize(input)), + }, + + QueryOp::Constant { tuple } => QueryOp::Constant { tuple: tuple.clone() }, + + QueryOp::Empty => QueryOp::Empty, + + QueryOp::Apply { input, func_idx, arg_col } => QueryOp::Apply { + input: Box::new(optimize(input)), + func_idx: *func_idx, + arg_col: *arg_col, + }, + + QueryOp::ApplyField { input, func_idx, arg_col, field_name } => QueryOp::ApplyField { + input: Box::new(optimize(input)), + func_idx: *func_idx, + arg_col: *arg_col, + field_name: field_name.clone(), + }, + + // DBSP temporal operators: optimize children, preserve state_id + QueryOp::Delay { input, state_id } => QueryOp::Delay { + input: Box::new(optimize(input)), + state_id: *state_id, + }, + + QueryOp::Diff { input, state_id } => QueryOp::Diff { + input: Box::new(optimize(input)), + state_id: *state_id, + }, + + QueryOp::Integrate { input, state_id } => QueryOp::Integrate { + input: Box::new(optimize(input)), + state_id: *state_id, + }, + } +} + +/// Apply algebraic rewrite rules to a plan node. +/// Assumes children are already optimized. 
+fn apply_rules(plan: QueryOp) -> QueryOp { + match plan { + // ============================================================ + // Filter Laws + // ============================================================ + + // Filter(True, x) → x + QueryOp::Filter { input, pred: Predicate::True } => *input, + + // Filter(False, x) → Empty + QueryOp::Filter { pred: Predicate::False, .. } => QueryOp::Empty, + + // Filter(p, Filter(q, x)) → Filter(And(p, q), x) + QueryOp::Filter { input, pred: outer_pred } => { + if let QueryOp::Filter { input: inner_input, pred: inner_pred } = *input { + QueryOp::Filter { + input: inner_input, + pred: Predicate::And( + Box::new(outer_pred), + Box::new(inner_pred), + ), + } + } else { + QueryOp::Filter { + input: Box::new(*input), + pred: outer_pred, + } + } + } + + // ============================================================ + // Distinct Laws + // ============================================================ + + // Distinct(Distinct(x)) → Distinct(x) + QueryOp::Distinct { input } => { + if matches!(*input, QueryOp::Distinct { .. 
}) { + *input + } else { + QueryOp::Distinct { input } + } + } + + // ============================================================ + // Union Laws + // ============================================================ + + // Union(x, Empty) → x + // Union(Empty, x) → x + QueryOp::Union { left, right } => { + match (&*left, &*right) { + (QueryOp::Empty, _) => *right, + (_, QueryOp::Empty) => *left, + _ => QueryOp::Union { left, right }, + } + } + + // ============================================================ + // Negate Laws + // ============================================================ + + // Negate(Negate(x)) → x + QueryOp::Negate { input } => { + if let QueryOp::Negate { input: inner } = *input { + *inner + } else { + QueryOp::Negate { input } + } + } + + // ============================================================ + // Join Laws + // ============================================================ + + // Join(x, Empty) → Empty + // Join(Empty, x) → Empty + QueryOp::Join { left, right, cond } => { + if matches!(*left, QueryOp::Empty) || matches!(*right, QueryOp::Empty) { + QueryOp::Empty + } else { + QueryOp::Join { left, right, cond } + } + } + + // No rewrite applies + other => other, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::query::backend::JoinCond; + use crate::id::{NumericId, Slid}; + + #[test] + fn test_filter_true_elimination() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan.clone()), + pred: Predicate::True, + }; + let optimized = optimize(&filter); + assert!(matches!(optimized, QueryOp::Scan { sort_idx: 0 })); + } + + #[test] + fn test_filter_false_to_empty() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::False, + }; + let optimized = optimize(&filter); + assert!(matches!(optimized, QueryOp::Empty)); + } + + #[test] + fn test_filter_fusion() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter1 = 
QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::ColEqConst { col: 0, val: Slid::from_usize(1) }, + }; + let filter2 = QueryOp::Filter { + input: Box::new(filter1), + pred: Predicate::ColEqConst { col: 0, val: Slid::from_usize(2) }, + }; + let optimized = optimize(&filter2); + + // Should be a single filter with And predicate + if let QueryOp::Filter { pred: Predicate::And(_, _), .. } = optimized { + // Good! + } else { + panic!("Expected fused filter with And predicate, got {:?}", optimized); + } + } + + #[test] + fn test_distinct_idempotent() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let distinct1 = QueryOp::Distinct { + input: Box::new(scan), + }; + let distinct2 = QueryOp::Distinct { + input: Box::new(distinct1.clone()), + }; + let optimized = optimize(&distinct2); + + // Should be single distinct + if let QueryOp::Distinct { input } = optimized { + assert!(matches!(*input, QueryOp::Scan { .. })); + } else { + panic!("Expected Distinct, got {:?}", optimized); + } + } + + #[test] + fn test_union_empty_elimination() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let union = QueryOp::Union { + left: Box::new(scan.clone()), + right: Box::new(QueryOp::Empty), + }; + let optimized = optimize(&union); + assert!(matches!(optimized, QueryOp::Scan { sort_idx: 0 })); + + // Also test left empty + let union2 = QueryOp::Union { + left: Box::new(QueryOp::Empty), + right: Box::new(scan), + }; + let optimized2 = optimize(&union2); + assert!(matches!(optimized2, QueryOp::Scan { sort_idx: 0 })); + } + + #[test] + fn test_negate_involution() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let negate1 = QueryOp::Negate { + input: Box::new(scan), + }; + let negate2 = QueryOp::Negate { + input: Box::new(negate1), + }; + let optimized = optimize(&negate2); + assert!(matches!(optimized, QueryOp::Scan { sort_idx: 0 })); + } + + #[test] + fn test_join_empty_elimination() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let join = QueryOp::Join { + left: 
Box::new(scan), + right: Box::new(QueryOp::Empty), + cond: JoinCond::Cross, + }; + let optimized = optimize(&join); + assert!(matches!(optimized, QueryOp::Empty)); + } +} diff --git a/src/query/pattern.rs b/src/query/pattern.rs new file mode 100644 index 0000000..60cb4a6 --- /dev/null +++ b/src/query/pattern.rs @@ -0,0 +1,171 @@ +//! Pattern-based query representation. +//! +//! This represents the common pattern from bootstrap_queries: +//! "find all X : Sort where X.func₁ = Y₁ ∧ X.func₂ = Y₂ ∧ ..." +//! +//! In query semantics terms, this is an ∀-style query with an open result sort: +//! ```text +//! theory Query extends Base { +//! Result : Sort; // Open (no constants) +//! elem : Result → Sort; // Projection to base +//! axiom { r : Result ⊢ elem(r).func₁ = Y₁ ∧ elem(r).func₂ = Y₂ } +//! } +//! ``` +//! +//! The unique maximal element (cofree model) is the set of all elements +//! satisfying the constraint. + +use crate::id::Slid; + +/// A pattern query: find all elements of a sort matching constraints. +/// +/// Equivalent to SQL: `SELECT elem FROM Sort WHERE func₁(elem) = v₁ AND ...` +/// +/// Uses `usize` for sort/function IDs (internal indices) and `Slid` for +/// element values (external references). 
+#[derive(Debug, Clone)] +pub struct Pattern { + /// The sort to scan (sort index) + pub source_sort: usize, + /// Constraints: each is (func_index, expected_value) + pub constraints: Vec, + /// What to project/return + pub projection: Projection, +} + +/// A constraint: func(elem) must equal expected_value +#[derive(Debug, Clone)] +pub struct Constraint { + /// Function index to apply to the scanned element + pub func: usize, + /// Expected value (must match) + pub expected: Slid, +} + +/// What to return from the query +#[derive(Debug, Clone)] +pub enum Projection { + /// Return the element itself + Element, + /// Return the value of a function applied to the element + Func(usize), + /// Return a tuple of function values + Tuple(Vec), +} + +impl Pattern { + /// Create a new pattern query. + /// + /// # Example + /// + /// ```ignore + /// // Find all Srt where Srt.theory == theory_slid + /// let pattern = Pattern::new(store.sort_ids.srt.unwrap()) + /// .filter(store.func_ids.srt_theory.unwrap(), theory_slid); + /// ``` + pub fn new(source_sort: usize) -> Self { + Self { + source_sort, + constraints: Vec::new(), + projection: Projection::Element, + } + } + + /// Add a constraint: func(elem) must equal value. + pub fn filter(mut self, func: usize, value: Slid) -> Self { + self.constraints.push(Constraint { + func, + expected: value, + }); + self + } + + /// Project a function value instead of the element. + pub fn project(mut self, func: usize) -> Self { + self.projection = Projection::Func(func); + self + } + + /// Project a tuple of function values. 
+ pub fn project_tuple(mut self, funcs: Vec) -> Self { + self.projection = Projection::Tuple(funcs); + self + } +} + +// ============================================================================ +// Pattern → QueryOp Compilation +// ============================================================================ + +use super::backend::{QueryOp, Predicate}; + +impl Pattern { + /// Compile a Pattern into a QueryOp for the naive backend. + /// + /// A Pattern query: + /// 1. Scans all elements of source_sort + /// 2. Filters by constraints: func(elem) = expected for each constraint + /// 3. Projects according to projection type + /// + /// We implement this as: + /// - Scan → single-column tuples (elem) + /// - For each constraint, use FuncEqConst predicate + /// - Project to requested columns + pub fn compile(&self) -> QueryOp { + // Start with a scan of the sort + let mut plan = QueryOp::Scan { sort_idx: self.source_sort }; + + // Apply constraints as filters + // Each constraint checks: func(elem) = expected + for constraint in &self.constraints { + plan = QueryOp::Filter { + input: Box::new(plan), + pred: Predicate::FuncEqConst { + func_idx: constraint.func, + arg_col: 0, // The scanned element is always in column 0 + expected: constraint.expected, + }, + }; + } + + // Apply projection + match &self.projection { + Projection::Element => { + // Already have the element in col 0, no change needed + } + Projection::Func(func_idx) => { + // Apply function to element, return that instead + // This requires an Apply operation + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func_idx, + arg_col: 0, + }; + // Now we have (elem, func(elem)), project to just col 1 + plan = QueryOp::Project { + input: Box::new(plan), + columns: vec![1], + }; + } + Projection::Tuple(func_indices) => { + // Apply each function in sequence, then project + for func_idx in func_indices.iter() { + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func_idx, + arg_col: 0, 
// Always apply to original element + }; + } + // Now we have (elem, f1(elem), f2(elem), ...), project to func results + // Columns 1, 2, ... are the func results + let columns: Vec = (1..=func_indices.len()).collect(); + plan = QueryOp::Project { + input: Box::new(plan), + columns, + }; + } + } + + plan + } +} diff --git a/src/query/store_queries.rs b/src/query/store_queries.rs new file mode 100644 index 0000000..63a46d3 --- /dev/null +++ b/src/query/store_queries.rs @@ -0,0 +1,672 @@ +//! Store query integration: using compiled queries to replace bootstrap_queries. +//! +//! This module provides query methods on Store that use the compiled Query API +//! instead of handcoded iterations. It demonstrates that the Query compiler +//! can replace bootstrap_queries.rs. +//! +//! # Migration Path +//! +//! 1. First, create query versions here that match bootstrap_queries behavior +//! 2. Add tests that validate both produce same results +//! 3. Once validated, swap implementations in bootstrap_queries +//! 4. Eventually deprecate bootstrap_queries in favor of these +//! +//! # Example +//! +//! ```ignore +//! // Old: bootstrap_queries.rs +//! for srt_slid in self.elements_of_sort(srt_sort) { +//! if self.get_func(theory_func, srt_slid) == Some(theory_slid) { ... } +//! } +//! +//! // New: store_queries.rs using Query compiler +//! let plan = Query::scan(srt_sort) +//! .filter_eq(theory_func, 0, theory_slid) +//! .compile(); +//! let result = execute(&plan, &store.meta); +//! ``` + +use crate::core::DerivedSort; +use crate::id::{NumericId, Slid, Uuid}; +use crate::store::Store; +use crate::store::append::AppendOps; +use crate::store::bootstrap_queries::{SortInfo, FuncInfo, RelInfo, ElemInfo, FuncValInfo, RelTupleInfo}; +use super::backend::execute; +use super::compile::compile_simple_filter; + +impl Store { + /// Get the UUID for an element in GeologMeta by its Slid. + /// Used for deterministic ordering: UUIDs v7 are time-ordered. 
+ pub fn get_element_uuid(&self, slid: Slid) -> Uuid { + if let Some(&luid) = self.meta.luids.get(slid.index()) { + self.universe.get(luid).unwrap_or(Uuid::nil()) + } else { + Uuid::nil() + } + } + + /// Query all sorts belonging to a theory (using compiled query engine). + /// + /// This is equivalent to `query_theory_sorts` in bootstrap_queries.rs, + /// but uses the Query compiler instead of handcoded iteration. + pub fn query_theory_sorts_compiled(&self, theory_slid: Slid) -> Vec { + let Some(srt_sort) = self.sort_ids.srt else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.srt_theory else { + return vec![]; + }; + + // Compile and execute the query + let plan = compile_simple_filter(srt_sort, theory_func, theory_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to SortInfo + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&srt_slid) = tuple.first() { + let name = self.get_element_name(srt_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + infos.push(SortInfo { + name: short_name, + slid: srt_slid, + }); + } + } + // Sort by UUID to ensure deterministic order matching original creation order + // (UUIDs v7 are time-ordered, so earlier-created elements come first) + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all functions belonging to a theory (using compiled query engine). + /// + /// This is equivalent to `query_theory_funcs` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_theory_funcs_compiled(&self, theory_slid: Slid) -> Vec { + let Some(func_sort) = self.sort_ids.func else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.func_theory else { + return vec![]; + }; + let Some(dom_func) = self.func_ids.func_dom else { + return vec![]; + }; + let Some(cod_func) = self.func_ids.func_cod else { + return vec![]; + }; + + // Compile and execute the query to find matching functions + let plan = compile_simple_filter(func_sort, theory_func, theory_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to FuncInfo (with domain/codomain lookups) + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&func_slid) = tuple.first() { + let name = self.get_element_name(func_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + // Get domain and codomain DSorts (using bootstrap logic) + let domain = self + .get_func(dom_func, func_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + let codomain = self + .get_func(cod_func, func_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + + infos.push(FuncInfo { + name: short_name, + slid: func_slid, + domain, + codomain, + }); + } + } + // Sort by UUID to ensure deterministic order matching original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all relations belonging to a theory (using compiled query engine). + /// + /// This is equivalent to `query_theory_rels` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_theory_rels_compiled(&self, theory_slid: Slid) -> Vec { + let Some(rel_sort) = self.sort_ids.rel else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.rel_theory else { + return vec![]; + }; + let Some(dom_func) = self.func_ids.rel_dom else { + return vec![]; + }; + + // Compile and execute the query to find matching relations + let plan = compile_simple_filter(rel_sort, theory_func, theory_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to RelInfo (with domain lookup) + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&rel_slid) = tuple.first() { + let name = self.get_element_name(rel_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + // Get domain DSort (using bootstrap logic) + let domain = self + .get_func(dom_func, rel_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + + infos.push(RelInfo { + name: short_name, + slid: rel_slid, + domain, + }); + } + } + // Sort by UUID to ensure deterministic order matching original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + // ======================================================================== + // Instance queries (compiled versions) + // ======================================================================== + + /// Query all elements belonging to an instance (using compiled query engine). + /// + /// This is equivalent to `query_instance_elems` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_instance_elems_compiled(&self, instance_slid: Slid) -> Vec { + let Some(elem_sort) = self.sort_ids.elem else { + return vec![]; + }; + let Some(instance_func) = self.func_ids.elem_instance else { + return vec![]; + }; + let Some(sort_func) = self.func_ids.elem_sort else { + return vec![]; + }; + + // Compile and execute the query to find matching elements + let plan = compile_simple_filter(elem_sort, instance_func, instance_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to ElemInfo + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&elem_slid) = tuple.first() { + let name = self.get_element_name(elem_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + let srt_slid = self.get_func(sort_func, elem_slid); + + infos.push(ElemInfo { + name: short_name, + slid: elem_slid, + srt_slid, + }); + } + } + // Sort by UUID to preserve original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all function values in an instance (using compiled query engine). + /// + /// This is equivalent to `query_instance_func_vals` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_instance_func_vals_compiled(&self, instance_slid: Slid) -> Vec { + let Some(fv_sort) = self.sort_ids.func_val else { + return vec![]; + }; + let Some(instance_func) = self.func_ids.func_val_instance else { + return vec![]; + }; + let Some(func_func) = self.func_ids.func_val_func else { + return vec![]; + }; + let Some(arg_func) = self.func_ids.func_val_arg else { + return vec![]; + }; + let Some(result_func) = self.func_ids.func_val_result else { + return vec![]; + }; + + // Compile and execute the query + let plan = compile_simple_filter(fv_sort, instance_func, instance_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to FuncValInfo + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&fv_slid) = tuple.first() { + infos.push(FuncValInfo { + slid: fv_slid, + func_slid: self.get_func(func_func, fv_slid), + arg_slid: self.get_func(arg_func, fv_slid), + result_slid: self.get_func(result_func, fv_slid), + }); + } + } + // Sort by UUID to preserve original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all relation tuples in an instance. + /// + /// NOTE: Relation tuples are now stored in columnar batches (see `store::columnar`), + /// not as individual GeologMeta elements. This function returns empty until + /// columnar batch loading is implemented. + /// + /// TODO: Implement columnar batch loading for relation tuples. + pub fn query_instance_rel_tuples_compiled(&self, _instance_slid: Slid) -> Vec { + // Relation tuples are stored in columnar batches, not GeologMeta elements. + // Return empty until columnar batch loading is implemented. + vec![] + } +} + +#[cfg(test)] +mod tests { + use crate::repl::ReplState; + + /// Test that compiled query matches bootstrap query results. 
+ #[test] + fn test_compiled_matches_bootstrap_sorts() { + let source = r#" + theory TestTheory { + A : Sort; + B : Sort; + C : Sort; + f : A -> B; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let theory_slid = repl.store.resolve_name("TestTheory") + .expect("Theory should exist").0; + + // Compare bootstrap vs compiled + let bootstrap = repl.store.query_theory_sorts(theory_slid); + let compiled = repl.store.query_theory_sorts_compiled(theory_slid); + + // Same number of results + assert_eq!( + bootstrap.len(), compiled.len(), + "Bootstrap returned {} sorts, compiled returned {}", + bootstrap.len(), compiled.len() + ); + + // Same names (order may differ) + let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|s| &s.name).collect(); + let mut compiled_names: Vec<_> = compiled.iter().map(|s| &s.name).collect(); + bootstrap_names.sort(); + compiled_names.sort(); + + assert_eq!(bootstrap_names, compiled_names, "Sort names should match"); + } + + /// Test compiled query with theory that has no sorts. + #[test] + fn test_compiled_empty_theory() { + let source = r#" + theory EmptyTheory { + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let theory_slid = repl.store.resolve_name("EmptyTheory") + .expect("Theory should exist").0; + + let bootstrap = repl.store.query_theory_sorts(theory_slid); + let compiled = repl.store.query_theory_sorts_compiled(theory_slid); + + assert_eq!(bootstrap.len(), 0); + assert_eq!(compiled.len(), 0); + } + + /// Test that multiple theories have independent sorts. 
+ #[test] + fn test_compiled_multiple_theories() { + let source = r#" + theory Theory1 { + X : Sort; + Y : Sort; + } + theory Theory2 { + P : Sort; + Q : Sort; + R : Sort; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let theory1_slid = repl.store.resolve_name("Theory1") + .expect("Theory1 should exist").0; + let theory2_slid = repl.store.resolve_name("Theory2") + .expect("Theory2 should exist").0; + + // Theory1 should have X, Y + let t1_bootstrap = repl.store.query_theory_sorts(theory1_slid); + let t1_compiled = repl.store.query_theory_sorts_compiled(theory1_slid); + + assert_eq!(t1_bootstrap.len(), 2); + assert_eq!(t1_compiled.len(), 2); + + // Theory2 should have P, Q, R + let t2_bootstrap = repl.store.query_theory_sorts(theory2_slid); + let t2_compiled = repl.store.query_theory_sorts_compiled(theory2_slid); + + assert_eq!(t2_bootstrap.len(), 3); + assert_eq!(t2_compiled.len(), 3); + + // Names should be independent + let t1_names: std::collections::HashSet<_> = + t1_compiled.iter().map(|s| &s.name).collect(); + let t2_names: std::collections::HashSet<_> = + t2_compiled.iter().map(|s| &s.name).collect(); + + assert!(t1_names.contains(&"X".to_string())); + assert!(t1_names.contains(&"Y".to_string())); + assert!(t2_names.contains(&"P".to_string())); + assert!(t2_names.contains(&"Q".to_string())); + assert!(t2_names.contains(&"R".to_string())); + } + + /// Test that compiled query matches bootstrap query for functions. 
+ #[test] + fn test_compiled_matches_bootstrap_funcs() { + let source = r#" + theory FuncTheory { + A : Sort; + B : Sort; + C : Sort; + f : A -> B; + g : B -> C; + h : A -> C; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let theory_slid = repl.store.resolve_name("FuncTheory") + .expect("Theory should exist").0; + + // Compare bootstrap vs compiled + let bootstrap = repl.store.query_theory_funcs(theory_slid); + let compiled = repl.store.query_theory_funcs_compiled(theory_slid); + + // Same number of results + assert_eq!( + bootstrap.len(), compiled.len(), + "Bootstrap returned {} funcs, compiled returned {}", + bootstrap.len(), compiled.len() + ); + + // Same names (order may differ) + let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|f| &f.name).collect(); + let mut compiled_names: Vec<_> = compiled.iter().map(|f| &f.name).collect(); + bootstrap_names.sort(); + compiled_names.sort(); + + assert_eq!(bootstrap_names, compiled_names, "Function names should match"); + + // Verify we have the expected functions + assert!(compiled_names.contains(&&"f".to_string())); + assert!(compiled_names.contains(&&"g".to_string())); + assert!(compiled_names.contains(&&"h".to_string())); + } + + /// Test that compiled query matches bootstrap query for relations. 
+ #[test] + fn test_compiled_matches_bootstrap_rels() { + let source = r#" + theory RelTheory { + Node : Sort; + Source : Node -> Prop; + Sink : Node -> Prop; + Connected : [x: Node, y: Node] -> Prop; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let theory_slid = repl.store.resolve_name("RelTheory") + .expect("Theory should exist").0; + + // Compare bootstrap vs compiled + let bootstrap = repl.store.query_theory_rels(theory_slid); + let compiled = repl.store.query_theory_rels_compiled(theory_slid); + + // Same number of results + assert_eq!( + bootstrap.len(), compiled.len(), + "Bootstrap returned {} rels, compiled returned {}", + bootstrap.len(), compiled.len() + ); + + // Same names (order may differ) + let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|r| &r.name).collect(); + let mut compiled_names: Vec<_> = compiled.iter().map(|r| &r.name).collect(); + bootstrap_names.sort(); + compiled_names.sort(); + + assert_eq!(bootstrap_names, compiled_names, "Relation names should match"); + + // Verify we have the expected relations + assert!(compiled_names.contains(&&"Source".to_string())); + assert!(compiled_names.contains(&&"Sink".to_string())); + assert!(compiled_names.contains(&&"Connected".to_string())); + } + + // ======================================================================== + // Instance query tests + // ======================================================================== + + /// Test that compiled query matches bootstrap for instance elements. 
+ #[test] + fn test_compiled_matches_bootstrap_instance_elems() { + let source = r#" + theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + } + + instance SimpleGraph : Graph = { + a : V; + b : V; + c : V; + e1 : E; + e2 : E; + e1 src = a; + e1 tgt = b; + e2 src = b; + e2 tgt = c; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let instance_slid = repl.store.resolve_name("SimpleGraph") + .expect("Instance should exist").0; + + // Compare bootstrap vs compiled + let bootstrap = repl.store.query_instance_elems(instance_slid); + let compiled = repl.store.query_instance_elems_compiled(instance_slid); + + // Same number of results + assert_eq!( + bootstrap.len(), compiled.len(), + "Bootstrap returned {} elems, compiled returned {}", + bootstrap.len(), compiled.len() + ); + + // Should have 5 elements: a, b, c, e1, e2 + assert_eq!(compiled.len(), 5, "Expected 5 elements"); + + // Same names (order may differ) + let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|e| &e.name).collect(); + let mut compiled_names: Vec<_> = compiled.iter().map(|e| &e.name).collect(); + bootstrap_names.sort(); + compiled_names.sort(); + + assert_eq!(bootstrap_names, compiled_names, "Element names should match"); + } + + /// Test that compiled query matches bootstrap for function values. 
+ #[test] + fn test_compiled_matches_bootstrap_func_vals() { + let source = r#" + theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + } + + instance TwoEdges : Graph = { + v1 : V; + v2 : V; + v3 : V; + edge1 : E; + edge2 : E; + edge1 src = v1; + edge1 tgt = v2; + edge2 src = v2; + edge2 tgt = v3; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let instance_slid = repl.store.resolve_name("TwoEdges") + .expect("Instance should exist").0; + + // Compare bootstrap vs compiled + let bootstrap = repl.store.query_instance_func_vals(instance_slid); + let compiled = repl.store.query_instance_func_vals_compiled(instance_slid); + + // Same number of results + assert_eq!( + bootstrap.len(), compiled.len(), + "Bootstrap returned {} func_vals, compiled returned {}", + bootstrap.len(), compiled.len() + ); + + // Should have 4 function values: edge1.src, edge1.tgt, edge2.src, edge2.tgt + assert_eq!(compiled.len(), 4, "Expected 4 function values"); + } + + /// Test that compiled query matches bootstrap for relation tuples. + /// + /// NOTE: Relation tuples are now stored in columnar batches (see store::columnar), + /// not as individual GeologMeta elements. The bootstrap and compiled queries + /// for RelTuple elements return empty since we no longer create those elements. + /// + /// Relation tuple data is now accessed via `Store::load_instance_data_batches()`. 
+ #[test] + fn test_compiled_matches_bootstrap_rel_tuples() { + let source = r#" + theory NodeMarking { + Node : Sort; + Marked : [n: Node] -> Prop; + } + + instance ThreeNodes : NodeMarking = { + n1 : Node; + n2 : Node; + n3 : Node; + [n: n1] Marked; + [n: n3] Marked; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let instance_slid = repl.store.resolve_name("ThreeNodes") + .expect("Instance should exist").0; + + // Compare bootstrap vs compiled - both should return empty now + // since relation tuples are stored in columnar batches, not GeologMeta + let bootstrap = repl.store.query_instance_rel_tuples(instance_slid); + let compiled = repl.store.query_instance_rel_tuples_compiled(instance_slid); + + // Same number of results (both empty) + assert_eq!( + bootstrap.len(), compiled.len(), + "Bootstrap returned {} rel_tuples, compiled returned {}", + bootstrap.len(), compiled.len() + ); + + // Relation tuples are no longer stored as GeologMeta elements + // They're in columnar batches accessed via load_instance_data_batches() + assert_eq!(compiled.len(), 0, "RelTuple elements are not created (tuples in columnar batches)"); + + // Note: In in-memory mode (no store path), columnar batches aren't persisted. + // The in-memory Structure still has the relation tuples - they're just not + // serialized to disk. For tests with persistence, use a temp dir. + // + // The relation tuples are accessible via the in-memory Structure: + use crate::core::RelationStorage; + let entry = repl.instances.get("ThreeNodes").expect("Instance entry should exist"); + let rel_count: usize = entry.structure.relations.iter() + .map(|r| r.len()) + .sum(); + assert_eq!(rel_count, 2, "Expected 2 relation tuples in in-memory Structure"); + } + + /// Test compiled query with empty instance. 
+ #[test] + fn test_compiled_empty_instance() { + let source = r#" + theory Simple { + T : Sort; + } + + instance EmptyInst : Simple = { + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + let instance_slid = repl.store.resolve_name("EmptyInst") + .expect("Instance should exist").0; + + let bootstrap_elems = repl.store.query_instance_elems(instance_slid); + let compiled_elems = repl.store.query_instance_elems_compiled(instance_slid); + assert_eq!(bootstrap_elems.len(), 0); + assert_eq!(compiled_elems.len(), 0); + + let bootstrap_fvs = repl.store.query_instance_func_vals(instance_slid); + let compiled_fvs = repl.store.query_instance_func_vals_compiled(instance_slid); + assert_eq!(bootstrap_fvs.len(), 0); + assert_eq!(compiled_fvs.len(), 0); + + let bootstrap_rts = repl.store.query_instance_rel_tuples(instance_slid); + let compiled_rts = repl.store.query_instance_rel_tuples_compiled(instance_slid); + assert_eq!(bootstrap_rts.len(), 0); + assert_eq!(compiled_rts.len(), 0); + } +} diff --git a/src/query/to_relalg.rs b/src/query/to_relalg.rs new file mode 100644 index 0000000..30dcfe4 --- /dev/null +++ b/src/query/to_relalg.rs @@ -0,0 +1,1386 @@ +//! Compiler from QueryOp plans to RelAlgIR instances. +//! +//! This module creates geolog Structure instances (of the RelAlgIR theory) +//! from QueryOp query plans. The resulting structures can be: +//! - Inspected as first-class data +//! - Optimized using the RelAlgIR optimization axioms +//! - Executed via a RelAlgIR backend +//! +//! # Design +//! +//! The compiler traverses a QueryOp tree and for each node: +//! 1. Creates the corresponding Op element (ScanOp, FilterOp, etc.) +//! 2. Creates Wire elements for inputs/outputs +//! 3. Creates Schema elements describing wire types +//! 4. Sets up function values connecting the elements +//! +//! The resulting Structure includes: +//! - GeologMeta elements representing the source signature (Srt, Func) +//! 
- RelAlgIR elements representing the query plan (Wire, Op, Schema) +//! +//! # Supported Operators +//! +//! The following QueryOp variants are compiled: +//! +//! | QueryOp | RelAlgIR Sort | Notes | +//! |------------------|------------------|------------------------------| +//! | `Scan` | `ScanOp` | Emits elements of a sort | +//! | `Filter` | `FilterOp` | With predicate compilation | +//! | `Distinct` | `DistinctOp` | Deduplication | +//! | `Join (Cross)` | `JoinOp` | Cartesian product | +//! | `Join (Equi)` | `JoinOp` | Hash join on key columns | +//! | `Union` | `UnionOp` | Bag union | +//! | `Project` | `ProjectOp` | Column selection/reordering | +//! | `Negate` | `NegateOp` | Flip multiplicities | +//! | `Empty` | `EmptyOp` | Identity for Union | +//! | `Delay` | `DelayOp` | DBSP: previous timestep | +//! | `Diff` | `DiffOp` | DBSP: change since last | +//! | `Integrate` | `IntegrateOp` | DBSP: accumulate | +//! +//! Not yet supported: `Constant` (needs Elem), `Apply` (needs Func). +//! +//! # Supported Predicates +//! +//! | Predicate | RelAlgIR Sort | Notes | +//! |------------------|---------------------|------------------------------| +//! | `True` | `TruePred` | Always true | +//! | `False` | `FalsePred` | Always false | +//! | `ColEqCol` | `ColEqPred` | Two columns equal | +//! | `ColEqConst` | `ConstEqPred` | Column equals constant | +//! | `FuncEq` | `FuncEqPred` | f(arg) = result | +//! | `FuncEqConst` | `FuncConstEqPred` | f(arg) = expected | +//! | `And` | `AndPred` | Conjunction | +//! | `Or` | `OrPred` | Disjunction | +//! +//! All predicate types are now supported! +//! +//! # Example +//! +//! ```ignore +//! use geolog::query::{QueryOp, to_relalg::compile_to_relalg}; +//! +//! let plan = QueryOp::Filter { +//! input: Box::new(QueryOp::Scan { sort_idx: 0 }), +//! pred: Predicate::True, +//! }; +//! +//! let instance = compile_to_relalg(&plan, &relalg_theory, &mut universe)?; +//! // instance.structure contains RelAlgIR elements +//! 
// instance.output_wire is the final Wire element +//! ``` + +use std::collections::HashMap; +use std::rc::Rc; + +use crate::core::{ElaboratedTheory, SortId, Structure}; +use crate::id::Slid; +use crate::query::backend::QueryOp; +use crate::universe::Universe; + +/// Result of compiling a QueryOp to a RelAlgIR instance. +pub struct RelAlgInstance { + /// The RelAlgIR structure + pub structure: Structure, + /// The output wire of the compiled plan + pub output_wire: Slid, + /// Mapping from Slid to element names (for debugging) + pub names: HashMap, + /// Mapping from Srt elements to source sort indices (for interpreter) + pub sort_mapping: HashMap, + /// Mapping from Elem elements to original target Slid values (for interpreter) + pub elem_value_mapping: HashMap, +} + +/// Context for the compilation process. +struct CompileContext<'a> { + /// The RelAlgIR theory + relalg_theory: &'a ElaboratedTheory, + /// Universe for generating Luids + universe: &'a mut Universe, + /// The structure being built + structure: Structure, + /// Element names for debugging + names: HashMap, + /// Counter for generating unique names + counter: usize, + + // Sort IDs in RelAlgIR (cached for efficiency) + sort_ids: RelAlgSortIds, + + // GeologMeta sort elements already created + // Maps source signature SortId -> RelAlgIR Slid for GeologMeta/Srt element + srt_elements: HashMap, + + // GeologMeta/Elem elements for target instance elements + // Maps target instance Slid -> RelAlgIR Slid for GeologMeta/Elem element + elem_elements: HashMap, + + // GeologMeta/Func elements for target signature functions + // Maps target func index -> RelAlgIR Slid for GeologMeta/Func element + func_elements: HashMap, + + // The "self-referencing" Theory element (for standalone queries) + theory_elem: Option, + + // Placeholder Instance element for Elem references + instance_elem: Option, +} + +/// Cached sort IDs from the RelAlgIR theory. +/// Many fields are reserved for future operator support. 
+#[allow(dead_code)] +struct RelAlgSortIds { + // GeologMeta inherited sorts + theory: SortId, + srt: SortId, + dsort: SortId, + base_ds: SortId, + func: SortId, + elem: SortId, + instance: SortId, + + // RelAlgIR sorts + schema: SortId, + unit_schema: SortId, + base_schema: SortId, + prod_schema: SortId, + wire: SortId, + op: SortId, + scan_op: SortId, + filter_op: SortId, + distinct_op: SortId, + negate_op: SortId, + join_op: SortId, + union_op: SortId, + delay_op: SortId, + diff_op: SortId, + integrate_op: SortId, + empty_op: SortId, + const_op: SortId, + project_op: SortId, + apply_op: SortId, + + // Projection mapping + proj_mapping: SortId, + proj_entry: SortId, + + // Predicates + pred: SortId, + true_pred: SortId, + false_pred: SortId, + col_eq_pred: SortId, + const_eq_pred: SortId, + func_eq_pred: SortId, + func_const_eq_pred: SortId, + and_pred: SortId, + or_pred: SortId, + + // Join conditions + join_cond: SortId, + equi_join_cond: SortId, + cross_join_cond: SortId, + + // Column references + col_ref: SortId, + col_path: SortId, + here_path: SortId, +} + +impl RelAlgSortIds { + fn from_theory(theory: &ElaboratedTheory) -> Result { + let sig = &theory.theory.signature; + let lookup = |name: &str| -> Result { + sig.lookup_sort(name) + .ok_or_else(|| format!("RelAlgIR theory missing sort: {}", name)) + }; + + Ok(Self { + // GeologMeta sorts are prefixed + theory: lookup("GeologMeta/Theory")?, + srt: lookup("GeologMeta/Srt")?, + dsort: lookup("GeologMeta/DSort")?, + base_ds: lookup("GeologMeta/BaseDS")?, + func: lookup("GeologMeta/Func")?, + elem: lookup("GeologMeta/Elem")?, + instance: lookup("GeologMeta/Instance")?, + + // RelAlgIR sorts + schema: lookup("Schema")?, + unit_schema: lookup("UnitSchema")?, + base_schema: lookup("BaseSchema")?, + prod_schema: lookup("ProdSchema")?, + wire: lookup("Wire")?, + op: lookup("Op")?, + scan_op: lookup("ScanOp")?, + filter_op: lookup("FilterOp")?, + distinct_op: lookup("DistinctOp")?, + negate_op: lookup("NegateOp")?, 
+ join_op: lookup("JoinOp")?, + union_op: lookup("UnionOp")?, + delay_op: lookup("DelayOp")?, + diff_op: lookup("DiffOp")?, + integrate_op: lookup("IntegrateOp")?, + empty_op: lookup("EmptyOp")?, + const_op: lookup("ConstOp")?, + project_op: lookup("ProjectOp")?, + apply_op: lookup("ApplyOp")?, + + proj_mapping: lookup("ProjMapping")?, + proj_entry: lookup("ProjEntry")?, + + pred: lookup("Pred")?, + true_pred: lookup("TruePred")?, + false_pred: lookup("FalsePred")?, + col_eq_pred: lookup("ColEqPred")?, + const_eq_pred: lookup("ConstEqPred")?, + func_eq_pred: lookup("FuncEqPred")?, + func_const_eq_pred: lookup("FuncConstEqPred")?, + and_pred: lookup("AndPred")?, + or_pred: lookup("OrPred")?, + + join_cond: lookup("JoinCond")?, + equi_join_cond: lookup("EquiJoinCond")?, + cross_join_cond: lookup("CrossJoinCond")?, + + col_ref: lookup("ColRef")?, + col_path: lookup("ColPath")?, + here_path: lookup("HerePath")?, + }) + } +} + +impl<'a> CompileContext<'a> { + fn new( + relalg_theory: &'a ElaboratedTheory, + universe: &'a mut Universe, + ) -> Result { + let sort_ids = RelAlgSortIds::from_theory(relalg_theory)?; + let num_sorts = relalg_theory.theory.signature.sorts.len(); + let num_funcs = relalg_theory.theory.signature.functions.len(); + + let mut structure = Structure::new(num_sorts); + + // Initialize function storage with empty columns for each function + // We use Local columns that will grow as elements are added + structure.functions = (0..num_funcs) + .map(|_| crate::core::FunctionColumn::Local(Vec::new())) + .collect(); + + // Initialize relation storage + let rel_arities: Vec = relalg_theory + .theory + .signature + .relations + .iter() + .map(|r| r.domain.arity()) + .collect(); + structure.init_relations(&rel_arities); + + Ok(Self { + relalg_theory, + universe, + structure, + names: HashMap::new(), + counter: 0, + sort_ids, + srt_elements: HashMap::new(), + elem_elements: HashMap::new(), + func_elements: HashMap::new(), + theory_elem: None, + instance_elem: 
None, + }) + } + + fn fresh_name(&mut self, prefix: &str) -> String { + self.counter += 1; + format!("{}_{}", prefix, self.counter) + } + + fn add_element(&mut self, sort_id: SortId, name: &str) -> Slid { + let (slid, _) = self.structure.add_element(self.universe, sort_id); + self.names.insert(slid, name.to_string()); + slid + } + + fn define_func(&mut self, func_name: &str, domain: Slid, codomain: Slid) -> Result<(), String> { + let func_id = self + .relalg_theory + .theory + .signature + .lookup_func(func_name) + .ok_or_else(|| format!("RelAlgIR missing function: {}", func_name))?; + + self.structure + .define_function(func_id, domain, codomain) + .map_err(|existing| { + format!( + "Conflicting definition for {} on {:?}: already defined as {:?}", + func_name, domain, existing + ) + }) + } + + /// Get or create the Theory element (self-referencing for standalone queries) + fn get_theory_elem(&mut self) -> Slid { + if let Some(elem) = self.theory_elem { + return elem; + } + + let elem = self.add_element(self.sort_ids.theory, "query_theory"); + + // Self-reference: Theory/parent = self + let _ = self.define_func("GeologMeta/Theory/parent", elem, elem); + + self.theory_elem = Some(elem); + elem + } + + /// Get or create a GeologMeta/Srt element for a source sort + fn get_srt_elem(&mut self, source_sort: usize) -> Result { + if let Some(&elem) = self.srt_elements.get(&source_sort) { + return Ok(elem); + } + + let theory = self.get_theory_elem(); + let name = self.fresh_name("srt"); + let elem = self.add_element(self.sort_ids.srt, &name); + + // Srt/theory = our theory element + self.define_func("GeologMeta/Srt/theory", elem, theory)?; + + self.srt_elements.insert(source_sort, elem); + Ok(elem) + } + + /// Get or create a placeholder Instance element for Elem references. + /// This represents "the instance being queried" - resolved at execution time. 
+ fn get_instance_elem(&mut self) -> Slid { + if let Some(elem) = self.instance_elem { + return elem; + } + + let theory = self.get_theory_elem(); + let elem = self.add_element(self.sort_ids.instance, "query_instance"); + + // Instance/theory = our theory element + let _ = self.define_func("GeologMeta/Instance/theory", elem, theory); + + self.instance_elem = Some(elem); + elem + } + + /// Get or create an Elem element for a target instance element. + /// + /// Note: Slid doesn't encode the sort, so we use sort 0 as a placeholder. + /// A full implementation would require passing the source structure to look up + /// the actual sort. The Elem is still created and linked, just with incomplete + /// sort information. + fn get_elem(&mut self, target_slid: Slid) -> Result { + if let Some(&elem) = self.elem_elements.get(&target_slid) { + return Ok(elem); + } + + // TODO: To properly set Elem/sort, we'd need access to the source structure + // to look up target_slid's sort. For now, use sort 0 as a placeholder. + let placeholder_sort = 0; + let srt_elem = self.get_srt_elem(placeholder_sort)?; + let instance = self.get_instance_elem(); + + let name = self.fresh_name("elem"); + let elem = self.add_element(self.sort_ids.elem, &name); + + // Elem/instance = our instance element + self.define_func("GeologMeta/Elem/instance", elem, instance)?; + // Elem/sort = the sort element (placeholder) + self.define_func("GeologMeta/Elem/sort", elem, srt_elem)?; + + self.elem_elements.insert(target_slid, elem); + Ok(elem) + } + + /// Get or create a Func element for a target signature function. 
+ fn get_func_elem(&mut self, func_idx: usize) -> Result { + if let Some(&elem) = self.func_elements.get(&func_idx) { + return Ok(elem); + } + + let theory = self.get_theory_elem(); + let name = self.fresh_name("func"); + let elem = self.add_element(self.sort_ids.func, &name); + + // Func/theory = our theory element + self.define_func("GeologMeta/Func/theory", elem, theory)?; + // Note: Func/dom and Func/cod require DSort elements, which we don't + // track. For now, these are left undefined (partial function). + + self.func_elements.insert(func_idx, elem); + Ok(elem) + } + + /// Create a BaseSchema for a sort + fn create_base_schema(&mut self, srt_elem: Slid) -> Result<(Slid, Slid), String> { + let bs_name = self.fresh_name("base_schema"); + let bs = self.add_element(self.sort_ids.base_schema, &bs_name); + + let schema_name = self.fresh_name("schema"); + let schema = self.add_element(self.sort_ids.schema, &schema_name); + + self.define_func("BaseSchema/schema", bs, schema)?; + self.define_func("BaseSchema/srt", bs, srt_elem)?; + + Ok((bs, schema)) + } + + /// Create a Wire with a given schema + fn create_wire(&mut self, schema: Slid) -> Result { + let name = self.fresh_name("wire"); + let wire = self.add_element(self.sort_ids.wire, &name); + self.define_func("Wire/schema", wire, schema)?; + Ok(wire) + } + + /// Create a TruePred and return the Pred elem + fn create_true_pred(&mut self) -> Result<(Slid, Slid), String> { + let tp_name = self.fresh_name("true_pred"); + let tp = self.add_element(self.sort_ids.true_pred, &tp_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("TruePred/pred", tp, pred)?; + + Ok((tp, pred)) + } + + /// Create a FalsePred and return the Pred elem + fn create_false_pred(&mut self) -> Result { + let fp_name = self.fresh_name("false_pred"); + let fp = self.add_element(self.sort_ids.false_pred, &fp_name); + + let pred_name = self.fresh_name("pred"); + let 
pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("FalsePred/pred", fp, pred)?; + + Ok(pred) + } + + /// Create an AndPred combining two predicates + fn create_and_pred(&mut self, left: Slid, right: Slid) -> Result { + let and_name = self.fresh_name("and_pred"); + let and_pred = self.add_element(self.sort_ids.and_pred, &and_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("AndPred/pred", and_pred, pred)?; + self.define_func("AndPred/left", and_pred, left)?; + self.define_func("AndPred/right", and_pred, right)?; + + Ok(pred) + } + + /// Create an OrPred combining two predicates + fn create_or_pred(&mut self, left: Slid, right: Slid) -> Result { + let or_name = self.fresh_name("or_pred"); + let or_pred = self.add_element(self.sort_ids.or_pred, &or_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("OrPred/pred", or_pred, pred)?; + self.define_func("OrPred/left", or_pred, left)?; + self.define_func("OrPred/right", or_pred, right)?; + + Ok(pred) + } + + /// Create a ColEqPred (left_col = right_col) + fn create_col_eq_pred(&mut self, wire: Slid, left_col: usize, right_col: usize) -> Result { + // Create left ColRef + let left_ref = self.create_col_ref(wire, left_col)?; + // Create right ColRef + let right_ref = self.create_col_ref(wire, right_col)?; + + let eq_name = self.fresh_name("col_eq_pred"); + let col_eq = self.add_element(self.sort_ids.col_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("ColEqPred/pred", col_eq, pred)?; + self.define_func("ColEqPred/left", col_eq, left_ref)?; + self.define_func("ColEqPred/right", col_eq, right_ref)?; + + Ok(pred) + } + + /// Create a ColRef for column index + fn create_col_ref(&mut self, wire: Slid, _col: usize) -> Result { + // For 
now, always use HerePath (column 0) + // TODO: Implement proper column path navigation for nested schemas + let here_name = self.fresh_name("here_path"); + let here = self.add_element(self.sort_ids.here_path, &here_name); + + let path_name = self.fresh_name("col_path"); + let col_path = self.add_element(self.sort_ids.col_path, &path_name); + + self.define_func("HerePath/path", here, col_path)?; + + let ref_name = self.fresh_name("col_ref"); + let col_ref = self.add_element(self.sort_ids.col_ref, &ref_name); + + self.define_func("ColRef/wire", col_ref, wire)?; + self.define_func("ColRef/path", col_ref, col_path)?; + + Ok(col_ref) + } + + /// Create a ConstEqPred (col = constant) + fn create_const_eq_pred(&mut self, wire: Slid, col: usize, val: Slid) -> Result { + // Create ColRef for the column + let col_ref = self.create_col_ref(wire, col)?; + + // Create Elem element for the constant value + let elem = self.get_elem(val)?; + + let eq_name = self.fresh_name("const_eq_pred"); + let const_eq = self.add_element(self.sort_ids.const_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("ConstEqPred/pred", const_eq, pred)?; + self.define_func("ConstEqPred/col", const_eq, col_ref)?; + self.define_func("ConstEqPred/val", const_eq, elem)?; + + Ok(pred) + } + + /// Create a FuncEqPred (func(arg_col) = result_col) + fn create_func_eq_pred( + &mut self, + wire: Slid, + func_idx: usize, + arg_col: usize, + result_col: usize, + ) -> Result { + // Create Func element + let func = self.get_func_elem(func_idx)?; + + // Create ColRefs + let arg_ref = self.create_col_ref(wire, arg_col)?; + let result_ref = self.create_col_ref(wire, result_col)?; + + let eq_name = self.fresh_name("func_eq_pred"); + let func_eq = self.add_element(self.sort_ids.func_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + 
self.define_func("FuncEqPred/pred", func_eq, pred)?; + self.define_func("FuncEqPred/func", func_eq, func)?; + self.define_func("FuncEqPred/arg", func_eq, arg_ref)?; + self.define_func("FuncEqPred/result", func_eq, result_ref)?; + + Ok(pred) + } + + /// Create a FuncConstEqPred (func(arg_col) = expected_elem) + fn create_func_const_eq_pred( + &mut self, + wire: Slid, + func_idx: usize, + arg_col: usize, + expected: Slid, + ) -> Result { + // Create Func element + let func = self.get_func_elem(func_idx)?; + + // Create ColRef for argument + let arg_ref = self.create_col_ref(wire, arg_col)?; + + // Create Elem for expected value + let expected_elem = self.get_elem(expected)?; + + let eq_name = self.fresh_name("func_const_eq_pred"); + let func_const_eq = self.add_element(self.sort_ids.func_const_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("FuncConstEqPred/pred", func_const_eq, pred)?; + self.define_func("FuncConstEqPred/func", func_const_eq, func)?; + self.define_func("FuncConstEqPred/arg", func_const_eq, arg_ref)?; + self.define_func("FuncConstEqPred/expected", func_const_eq, expected_elem)?; + + Ok(pred) + } +} + +/// Compile a predicate to a Pred element +fn compile_predicate( + ctx: &mut CompileContext<'_>, + wire: Slid, + pred: &crate::query::backend::Predicate, +) -> Result { + use crate::query::backend::Predicate; + + match pred { + Predicate::True => { + let (_, pred_elem) = ctx.create_true_pred()?; + Ok(pred_elem) + } + Predicate::False => { + ctx.create_false_pred() + } + Predicate::ColEqCol { left, right } => { + ctx.create_col_eq_pred(wire, *left, *right) + } + Predicate::ColEqConst { col, val } => { + ctx.create_const_eq_pred(wire, *col, *val) + } + Predicate::FuncEq { + func_idx, + arg_col, + result_col, + } => { + ctx.create_func_eq_pred(wire, *func_idx, *arg_col, *result_col) + } + Predicate::FuncEqConst { + func_idx, + arg_col, + expected, + } => { 
+ ctx.create_func_const_eq_pred(wire, *func_idx, *arg_col, *expected) + } + Predicate::And(left, right) => { + let left_pred = compile_predicate(ctx, wire, left)?; + let right_pred = compile_predicate(ctx, wire, right)?; + ctx.create_and_pred(left_pred, right_pred) + } + Predicate::Or(left, right) => { + let left_pred = compile_predicate(ctx, wire, left)?; + let right_pred = compile_predicate(ctx, wire, right)?; + ctx.create_or_pred(left_pred, right_pred) + } + } +} + +/// Compile a QueryOp into a RelAlgIR instance. +/// +/// # Arguments +/// * `plan` - The query plan to compile +/// * `relalg_theory` - The RelAlgIR theory +/// * `universe` - Universe for Luid generation +/// +/// # Returns +/// The compiled RelAlgIR instance, or an error message +pub fn compile_to_relalg( + plan: &QueryOp, + relalg_theory: &Rc, + universe: &mut Universe, +) -> Result { + let mut ctx = CompileContext::new(relalg_theory, universe)?; + + // Initialize function storage (will be lazy-initialized on first use) + // For now, we don't pre-init since we use define_function which auto-grows + + let output_wire = compile_op(&mut ctx, plan)?; + + // Invert srt_elements to get Slid -> sort_idx mapping + let sort_mapping: HashMap = ctx + .srt_elements + .iter() + .map(|(&sort_idx, &slid)| (slid, sort_idx)) + .collect(); + + // Invert elem_elements to get Elem Slid -> original value mapping + let elem_value_mapping: HashMap = ctx + .elem_elements + .iter() + .map(|(&target_slid, &elem_slid)| (elem_slid, target_slid)) + .collect(); + + Ok(RelAlgInstance { + structure: ctx.structure, + output_wire, + names: ctx.names, + sort_mapping, + elem_value_mapping, + }) +} + +/// Compile a single QueryOp, returning the output wire Slid. 
fn compile_op(ctx: &mut CompileContext<'_>, op: &QueryOp) -> Result<Slid, String> {
    match op {
        QueryOp::Scan { sort_idx } => compile_scan(ctx, *sort_idx),

        QueryOp::ScanRelation { rel_id } => {
            // TODO: Add ScanRelationOp to RelAlgIR theory and implement
            Err(format!("ScanRelation compilation not yet implemented (rel_id={})", rel_id))
        }

        QueryOp::Filter { input, pred } => {
            let input_wire = compile_op(ctx, input)?;
            compile_filter(ctx, input_wire, pred)
        }

        QueryOp::Distinct { input } => {
            let input_wire = compile_op(ctx, input)?;
            compile_distinct(ctx, input_wire)
        }

        QueryOp::Join { left, right, cond } => {
            let left_wire = compile_op(ctx, left)?;
            let right_wire = compile_op(ctx, right)?;
            compile_join(ctx, left_wire, right_wire, cond)
        }

        QueryOp::Union { left, right } => {
            let left_wire = compile_op(ctx, left)?;
            let right_wire = compile_op(ctx, right)?;
            compile_union(ctx, left_wire, right_wire)
        }

        // DBSP operators
        QueryOp::Delay { input, state_id: _ } => {
            let input_wire = compile_op(ctx, input)?;
            compile_delay(ctx, input_wire)
        }

        QueryOp::Diff { input, state_id: _ } => {
            let input_wire = compile_op(ctx, input)?;
            compile_diff(ctx, input_wire)
        }

        QueryOp::Integrate { input, state_id: _ } => {
            let input_wire = compile_op(ctx, input)?;
            compile_integrate(ctx, input_wire)
        }

        QueryOp::Negate { input } => {
            let input_wire = compile_op(ctx, input)?;
            compile_negate(ctx, input_wire)
        }

        QueryOp::Empty => compile_empty(ctx),

        QueryOp::Project { input, columns } => {
            let input_wire = compile_op(ctx, input)?;
            compile_project(ctx, input_wire, columns)
        }

        // Not yet implemented (require additional context)
        QueryOp::Constant { .. } => Err("ConstantOp compilation not yet implemented (needs Elem)".to_string()),
        QueryOp::Apply { .. } => Err("ApplyOp compilation not yet implemented (needs Func)".to_string()),
        QueryOp::ApplyField { .. } => Err("ApplyFieldOp compilation not yet implemented".to_string()),
    }
}

fn compile_scan(ctx: &mut CompileContext<'_>, sort_idx: usize) -> Result<Slid, String> {
    // Get or create Srt element
    let srt_elem = ctx.get_srt_elem(sort_idx)?;

    // Create schema for output
    let (_, schema) = ctx.create_base_schema(srt_elem)?;

    // Create output wire
    let out_wire = ctx.create_wire(schema)?;

    // Create ScanOp
    let scan_name = ctx.fresh_name("scan");
    let scan = ctx.add_element(ctx.sort_ids.scan_op, &scan_name);

    // Create Op (sum type injection)
    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    // Set function values
    ctx.define_func("ScanOp/op", scan, op)?;
    ctx.define_func("ScanOp/srt", scan, srt_elem)?;
    ctx.define_func("ScanOp/out", scan, out_wire)?;

    Ok(out_wire)
}

fn compile_filter(
    ctx: &mut CompileContext<'_>,
    input_wire: Slid,
    predicate: &crate::query::backend::Predicate,
) -> Result<Slid, String> {
    // Compile the predicate
    let pred = compile_predicate(ctx, input_wire, predicate)?;

    // Get input wire's schema for output
    // In a full implementation, we'd look this up. For now, create a dummy schema.
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);

    // Create output wire
    let out_wire = ctx.create_wire(out_schema)?;

    // Create FilterOp
    let filter_name = ctx.fresh_name("filter");
    let filter = ctx.add_element(ctx.sort_ids.filter_op, &filter_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("FilterOp/op", filter, op)?;
    ctx.define_func("FilterOp/in", filter, input_wire)?;
    ctx.define_func("FilterOp/out", filter, out_wire)?;
    ctx.define_func("FilterOp/pred", filter, pred)?;

    Ok(out_wire)
}

fn compile_distinct(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result<Slid, String> {
    // Create output schema (same as input)
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    // Create DistinctOp
    let distinct_name = ctx.fresh_name("distinct");
    let distinct = ctx.add_element(ctx.sort_ids.distinct_op, &distinct_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("DistinctOp/op", distinct, op)?;
    ctx.define_func("DistinctOp/in", distinct, input_wire)?;
    ctx.define_func("DistinctOp/out", distinct, out_wire)?;

    Ok(out_wire)
}

fn compile_join(
    ctx: &mut CompileContext<'_>,
    left_wire: Slid,
    right_wire: Slid,
    condition: &crate::query::backend::JoinCond,
) -> Result<Slid, String> {
    use crate::query::backend::JoinCond;

    // Create output schema (product of inputs) - simplified for now
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    // Create join condition based on type
    let join_cond = match condition {
        JoinCond::Cross => {
            let cond_name = ctx.fresh_name("cross_join");
            let cross_join = ctx.add_element(ctx.sort_ids.cross_join_cond, &cond_name);
            let join_cond_name = ctx.fresh_name("join_cond");
            let join_cond_elem = ctx.add_element(ctx.sort_ids.join_cond, &join_cond_name);
            ctx.define_func("CrossJoinCond/cond", cross_join, join_cond_elem)?;
            join_cond_elem
        }
        JoinCond::Equi { left_col, right_col } => {
            // Create column references for the join keys
            let left_ref = ctx.create_col_ref(left_wire, *left_col)?;
            let right_ref = ctx.create_col_ref(right_wire, *right_col)?;

            let cond_name = ctx.fresh_name("equi_join");
            let equi_join = ctx.add_element(ctx.sort_ids.equi_join_cond, &cond_name);
            let join_cond_name = ctx.fresh_name("join_cond");
            let join_cond_elem = ctx.add_element(ctx.sort_ids.join_cond, &join_cond_name);

            ctx.define_func("EquiJoinCond/cond", equi_join, join_cond_elem)?;
            ctx.define_func("EquiJoinCond/left_col", equi_join, left_ref)?;
            ctx.define_func("EquiJoinCond/right_col", equi_join, right_ref)?;

            join_cond_elem
        }
    };

    // Create JoinOp
    let join_name = ctx.fresh_name("join");
    let join = ctx.add_element(ctx.sort_ids.join_op, &join_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("JoinOp/op", join, op)?;
    ctx.define_func("JoinOp/left_in", join, left_wire)?;
    ctx.define_func("JoinOp/right_in", join, right_wire)?;
    ctx.define_func("JoinOp/out", join, out_wire)?;
    ctx.define_func("JoinOp/cond", join, join_cond)?;

    Ok(out_wire)
}

fn compile_union(
    ctx: &mut CompileContext<'_>,
    left_wire: Slid,
    right_wire: Slid,
) -> Result<Slid, String> {
    // Create output schema (same as inputs)
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    // Create UnionOp
    let union_name = ctx.fresh_name("union");
    let union_op = ctx.add_element(ctx.sort_ids.union_op, &union_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("UnionOp/op", union_op, op)?;
    ctx.define_func("UnionOp/left_in", union_op, left_wire)?;
    ctx.define_func("UnionOp/right_in", union_op, right_wire)?;
    ctx.define_func("UnionOp/out", union_op, out_wire)?;

    Ok(out_wire)
}

fn compile_delay(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result<Slid, String> {
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    let delay_name = ctx.fresh_name("delay");
    let delay = ctx.add_element(ctx.sort_ids.delay_op, &delay_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("DelayOp/op", delay, op)?;
    ctx.define_func("DelayOp/in", delay, input_wire)?;
    ctx.define_func("DelayOp/out", delay, out_wire)?;

    Ok(out_wire)
}

fn compile_diff(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result<Slid, String> {
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    let diff_name = ctx.fresh_name("diff");
    let diff = ctx.add_element(ctx.sort_ids.diff_op, &diff_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("DiffOp/op", diff, op)?;
    ctx.define_func("DiffOp/in", diff, input_wire)?;
    ctx.define_func("DiffOp/out", diff, out_wire)?;

    Ok(out_wire)
}

fn compile_integrate(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result<Slid, String> {
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    let integrate_name = ctx.fresh_name("integrate");
    let integrate = ctx.add_element(ctx.sort_ids.integrate_op, &integrate_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("IntegrateOp/op", integrate, op)?;
    ctx.define_func("IntegrateOp/in", integrate, input_wire)?;
    ctx.define_func("IntegrateOp/out", integrate, out_wire)?;

    Ok(out_wire)
}

fn compile_negate(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result<Slid, String> {
    // Negate preserves schema (from wf/negate_schema axiom)
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    let negate_name = ctx.fresh_name("negate");
    let negate = ctx.add_element(ctx.sort_ids.negate_op, &negate_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("NegateOp/op", negate, op)?;
    ctx.define_func("NegateOp/in", negate, input_wire)?;
    ctx.define_func("NegateOp/out", negate, out_wire)?;

    Ok(out_wire)
}

fn compile_empty(ctx: &mut CompileContext<'_>) -> Result<Slid, String> {
    // Empty produces a wire with some schema (we use a fresh placeholder)
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    let empty_name = ctx.fresh_name("empty");
    let empty = ctx.add_element(ctx.sort_ids.empty_op, &empty_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("EmptyOp/op", empty, op)?;
    ctx.define_func("EmptyOp/out", empty, out_wire)?;

    Ok(out_wire)
}

fn compile_project(
    ctx: &mut CompileContext<'_>,
    input_wire: Slid,
    columns: &[usize],
) -> Result<Slid, String> {
    // Create output schema (different from input - projected schema)
    let schema_name = ctx.fresh_name("schema");
    let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name);
    let out_wire = ctx.create_wire(out_schema)?;

    // Create ProjMapping
    let mapping_name = ctx.fresh_name("proj_mapping");
    let proj_mapping = ctx.add_element(ctx.sort_ids.proj_mapping, &mapping_name);

    // Create ProjEntry for each column
    for (target_idx, &source_col) in columns.iter().enumerate() {
        let entry_name = ctx.fresh_name("proj_entry");
        let entry = ctx.add_element(ctx.sort_ids.proj_entry, &entry_name);

        // Source column reference (from input wire)
        let source_ref = ctx.create_col_ref(input_wire, source_col)?;

        // Target path (simplified: just use HerePath for now)
        // In a full implementation, we'd create proper paths for each output column
        let target_path_name = ctx.fresh_name("col_path");
        let target_path = ctx.add_element(ctx.sort_ids.col_path, &target_path_name);

        // If this is not the first column, we'd need FstPath/SndPath navigation
        // For now, we just use HerePath for all (placeholder behavior)
        if target_idx == 0 {
            let here_name = ctx.fresh_name("here_path");
            let here = ctx.add_element(ctx.sort_ids.here_path, &here_name);
            ctx.define_func("HerePath/path", here, target_path)?;
        }

        ctx.define_func("ProjEntry/mapping", entry, proj_mapping)?;
        ctx.define_func("ProjEntry/source", entry, source_ref)?;
        ctx.define_func("ProjEntry/target_path", entry, target_path)?;
    }

    // Create ProjectOp
    let project_name = ctx.fresh_name("project");
    let project = ctx.add_element(ctx.sort_ids.project_op, &project_name);

    let op_name = ctx.fresh_name("op");
    let op = ctx.add_element(ctx.sort_ids.op, &op_name);

    ctx.define_func("ProjectOp/op", project, op)?;
    ctx.define_func("ProjectOp/in", project, input_wire)?;
    ctx.define_func("ProjectOp/out", project, out_wire)?;
    ctx.define_func("ProjectOp/mapping", project, proj_mapping)?;

    Ok(out_wire)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::repl::ReplState;
    use egglog_numeric_id::NumericId;

    fn load_relalg_theory() -> Rc<ElaboratedTheory> {
        let meta_content = std::fs::read_to_string("theories/GeologMeta.geolog")
            .expect("Failed to read GeologMeta.geolog");
        let ir_content =
std::fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + + let mut state = ReplState::new(); + state + .execute_geolog(&meta_content) + .expect("GeologMeta should load"); + state + .execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + + state + .theories + .get("RelAlgIR") + .expect("RelAlgIR should exist") + .clone() + } + + #[test] + fn test_compile_scan() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let plan = QueryOp::Scan { sort_idx: 0 }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Scan compilation should succeed"); + + let instance = result.unwrap(); + // Should have: Theory, Srt, BaseSchema, Schema, Wire, ScanOp, Op + assert!( + instance.structure.len() >= 7, + "Scan should create at least 7 elements" + ); + } + + #[test] + fn test_compile_filter_scan() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::True, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Filter(Scan) compilation should succeed"); + + let instance = result.unwrap(); + // Should have scan elements + filter elements + assert!( + instance.structure.len() >= 12, + "Filter(Scan) should create at least 12 elements" + ); + } + + #[test] + fn test_compile_join() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test cross join + let plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: crate::query::backend::JoinCond::Cross, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Cross join compilation should succeed"); + + // Test equi-join + let equi_plan = 
QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: crate::query::backend::JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let result = compile_to_relalg(&equi_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Equi-join compilation should succeed"); + } + + #[test] + fn test_compile_predicate() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test And predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::And( + Box::new(crate::query::backend::Predicate::True), + Box::new(crate::query::backend::Predicate::False), + ), + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "And predicate compilation should succeed"); + + // Test Or predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::Or( + Box::new(crate::query::backend::Predicate::True), + Box::new(crate::query::backend::Predicate::True), + ), + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Or predicate compilation should succeed"); + + // Test ColEqCol predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::ColEqCol { left: 0, right: 1 }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "ColEqCol predicate compilation should succeed"); + + // Test ColEqConst predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(42), + }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!( + result.is_ok(), + "ColEqConst predicate compilation 
should succeed" + ); + let instance = result.unwrap(); + // Should have created an Elem element for the constant + assert!( + instance.names.values().any(|n| n.starts_with("elem_")), + "Should create Elem element for constant" + ); + + // Test FuncEq predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::FuncEq { + func_idx: 0, + arg_col: 0, + result_col: 1, + }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "FuncEq predicate compilation should succeed"); + let instance = result.unwrap(); + // Should have created a Func element + assert!( + instance.names.values().any(|n| n.starts_with("func_")), + "Should create Func element for function reference" + ); + + // Test FuncEqConst predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::FuncEqConst { + func_idx: 0, + arg_col: 0, + expected: Slid::from_usize(99), + }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!( + result.is_ok(), + "FuncEqConst predicate compilation should succeed" + ); + let instance = result.unwrap(); + // Should have both Func and Elem elements + assert!( + instance.names.values().any(|n| n.starts_with("func_")), + "Should create Func element" + ); + assert!( + instance.names.values().any(|n| n.starts_with("elem_")), + "Should create Elem element for expected value" + ); + } + + #[test] + fn test_compile_dbsp_operators() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test Delay + let delay_plan = QueryOp::Delay { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + assert!( + compile_to_relalg(&delay_plan, &relalg_theory, &mut universe).is_ok(), + "Delay compilation should succeed" + ); + + // Test Diff + let diff_plan = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 
}), + state_id: 0, + }; + assert!( + compile_to_relalg(&diff_plan, &relalg_theory, &mut universe).is_ok(), + "Diff compilation should succeed" + ); + + // Test Integrate + let integrate_plan = QueryOp::Integrate { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + assert!( + compile_to_relalg(&integrate_plan, &relalg_theory, &mut universe).is_ok(), + "Integrate compilation should succeed" + ); + } + + #[test] + fn test_compile_negate_and_empty() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test Negate + let negate_plan = QueryOp::Negate { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + let result = compile_to_relalg(&negate_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Negate compilation should succeed"); + + // Test Empty + let empty_plan = QueryOp::Empty; + let result = compile_to_relalg(&empty_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Empty compilation should succeed"); + + // Should have: Schema, Wire, EmptyOp, Op + let instance = result.unwrap(); + assert!( + instance.structure.len() >= 4, + "Empty should create at least 4 elements" + ); + + // Test Union with Empty (common pattern) + let union_empty_plan = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Empty), + }; + let result = compile_to_relalg(&union_empty_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Union(Scan, Empty) compilation should succeed"); + } + + #[test] + fn test_compile_project() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Project columns 0 and 2 from a join result + let project_plan = QueryOp::Project { + input: Box::new(QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: crate::query::backend::JoinCond::Cross, + }), + columns: vec![0, 2], + }; + + let result = compile_to_relalg(&project_plan, 
&relalg_theory, &mut universe); + assert!(result.is_ok(), "Project compilation should succeed: {:?}", result.err()); + + // Simple project: select single column + let simple_project = QueryOp::Project { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + columns: vec![0], + }; + let result = compile_to_relalg(&simple_project, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Single column project should succeed"); + + // Identity project (all columns in order) + let identity_project = QueryOp::Project { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + columns: vec![0, 1, 2], + }; + let result = compile_to_relalg(&identity_project, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Identity project should succeed"); + } +} diff --git a/src/repl.rs b/src/repl.rs new file mode 100644 index 0000000..7482da8 --- /dev/null +++ b/src/repl.rs @@ -0,0 +1,1659 @@ +//! REPL (Read-Eval-Print Loop) for Geolog +//! +//! Provides an interactive environment for defining theories and instances, +//! inspecting structures, and managing workspaces. +//! +//! ## Architecture Note +//! +//! This module uses `Store` as the source of truth for all data. The `theories` +//! and `instances` HashMaps are TRANSITIONAL: they maintain runtime objects +//! needed for elaboration until the full GeologMeta-based elaboration is complete. + +use std::collections::HashMap; +use std::path::PathBuf; +use std::rc::Rc; + +use crate::ast; +use crate::core::{DerivedSort, ElaboratedTheory, Structure}; +use crate::elaborate::{Env, ElaborationContext, InstanceElaborationResult, elaborate_instance_ctx, elaborate_theory}; +use crate::id::{NumericId, Slid}; +use crate::store::Store; + +// Re-export InstanceEntry from elaborate for backwards compatibility +pub use crate::elaborate::InstanceEntry; + +/// REPL state - backed by Store with transitional runtime objects. +/// +/// The `store` is the source of truth for persistence and version control. 
+/// The `theories` and `instances` HashMaps are transitional: they hold +/// runtime objects needed for elaboration until we complete the migration +/// to fully GeologMeta-based elaboration. +pub struct ReplState { + /// The append-only store (source of truth for persistence) + pub store: Store, + + /// TRANSITIONAL: Runtime theories for elaboration + /// Will be removed once elaboration writes directly to Store + pub theories: HashMap>, + + /// TRANSITIONAL: Runtime instances for elaboration and display + /// Will be removed once elaboration and display use Store directly + pub instances: HashMap, + + /// Multi-line input buffer + pub input_buffer: String, + + /// Bracket depth for multi-line detection + pub bracket_depth: i32, +} + +impl Default for ReplState { + fn default() -> Self { + Self::new() + } +} + +impl ReplState { + /// Create a new REPL state with empty store + pub fn new() -> Self { + Self { + store: Store::new(), + theories: HashMap::new(), + instances: HashMap::new(), + input_buffer: String::new(), + bracket_depth: 0, + } + } + + /// Create a new REPL state with a persistence path + /// + /// If the path exists, loads the persisted Store and reconstructs runtime objects. 
+ pub fn with_path(path: impl Into) -> Self { + let path = path.into(); + let store = Store::load_or_create(&path); + + // Reconstruct theories from persisted GeologMeta + let theories = store.reconstruct_all_theories(); + + // Reconstruct instances from persisted GeologMeta + let reconstructed = store.reconstruct_all_instances(); + let instances: HashMap = reconstructed + .into_iter() + .map(|(name, ri)| { + // For now, use theory_name as theory_type too + // TODO: Store full theory_type in GeologMeta for proper reconstruction + let theory_type = ri.theory_name.clone(); + let mut entry = InstanceEntry::new(ri.structure, ri.theory_name, theory_type); + // Populate element names + for (slid, elem_name) in ri.element_names { + entry.register_element(elem_name, slid); + } + (name, entry) + }) + .collect(); + + Self { + store, + theories, + instances, + input_buffer: String::new(), + bracket_depth: 0, + } + } + + /// Reset to initial state (clear all theories and instances) + pub fn reset(&mut self) { + self.store = Store::new(); + self.theories.clear(); + self.instances.clear(); + self.input_buffer.clear(); + self.bracket_depth = 0; + } + + /// Get a structure by instance name + pub fn get_structure(&self, name: &str) -> Option<&Structure> { + self.instances.get(name).map(|e| &e.structure) + } + + /// Check if the state has uncommitted changes + pub fn is_dirty(&self) -> bool { + self.store.is_dirty() + } + + /// Commit current changes to the store + pub fn commit(&mut self, message: Option<&str>) -> Result { + self.store.commit(message) + } + + /// Get commit history + pub fn commit_history(&self) -> Vec { + self.store.commit_history() + } + + /// Process a line of input, handling multi-line bracket matching + pub fn process_line(&mut self, line: &str) -> InputResult { + let trimmed = line.trim(); + + // Empty line while buffering - submit incomplete input (will error) + if trimmed.is_empty() { + if self.input_buffer.is_empty() { + return InputResult::Empty; + } + // 
Double-empty-line to force submit + return InputResult::Incomplete; + } + + // Meta-command (only at start, not in continuation) + if trimmed.starts_with(':') && self.input_buffer.is_empty() { + return InputResult::MetaCommand(MetaCommand::parse(trimmed)); + } + + // Accumulate geolog input + if !self.input_buffer.is_empty() { + self.input_buffer.push('\n'); + } + self.input_buffer.push_str(line); + + // Update bracket depth (ignoring brackets in strings/comments) + for ch in line.chars() { + match ch { + '{' | '(' | '[' => self.bracket_depth += 1, + '}' | ')' | ']' => self.bracket_depth -= 1, + _ => {} + } + } + + // Complete when brackets balanced and line ends with } or ; + if self.bracket_depth <= 0 && (trimmed.ends_with('}') || trimmed.ends_with(';')) { + let input = std::mem::take(&mut self.input_buffer); + self.bracket_depth = 0; + InputResult::GeologInput(input) + } else { + InputResult::Incomplete + } + } + + /// Force submit current buffer (for Ctrl-D or double-empty-line) + pub fn force_submit(&mut self) -> Option { + if self.input_buffer.is_empty() { + None + } else { + self.bracket_depth = 0; + Some(std::mem::take(&mut self.input_buffer)) + } + } + + /// Execute geolog source code (theory or instance definitions) + /// + /// Returns a list of results, one for each declaration processed. + pub fn execute_geolog(&mut self, source: &str) -> Result, String> { + // Parse the input + let file = crate::parse(source).map_err(|e| format!("Parse error: {}", e))?; + + let mut results = Vec::new(); + + for decl in &file.declarations { + match &decl.node { + ast::Declaration::Namespace(name) => { + results.push(ExecuteResult::Namespace(name.clone())); + } + ast::Declaration::Theory(t) => { + // Check for duplicate theory name + if self.theories.contains_key(&t.name) { + return Err(format!( + "Theory '{}' already exists. 
Use a different name or :reset to clear.", + t.name + )); + } + + // TRANSITIONAL: Build an Env from self.theories for elaborate_theory + let mut env = Env::new(); + for (name, theory) in &self.theories { + env.theories.insert(name.clone(), theory.clone()); + } + + let elab = elaborate_theory(&mut env, t) + .map_err(|e| format!("Elaboration error: {}", e))?; + + let name = elab.theory.name.clone(); + let num_sorts = elab.theory.signature.sorts.len(); + let num_functions = elab.theory.signature.functions.len(); + let num_relations = elab.theory.signature.relations.len(); + let num_axioms = elab.theory.axioms.len(); + + // Register in Store with full signature + let theory_slid = self.store.create_theory(&name)?; + let sig_result = self.store.persist_signature(theory_slid, &elab.theory.signature)?; + + // Persist axioms + self.store.persist_axioms( + theory_slid, + &elab.theory.axioms, + &elab.theory.axiom_names, + &sig_result, + )?; + + // Store in transitional HashMap (will be removed once we query Store directly) + self.theories.insert(name.clone(), Rc::new(elab)); + + results.push(ExecuteResult::Theory { + name, + num_sorts, + num_functions, + num_relations, + num_axioms, + }); + } + ast::Declaration::Instance(inst) => { + // Check for duplicate instance name + if self.instances.contains_key(&inst.name) { + return Err(format!( + "Instance '{}' already exists. 
Use a different name or :reset to clear.", + inst.name + )); + } + + // Use the elaboration that works with our transitional state + // If totality check fails, try again with partial elaboration + let (elab_result, is_partial) = match self.elaborate_instance_internal(inst) { + Ok(result) => (result, false), + Err(e) if e.contains("partial function") => { + // Retry with partial elaboration + eprintln!("Note: Instance has partial functions, allowing for chase to complete them"); + let result = self.elaborate_instance_partial(inst) + .map_err(|e| format!("Elaboration error: {}", e))?; + (result, true) + } + Err(e) => return Err(format!("Elaboration error: {}", e)), + }; + let _ = is_partial; // Used for logging/warnings + + let instance_name = inst.name.clone(); + let theory_name = type_expr_to_theory_name(&inst.theory); + let theory_type = type_expr_to_full_string(&inst.theory); + let num_elements = elab_result.structure.len(); + + // Build InstanceEntry with element names from elaboration + // This includes BOTH imported elements AND locally declared elements + let mut entry = InstanceEntry::new(elab_result.structure, theory_name.clone(), theory_type); + + // Copy nested instance metadata for cross-instance references + entry.nested_meta = elab_result.nested_meta; + + // Register ALL element names from elaboration result + for (slid, elem_name) in elab_result.slid_to_name { + entry.register_element(elem_name.clone(), slid); + + // Register local (non-qualified) names in store's naming index + // Only register names that don't contain '/' (local to this instance) + if !elem_name.contains('/') { + let luid = entry.structure.get_luid(slid); + if let Some(uuid) = self.store.universe.get(luid) { + self.store.naming.insert( + uuid, + vec![instance_name.clone(), elem_name.clone()], + ); + } + } + } + + // Register in Store and persist instance data + if let Some((theory_slid, _)) = self.store.resolve_name(&theory_name) { + let instance_slid = 
self.store.create_instance(&instance_name, theory_slid)?; + + // Build element name map (Slid -> String) for persistence + let elem_names: HashMap = entry + .slid_to_name + .iter() + .map(|(&slid, name)| (slid, name.clone())) + .collect(); + + // Persist all instance data to GeologMeta + self.store.persist_instance_data( + instance_slid, + theory_slid, + &entry.structure, + &elem_names, + )?; + } + + // Store in transitional HashMap + self.instances.insert(instance_name.clone(), entry); + + results.push(ExecuteResult::Instance { + name: instance_name, + theory_name, + num_elements, + }); + } + ast::Declaration::Query(q) => { + let result = self.execute_query(q)?; + results.push(ExecuteResult::Query(result)); + } + } + } + + // Return all results + if results.is_empty() { + Err("No declarations found".to_string()) + } else { + Ok(results) + } + } + + /// Internal instance elaboration that works with our transitional state + fn elaborate_instance_internal(&mut self, inst: &ast::InstanceDecl) -> Result { + // Build elaboration context from our state + let mut ctx = ElaborationContext { + theories: &self.theories, + instances: &self.instances, + universe: &mut self.store.universe, + siblings: HashMap::new(), + }; + + elaborate_instance_ctx(&mut ctx, inst) + .map_err(|e| e.to_string()) + } + + /// Internal instance elaboration that skips totality validation. + /// Use this for instances that will be completed by the chase algorithm. + pub fn elaborate_instance_partial(&mut self, inst: &ast::InstanceDecl) -> Result { + use crate::elaborate::elaborate_instance_ctx_partial; + + // Build elaboration context from our state + let mut ctx = ElaborationContext { + theories: &self.theories, + instances: &self.instances, + universe: &mut self.store.universe, + siblings: HashMap::new(), + }; + + elaborate_instance_ctx_partial(&mut ctx, inst) + .map_err(|e| e.to_string()) + } + + /// Execute a query: find an instance satisfying the goal type. 
+ /// + /// For a query like `query q { ? : ExampleNet problem0 Solution instance; }`: + /// 1. Parse the goal type to get theory name and type arguments + /// 2. Look up the theory and param instances + /// 3. Build a base structure with imported elements from param instances + /// 4. Run the solver to find a satisfying extension + fn execute_query(&mut self, q: &ast::QueryDecl) -> Result { + use crate::solver::{query, Budget, EnumerationResult}; + + let start = std::time::Instant::now(); + + // The goal should be an Instance type: tokens ending with `instance` + if !q.goal.is_instance() { + return Err("Query goal must be an instance type (e.g., `T instance`)".to_string()); + } + let inner_type = q.goal.instance_inner() + .ok_or_else(|| "Failed to extract inner type from instance".to_string())?; + + // Resolve the instance type to get theory name and arguments + let resolved = self.resolve_query_type(&inner_type)?; + let theory = self.theories.get(&resolved.theory_name) + .ok_or_else(|| format!("Unknown theory: {}", resolved.theory_name))? + .clone(); + + // Build base structure from param instances + let (base_structure, universe) = self.build_query_base(&resolved, &theory)?; + + // Run the solver + let budget = Budget::new(5000, 10000); // 5 second timeout, 10k step limit + let result = query(base_structure, universe, theory.clone(), budget); + + let time_ms = start.elapsed().as_secs_f64() * 1000.0; + + match result { + EnumerationResult::Found { model, .. } => Ok(QueryResult::Found { + query_name: q.name.clone(), + theory_name: resolved.theory_name, + model, + time_ms, + }), + EnumerationResult::Unsat { .. } => Ok(QueryResult::Unsat { + query_name: q.name.clone(), + theory_name: resolved.theory_name, + time_ms, + }), + EnumerationResult::Incomplete { reason, .. 
} => Ok(QueryResult::Incomplete { + query_name: q.name.clone(), + theory_name: resolved.theory_name, + reason, + time_ms, + }), + } + } + + /// Resolve a query goal type expression to get theory name and param bindings. + fn resolve_query_type(&self, ty: &ast::TypeExpr) -> Result { + use crate::ast::TypeToken; + + // Collect all path tokens from the type expression + let all_paths: Vec = ty.tokens.iter() + .filter_map(|t| match t { + TypeToken::Path(p) => Some(p.to_string()), + _ => None, + }) + .collect(); + + if all_paths.is_empty() { + return Err("Query type has no path components".to_string()); + } + + // Simple case: just one path + if all_paths.len() == 1 { + return Ok(ResolvedQueryType { + theory_name: all_paths[0].clone(), + arguments: vec![], + }); + } + + // Multiple paths: rightmost is theory name, rest are args + let theory_name = all_paths.last().unwrap().clone(); + let args: Vec = all_paths[..all_paths.len() - 1].to_vec(); + + // Look up theory to match params + let theory = self.theories.get(&theory_name) + .ok_or_else(|| format!("Unknown theory: {}", theory_name))?; + + if args.len() != theory.params.len() { + return Err(format!( + "Theory {} expects {} parameters, got {}", + theory_name, theory.params.len(), args.len() + )); + } + + let arguments: Vec<(String, String)> = theory.params + .iter() + .zip(args.iter()) + .map(|(param, arg)| (param.name.clone(), arg.clone())) + .collect(); + + Ok(ResolvedQueryType { + theory_name, + arguments, + }) + } + + /// Build a base structure for a query by importing elements from param instances. 
+ fn build_query_base( + &self, + resolved: &ResolvedQueryType, + theory: &Rc, + ) -> Result<(Structure, crate::universe::Universe), String> { + use crate::core::FunctionDomainInfo; + + let sig = &theory.theory.signature; + let mut structure = Structure::new(sig.sorts.len()); + let mut universe = crate::universe::Universe::new(); + + // Initialize relation storage + let relation_arities: Vec = sig.relations + .iter() + .map(|rel| rel.domain.arity()) + .collect(); + structure.init_relations(&relation_arities); + + // Track imported UUIDs to avoid duplicates across params + let mut imported_uuids = std::collections::HashSet::new(); + // Track Luid -> new Slid mapping for importing function values later + let mut luid_to_new_slid: std::collections::HashMap = std::collections::HashMap::new(); + + // Import elements from each param instance + for (param_name, instance_name) in &resolved.arguments { + let param_entry = self.instances.get(instance_name) + .ok_or_else(|| format!("Unknown instance: {}", instance_name))?; + + let param_theory = self.theories.get(¶m_entry.theory_name) + .ok_or_else(|| format!("Unknown theory: {}", param_entry.theory_name))?; + + // Import each element from the param instance + for &slid in param_entry.slid_to_name.keys() { + let param_sort_id = param_entry.structure.sorts[slid.index()]; + let param_sort_name = ¶m_theory.theory.signature.sorts[param_sort_id]; + + // Try different mappings for the local sort name. + // The sort might be: + // 1. param_name/param_sort_name (e.g., "N/P" for a PetriNet param) + // 2. 
Just param_sort_name if it already has a prefix (for nested params) + let local_sort_id = if let Some(id) = sig.lookup_sort(&format!("{}/{}", param_name, param_sort_name)) { + id + } else if let Some(id) = sig.lookup_sort(param_sort_name) { + // The sort might already be prefixed from an earlier param in the chain + // (e.g., "N/P" in problem0 should map to "N/P" in Solution, not "RP/N/P") + id + } else { + // Sort not found - skip this element (might be from a nested instance + // that will be imported separately or doesn't map to this theory) + continue; + }; + + // Get the existing Luid and its Uuid + let luid = param_entry.structure.get_luid(slid); + let uuid = self.store.universe.get(luid) + .ok_or_else(|| format!("No Uuid for Luid {:?}", luid))?; + + // Skip if already imported from an earlier param + if imported_uuids.contains(&uuid) { + continue; + } + imported_uuids.insert(uuid); + + // Register in our new universe and add element + let new_luid = universe.intern(uuid); + let local_slid = structure.add_element_with_luid(new_luid, local_sort_id); + luid_to_new_slid.insert(luid, local_slid); + } + + // Import elements from nested structures (e.g., initial_marking, target_marking in ReachabilityProblem) + for (nested_name, nested_struct) in ¶m_entry.structure.nested { + // Nested structure elements have sorts like "initial_marking/token" in the param theory + // They map to sorts like "RP/initial_marking/token" in the target theory + for slid_idx in 0..nested_struct.sorts.len() { + let slid = crate::id::Slid::from_usize(slid_idx); + let _nested_sort_id = nested_struct.sorts[slid_idx]; + + // Get sort name from the nested theory (we don't have it directly, so reconstruct) + // The nested structure sorts are indexed locally starting from 0 + // We need to find the corresponding sort name in the target theory + let nested_sort_prefix = format!("{}/{}", param_name, nested_name); + + // Try to find a sort in the target theory that matches this nested element + let 
local_sort_id = sig.sorts.iter().position(|s| { + s.starts_with(&nested_sort_prefix) + }); + + if let Some(local_sort_id) = local_sort_id { + // Get the Luid and Uuid + let luid = nested_struct.get_luid(slid); + if let Some(uuid) = self.store.universe.get(luid) + && !imported_uuids.contains(&uuid) { + imported_uuids.insert(uuid); + let new_luid = universe.intern(uuid); + let local_slid = structure.add_element_with_luid(new_luid, local_sort_id); + luid_to_new_slid.insert(luid, local_slid); + } + } + } + } + } + + // Initialize function storage + let domains: Vec = sig.functions + .iter() + .map(|func| match &func.domain { + DerivedSort::Base(id) => FunctionDomainInfo::Base(*id), + DerivedSort::Product(fields) => { + let field_sorts: Vec = fields + .iter() + .filter_map(|(_, ds)| match ds { + DerivedSort::Base(id) => Some(*id), + DerivedSort::Product(_) => None, + }) + .collect(); + FunctionDomainInfo::Product(field_sorts) + } + }) + .collect(); + structure.init_functions_full(&domains); + + // Import function values from param instances + for (param_name, instance_name) in &resolved.arguments { + let param_entry = self.instances.get(instance_name).unwrap(); + let param_theory = self.theories.get(¶m_entry.theory_name).unwrap(); + let param_sig = ¶m_theory.theory.signature; + + // For each function in the param theory + for (param_func_id, param_func) in param_sig.functions.iter().enumerate() { + // Find the corresponding function in the target theory + // It should be named "{param_name}/{func_name}" + let target_func_name = format!("{}/{}", param_name, param_func.name); + let target_func_id = sig.functions.iter().position(|f| f.name == target_func_name); + + if let Some(target_func_id) = target_func_id { + // Copy function values + // Iterate over all elements in the domain of the param function + if let DerivedSort::Base(domain_sort) = ¶m_func.domain { + // Get all elements of the domain sort in the param instance + for (idx, &sort_id) in 
param_entry.structure.sorts.iter().enumerate() { + if sort_id == *domain_sort { + let domain_slid = Slid::from_usize(idx); + let domain_sort_slid = param_entry.structure.sort_local_id(domain_slid); + + // Get the function value in the param instance + if let Some(codomain_slid) = param_entry.structure.get_function(param_func_id, domain_sort_slid) { + // Map both domain and codomain to new Slids + let domain_luid = param_entry.structure.get_luid(domain_slid); + let codomain_luid = param_entry.structure.get_luid(codomain_slid); + + if let (Some(&new_domain_slid), Some(&new_codomain_slid)) = + (luid_to_new_slid.get(&domain_luid), luid_to_new_slid.get(&codomain_luid)) + { + // Define the function value in the new structure + let _ = structure.define_function(target_func_id, new_domain_slid, new_codomain_slid); + } + } + } + } + } + } + } + } + + Ok((structure, universe)) + } + + /// List all theories (runtime + persisted) + pub fn list_theories(&self) -> Vec { + use crate::store::BindingKind; + use std::collections::HashSet; + + let mut result: Vec = self.theories + .iter() + .map(|(name, theory)| TheoryInfo { + name: name.clone(), + num_sorts: theory.theory.signature.sorts.len(), + num_functions: theory.theory.signature.functions.len(), + num_relations: theory.theory.signature.relations.len(), + num_axioms: theory.theory.axioms.len(), + }) + .collect(); + + // Add persisted theories that aren't in runtime + let runtime_names: HashSet<_> = self.theories.keys().cloned().collect(); + for (name, kind, slid) in self.store.list_bindings() { + if kind == BindingKind::Theory && !runtime_names.contains(&name) { + // Query the Store for theory structure + let sorts = self.store.query_theory_sorts(slid); + let funcs = self.store.query_theory_funcs(slid); + let rels = self.store.query_theory_rels(slid); + let axioms = self.store.query_theory_sequents(slid); + result.push(TheoryInfo { + name, + num_sorts: sorts.len(), + num_functions: funcs.len(), + num_relations: rels.len(), + 
num_axioms: axioms.len(), + }); + } + } + + result + } + + /// List all instances (runtime + persisted) + pub fn list_instances(&self) -> Vec { + use crate::store::BindingKind; + use std::collections::HashSet; + + let mut result: Vec = self.instances + .iter() + .map(|(name, entry)| InstanceInfo { + name: name.clone(), + theory_name: entry.theory_name.clone(), + num_elements: entry.structure.len(), + }) + .collect(); + + // Add persisted instances that aren't in runtime + let runtime_names: HashSet<_> = self.instances.keys().cloned().collect(); + for (name, kind, _slid) in self.store.list_bindings() { + if kind == BindingKind::Instance && !runtime_names.contains(&name) { + result.push(InstanceInfo { + name, + theory_name: "(persisted)".to_string(), // Would need query to get + num_elements: 0, // Unknown + }); + } + } + + result + } + + /// Inspect a theory or instance by name + pub fn inspect(&self, name: &str) -> Option { + // Check theories first + if let Some(theory) = self.theories.get(name) { + return Some(InspectResult::Theory(TheoryDetail { + name: name.to_string(), + params: theory + .params + .iter() + .map(|p| (p.name.clone(), p.theory_name.clone())) + .collect(), + sorts: theory.theory.signature.sorts.clone(), + functions: theory + .theory + .signature + .functions + .iter() + .map(|f| { + let domain = format_derived_sort(&f.domain, &theory.theory.signature); + let codomain = format_derived_sort(&f.codomain, &theory.theory.signature); + (f.name.clone(), domain, codomain) + }) + .collect(), + relations: theory + .theory + .signature + .relations + .iter() + .map(|r| { + let domain = format_derived_sort(&r.domain, &theory.theory.signature); + (r.name.clone(), domain) + }) + .collect(), + instance_fields: theory + .theory + .signature + .instance_fields + .iter() + .map(|f| (f.name.clone(), f.theory_type.clone())) + .collect(), + axioms: theory + .theory + .axioms + .iter() + .map(|ax| format_axiom(ax, &theory.theory.signature)) + .collect(), + })); + } + 
+ // Check instances + if let Some(entry) = self.instances.get(name) { + let theory = self.theories.get(&entry.theory_name)?; + + // Collect nested instance info + let nested: Vec<(String, usize)> = entry + .structure + .nested + .iter() + .map(|(field_name, nested_struct)| { + (field_name.clone(), nested_struct.len()) + }) + .collect(); + + return Some(InspectResult::Instance(InstanceDetail { + name: name.to_string(), + theory_name: entry.theory_name.clone(), + elements: self.collect_elements(entry, &theory.theory.signature), + functions: self.collect_function_values(entry, &theory.theory.signature), + relations: self.collect_relation_tuples(entry, &theory.theory.signature), + nested, + })); + } + + None + } + + /// Collect elements grouped by sort + fn collect_elements( + &self, + entry: &InstanceEntry, + signature: &crate::core::Signature, + ) -> Vec<(String, Vec)> { + let mut result = Vec::new(); + for (sort_id, sort_name) in signature.sorts.iter().enumerate() { + let elements: Vec = entry + .structure + .carriers[sort_id] + .iter() + .map(|slid_u64| { + let slid = Slid::from_usize(slid_u64 as usize); + entry + .get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid_u64)) + }) + .collect(); + if !elements.is_empty() { + result.push((sort_name.clone(), elements)); + } + } + result + } + + /// Collect function values as "domain func = codomain" + fn collect_function_values( + &self, + entry: &InstanceEntry, + signature: &crate::core::Signature, + ) -> Vec<(String, Vec)> { + use crate::core::FunctionColumn; + + let mut result = Vec::new(); + for (func_id, func_sym) in signature.functions.iter().enumerate() { + if func_id >= entry.structure.functions.len() { + continue; + } + let mut values = Vec::new(); + + match &func_sym.domain { + DerivedSort::Base(domain_sort_id) => { + // Check if this is a product codomain function + if let FunctionColumn::ProductCodomain { field_columns, field_names, .. 
} = + &entry.structure.functions[func_id] + { + // Product codomain: format as `domain func = [field1: v1, ...]` + for slid_u64 in entry.structure.carriers[*domain_sort_id].iter() { + let slid = Slid::from_usize(slid_u64 as usize); + let sort_slid = entry.structure.sort_local_id(slid); + let idx = sort_slid.index(); + + // Check if all fields are defined for this element + let all_defined = field_columns.iter().all(|col| { + col.get(idx) + .and_then(|opt| crate::id::get_slid(*opt)) + .is_some() + }); + + if all_defined { + let domain_name = entry + .get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid_u64)); + + let field_strs: Vec = field_names + .iter() + .zip(field_columns.iter()) + .map(|(name, col)| { + let codomain_slid = crate::id::get_slid(col[idx]).unwrap(); + let codomain_name = entry + .get_name(codomain_slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", codomain_slid)); + format!("{}: {}", name, codomain_name) + }) + .collect(); + + values.push(format!( + "{} {} = [{}]", + domain_name, func_sym.name, field_strs.join(", ") + )); + } + } + } else { + // Base codomain: iterate over carrier elements + for slid_u64 in entry.structure.carriers[*domain_sort_id].iter() { + let slid = Slid::from_usize(slid_u64 as usize); + let sort_slid = entry.structure.sort_local_id(slid); + if let Some(codomain_slid) = + entry.structure.get_function(func_id, sort_slid) + { + let domain_name = entry + .get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid_u64)); + let codomain_name = entry + .get_name(codomain_slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", codomain_slid)); + values.push(format!( + "{} {} = {}", + domain_name, func_sym.name, codomain_name + )); + } + } + } + } + DerivedSort::Product(fields) => { + // Product domain: iterate over defined entries in storage + if let FunctionColumn::ProductLocal { storage, .. 
} = + &entry.structure.functions[func_id] + { + for (tuple_indices, codomain_slid) in storage.iter_defined() { + // Map sort-local indices back to Slids for name lookup + let tuple_strs: Vec = tuple_indices + .iter() + .zip(fields.iter()) + .map(|(&local_idx, (field_name, field_sort))| { + // Get the Slid at this sort-local position + let slid = if let DerivedSort::Base(sort_id) = field_sort { + entry.structure.carriers[*sort_id] + .iter() + .nth(local_idx) + .map(|u| Slid::from_usize(u as usize)) + } else { + None + }; + + let elem_name = slid + .and_then(|s| entry.get_name(s).map(|n| n.to_string())) + .unwrap_or_else(|| format!("#{}", local_idx)); + + format!("{}: {}", field_name, elem_name) + }) + .collect(); + + let codomain_name = entry + .get_name(codomain_slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", codomain_slid)); + + values.push(format!( + "[{}] {} = {}", + tuple_strs.join(", "), + func_sym.name, + codomain_name + )); + } + } + } + } + + if !values.is_empty() { + result.push((func_sym.name.clone(), values)); + } + } + result + } + + /// Collect relation tuples as vectors of element names + fn collect_relation_tuples( + &self, + entry: &InstanceEntry, + signature: &crate::core::Signature, + ) -> Vec<(String, Vec, Vec>)> { + let mut result = Vec::new(); + + for (rel_id, rel_sym) in signature.relations.iter().enumerate() { + if rel_id >= entry.structure.relations.len() { + continue; + } + + // Extract field names from the relation's domain type + let field_names: Vec = match &rel_sym.domain { + crate::core::DerivedSort::Base(_) => vec![], // Unary relation, no field names + crate::core::DerivedSort::Product(fields) => { + fields.iter().map(|(name, _)| name.clone()).collect() + } + }; + + let relation = &entry.structure.relations[rel_id]; + let mut tuples: Vec> = Vec::new(); + + for tuple in relation.iter() { + let tuple_names: Vec = tuple + .iter() + .map(|&slid| { + entry + .get_name(slid) + .map(|s| s.to_string()) + 
.unwrap_or_else(|| format!("#{}", slid)) + }) + .collect(); + tuples.push(tuple_names); + } + + if !tuples.is_empty() { + result.push((rel_sym.name.clone(), field_names, tuples)); + } + } + + result + } + + /// Execute a query on an instance. + /// + /// Returns all elements of the given sort in the instance. + pub fn query_sort(&self, instance_name: &str, sort_name: &str) -> Result, String> { + // Get the instance + let entry = self.instances.get(instance_name) + .ok_or_else(|| format!("Instance '{}' not found", instance_name))?; + + // Get the theory + let theory = self.theories.get(&entry.theory_name) + .ok_or_else(|| format!("Theory '{}' not found", entry.theory_name))?; + + // Find the sort index + let sort_idx = theory.theory.signature.sorts + .iter() + .position(|s| s == sort_name) + .ok_or_else(|| format!("Sort '{}' not found in theory '{}'", sort_name, entry.theory_name))?; + + // Use the query backend to scan all elements + use crate::query::{QueryOp, execute}; + + let plan = QueryOp::Scan { sort_idx }; + let result = execute(&plan, &entry.structure); + + // Convert results to element names + let elements: Vec = result.iter() + .filter_map(|(tuple, _)| tuple.first()) + .map(|&slid| { + entry.get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid)) + }) + .collect(); + + Ok(elements) + } +} + +/// Helper to extract theory name from a type expression +/// +/// For parameterized types like `ExampleNet Trace`, the theory is the rightmost +/// path element, not the first argument. 
+fn type_expr_to_theory_name(type_expr: &ast::TypeExpr) -> String { + use crate::ast::TypeToken; + + // Handle special cases first + if type_expr.is_sort() { + return "Sort".to_string(); + } + if type_expr.is_prop() { + return "Prop".to_string(); + } + + // For instance types, recurse on the inner type + if let Some(inner) = type_expr.instance_inner() { + return type_expr_to_theory_name(&inner); + } + + // Find the last path token - that's the theory name + for token in type_expr.tokens.iter().rev() { + if let TypeToken::Path(path) = token { + return path.segments.join("/"); + } + } + + // Fallback for arrows, records, etc. + if type_expr.tokens.iter().any(|t| matches!(t, TypeToken::Arrow)) { + return "Arrow".to_string(); + } + if type_expr.as_record().is_some() { + return "Record".to_string(); + } + + "Unknown".to_string() +} + +/// Convert a type expression to its full string representation. +/// E.g., tokens [Path(ExampleNet), Path(problem0), Path(Solution)] -> "ExampleNet problem0 Solution" +fn type_expr_to_full_string(type_expr: &ast::TypeExpr) -> String { + use crate::ast::TypeToken; + + let mut parts: Vec = vec![]; + + for token in &type_expr.tokens { + match token { + TypeToken::Sort => parts.push("Sort".to_string()), + TypeToken::Prop => parts.push("Prop".to_string()), + TypeToken::Path(path) => parts.push(path.segments.join("/")), + TypeToken::Arrow => parts.push("->".to_string()), + TypeToken::Instance => parts.push("instance".to_string()), + TypeToken::Record(fields) => { + let field_strs: Vec = fields + .iter() + .map(|(name, ty)| format!("{}: {}", name, type_expr_to_full_string(ty))) + .collect(); + parts.push(format!("[{}]", field_strs.join(", "))); + } + } + } + + parts.join(" ") +} + +/// Format a DerivedSort as a string using sort names from the signature +fn format_derived_sort(ds: &DerivedSort, sig: &crate::core::Signature) -> String { + match ds { + DerivedSort::Base(sort_id) => sig + .sorts + .get(*sort_id) + .cloned() + .unwrap_or_else(|| 
format!("Sort#{}", sort_id)), + DerivedSort::Product(fields) => { + if fields.is_empty() { + "Unit".to_string() + } else { + let field_strs: Vec = fields + .iter() + .map(|(name, ds)| format!("{}: {}", name, format_derived_sort(ds, sig))) + .collect(); + format!("[{}]", field_strs.join(", ")) + } + } + } +} + +/// Format a core::Sequent (axiom) for display +fn format_axiom(ax: &crate::core::Sequent, sig: &crate::core::Signature) -> AxiomDetail { + let context: Vec<(String, String)> = ax + .context + .vars + .iter() + .map(|(name, sort)| (name.clone(), format_derived_sort(sort, sig))) + .collect(); + let premise = format_core_formula(&ax.premise, sig); + let conclusion = format_core_formula(&ax.conclusion, sig); + AxiomDetail { + context, + premise, + conclusion, + } +} + +/// Format a core::Term for display +fn format_core_term(term: &crate::core::Term, sig: &crate::core::Signature) -> String { + match term { + crate::core::Term::Var(name, _) => name.clone(), + crate::core::Term::App(func_id, arg) => { + let func_name = sig + .functions + .get(*func_id) + .map(|f| f.name.clone()) + .unwrap_or_else(|| format!("func#{}", func_id)); + format!("{} {}", format_core_term(arg, sig), func_name) + } + crate::core::Term::Record(fields) => { + let field_strs: Vec = fields + .iter() + .map(|(name, t)| format!("{}: {}", name, format_core_term(t, sig))) + .collect(); + format!("[{}]", field_strs.join(", ")) + } + crate::core::Term::Project(base, field) => { + format!("{} .{}", format_core_term(base, sig), field) + } + } +} + +/// Format a core::Formula for display +fn format_core_formula(formula: &crate::core::Formula, sig: &crate::core::Signature) -> String { + match formula { + crate::core::Formula::True => "true".to_string(), + crate::core::Formula::False => "false".to_string(), + crate::core::Formula::Eq(lhs, rhs) => { + format!( + "{} = {}", + format_core_term(lhs, sig), + format_core_term(rhs, sig) + ) + } + crate::core::Formula::Rel(rel_id, arg) => { + let rel_name = sig 
+ .relations + .get(*rel_id) + .map(|r| r.name.clone()) + .unwrap_or_else(|| format!("rel#{}", rel_id)); + format!("{} {}", format_core_term(arg, sig), rel_name) + } + crate::core::Formula::Conj(conjuncts) => { + if conjuncts.is_empty() { + "true".to_string() + } else { + conjuncts + .iter() + .map(|f| format_core_formula(f, sig)) + .collect::>() + .join(", ") + } + } + crate::core::Formula::Disj(disjuncts) => { + if disjuncts.is_empty() { + "false".to_string() + } else { + disjuncts + .iter() + .map(|f| { + let s = format_core_formula(f, sig); + if matches!( + f, + crate::core::Formula::Conj(_) | crate::core::Formula::Disj(_) + ) { + format!("({})", s) + } else { + s + } + }) + .collect::>() + .join(" \\/ ") + } + } + crate::core::Formula::Exists(name, sort, body) => { + format!( + "(exists {} : {}. {})", + name, + format_derived_sort(sort, sig), + format_core_formula(body, sig) + ) + } + } +} + +/// Resolved query type with theory name and argument bindings. +struct ResolvedQueryType { + /// The base theory name (e.g., "Solution") + theory_name: String, + /// Param bindings: (param_name, instance_name) pairs + /// e.g., [("N", "ExampleNet"), ("RP", "problem0")] + arguments: Vec<(String, String)>, +} + +/// Result of processing a line of input +#[derive(Debug)] +pub enum InputResult { + MetaCommand(MetaCommand), + GeologInput(String), + Incomplete, + Empty, +} + +/// Meta-commands supported by the REPL +#[derive(Debug)] +pub enum MetaCommand { + Help(Option), + Quit, + List(ListTarget), + Inspect(String), + Clear, + Reset, + Source(PathBuf), + /// Commit current changes with optional message + Commit(Option), + /// Show commit history + History, + /// Add element to instance: `:add ` + Add { instance: String, element: String, sort: String }, + /// Assert relation tuple: `:assert ` + Assert { instance: String, relation: String, args: Vec }, + /// Retract element from instance: `:retract ` + Retract { instance: String, element: String }, + /// Query instance: 
`:query [filter conditions]` + Query { instance: String, sort: String }, + /// Explain query plan: `:explain ` + Explain { instance: String, sort: String }, + /// Compile query to RelAlgIR: `:compile ` + Compile { instance: String, sort: String }, + /// Solve: find an instance of a theory using the geometric logic solver + /// `:solve [budget_ms]` + Solve { theory: String, budget_ms: Option }, + /// Extend: find extensions of an existing instance to a (larger) theory + /// `:extend [budget_ms]` + Extend { instance: String, theory: String, budget_ms: Option }, + /// Chase: run chase algorithm on instance to compute derived relations/functions + /// `:chase [max_iterations]` + Chase { instance: String, max_iterations: Option }, + Unknown(String), +} + +impl MetaCommand { + pub fn parse(input: &str) -> Self { + let input = input.trim_start_matches(':').trim(); + let mut parts = input.split_whitespace(); + let cmd = parts.next().unwrap_or(""); + let arg = parts.next(); + + match cmd { + "help" | "h" | "?" 
=> MetaCommand::Help(arg.map(String::from)), + "quit" | "q" | "exit" => MetaCommand::Quit, + "list" | "ls" | "l" => { + let target = match arg { + Some("theories" | "theory" | "t") => ListTarget::Theories, + Some("instances" | "instance" | "i") => ListTarget::Instances, + _ => ListTarget::All, + }; + MetaCommand::List(target) + } + "inspect" | "i" | "show" => { + if let Some(name) = arg { + MetaCommand::Inspect(name.to_string()) + } else { + MetaCommand::Unknown(":inspect requires a name".to_string()) + } + } + "clear" | "cls" => MetaCommand::Clear, + "reset" => MetaCommand::Reset, + "source" | "load" => { + if let Some(path) = arg { + MetaCommand::Source(PathBuf::from(path)) + } else { + MetaCommand::Unknown(":source requires a file path".to_string()) + } + } + "commit" | "ci" => { + // Collect remaining args as message + let message: Vec<&str> = parts.collect(); + let msg = if let Some(first) = arg { + let mut full_msg = first.to_string(); + for part in message { + full_msg.push(' '); + full_msg.push_str(part); + } + Some(full_msg) + } else { + None + }; + MetaCommand::Commit(msg) + } + "history" | "log" => MetaCommand::History, + "add" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 3 { + MetaCommand::Add { + instance: args[0].to_string(), + element: args[1].to_string(), + sort: args[2].to_string(), + } + } else { + MetaCommand::Unknown(":add requires ".to_string()) + } + } + "assert" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Assert { + instance: args[0].to_string(), + relation: args[1].to_string(), + args: args[2..].iter().map(|s| s.to_string()).collect(), + } + } else { + MetaCommand::Unknown(":assert requires [args...]".to_string()) + } + } + "retract" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Retract { + instance: args[0].to_string(), + element: 
args[1].to_string(), + } + } else { + MetaCommand::Unknown(":retract requires ".to_string()) + } + } + "query" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Query { + instance: args[0].to_string(), + sort: args[1].to_string(), + } + } else { + MetaCommand::Unknown(":query requires ".to_string()) + } + } + "explain" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Explain { + instance: args[0].to_string(), + sort: args[1].to_string(), + } + } else { + MetaCommand::Unknown(":explain requires ".to_string()) + } + } + "compile" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Compile { + instance: args[0].to_string(), + sort: args[1].to_string(), + } + } else { + MetaCommand::Unknown(":compile requires ".to_string()) + } + } + "solve" => { + if let Some(theory_name) = arg { + // Optional budget in ms + let budget_ms = parts.next().and_then(|s| s.parse().ok()); + MetaCommand::Solve { + theory: theory_name.to_string(), + budget_ms, + } + } else { + MetaCommand::Unknown(":solve requires [budget_ms]".to_string()) + } + } + "extend" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + let budget_ms = args.get(2).and_then(|s| s.parse().ok()); + MetaCommand::Extend { + instance: args[0].to_string(), + theory: args[1].to_string(), + budget_ms, + } + } else { + MetaCommand::Unknown(":extend requires [budget_ms]".to_string()) + } + } + "chase" => { + if let Some(instance_name) = arg { + let max_iterations = parts.next().and_then(|s| s.parse().ok()); + MetaCommand::Chase { + instance: instance_name.to_string(), + max_iterations, + } + } else { + MetaCommand::Unknown(":chase requires [max_iterations]".to_string()) + } + } + other => MetaCommand::Unknown(format!("Unknown command: :{}", other)), + } + } +} + 
+#[derive(Debug)] +pub enum ListTarget { + Theories, + Instances, + All, +} + +#[derive(Debug)] +pub enum ExecuteResult { + Namespace(String), + Theory { + name: String, + num_sorts: usize, + num_functions: usize, + num_relations: usize, + num_axioms: usize, + }, + Instance { + name: String, + theory_name: String, + num_elements: usize, + }, + Query(QueryResult), +} + +/// Result of executing a query +#[derive(Debug)] +pub enum QueryResult { + /// Found a satisfying instance + Found { + query_name: String, + theory_name: String, + model: crate::core::Structure, + time_ms: f64, + }, + /// No solution exists + Unsat { + query_name: String, + theory_name: String, + time_ms: f64, + }, + /// Search incomplete (timeout or other reason) + Incomplete { + query_name: String, + theory_name: String, + reason: String, + time_ms: f64, + }, +} + +#[derive(Debug)] +pub struct TheoryInfo { + pub name: String, + pub num_sorts: usize, + pub num_functions: usize, + pub num_relations: usize, + pub num_axioms: usize, +} + +#[derive(Debug)] +pub struct InstanceInfo { + pub name: String, + pub theory_name: String, + pub num_elements: usize, +} + +#[derive(Debug)] +pub struct TheoryDetail { + pub name: String, + pub params: Vec<(String, String)>, + pub sorts: Vec, + pub functions: Vec<(String, String, String)>, + pub relations: Vec<(String, String)>, + /// Instance fields: (name, theory_type) + pub instance_fields: Vec<(String, String)>, + pub axioms: Vec, +} + +#[derive(Debug)] +pub struct AxiomDetail { + pub context: Vec<(String, String)>, + pub premise: String, + pub conclusion: String, +} + +#[derive(Debug)] +pub struct InstanceDetail { + pub name: String, + pub theory_name: String, + pub elements: Vec<(String, Vec)>, + pub functions: Vec<(String, Vec)>, + /// Relations: (name, field_names, list of tuples-as-element-names) + pub relations: Vec<(String, Vec, Vec>)>, + /// Nested instances: (field_name, element_count) + pub nested: Vec<(String, usize)>, +} + +#[derive(Debug)] +pub enum 
InspectResult { + Theory(TheoryDetail), + Instance(InstanceDetail), +} + +/// Format instance detail as geolog-like syntax +pub fn format_instance_detail(detail: &InstanceDetail) -> String { + let mut out = String::new(); + out.push_str(&format!( + "instance {} : {} = {{\n", + detail.name, detail.theory_name + )); + + for (sort_name, elements) in &detail.elements { + out.push_str(&format!(" // {} ({}):\n", sort_name, elements.len())); + for elem in elements { + out.push_str(&format!(" {} : {};\n", elem, sort_name)); + } + } + + for (func_name, values) in &detail.functions { + if !values.is_empty() { + out.push_str(&format!(" // {}:\n", func_name)); + for value in values { + out.push_str(&format!(" {};\n", value)); + } + } + } + + // Relations + for (rel_name, field_names, tuples) in &detail.relations { + if !tuples.is_empty() { + out.push_str(&format!(" // {} ({} tuples):\n", rel_name, tuples.len())); + for tuple in tuples { + if field_names.is_empty() { + // Unary relation: just the element name + out.push_str(&format!(" {} {};\n", tuple.join(", "), rel_name)); + } else { + // Multi-ary relation: format as [field1: val1, field2: val2] rel_name; + let formatted: Vec = field_names + .iter() + .zip(tuple.iter()) + .map(|(fname, val)| format!("{}: {}", fname, val)) + .collect(); + out.push_str(&format!(" [{}] {};\n", formatted.join(", "), rel_name)); + } + } + } + } + + // Nested instances + if !detail.nested.is_empty() { + out.push_str(" // Nested instances:\n"); + for (field_name, element_count) in &detail.nested { + out.push_str(&format!(" {} = {{ /* {} elements */ }};\n", field_name, element_count)); + } + } + + out.push_str("}\n"); + out +} + +/// Format theory detail +pub fn format_theory_detail(detail: &TheoryDetail) -> String { + let mut out = String::new(); + + out.push_str("theory "); + for (param_name, theory_name) in &detail.params { + if theory_name == "Sort" { + out.push_str(&format!("({} : Sort) ", param_name)); + } else { + out.push_str(&format!("({} : 
{} instance) ", param_name, theory_name)); + } + } + out.push_str(&format!("{} {{\n", detail.name)); + + for sort in &detail.sorts { + out.push_str(&format!(" {} : Sort;\n", sort)); + } + + for (name, domain, codomain) in &detail.functions { + out.push_str(&format!(" {} : {} -> {};\n", name, domain, codomain)); + } + + for (name, domain) in &detail.relations { + out.push_str(&format!(" {} : {} -> Prop;\n", name, domain)); + } + + for (name, theory_type) in &detail.instance_fields { + out.push_str(&format!(" {} : {} instance;\n", name, theory_type)); + } + + for axiom in &detail.axioms { + let quantified: Vec = axiom + .context + .iter() + .map(|(name, sort)| format!("{} : {}", name, sort)) + .collect(); + + if axiom.premise == "true" { + out.push_str(&format!( + " forall {}. |- {};\n", + quantified.join(", "), + axiom.conclusion + )); + } else { + out.push_str(&format!( + " forall {}. {} |- {};\n", + quantified.join(", "), + axiom.premise, + axiom.conclusion + )); + } + } + + out.push_str("}\n"); + out +} diff --git a/src/serialize.rs b/src/serialize.rs new file mode 100644 index 0000000..fbcea7b --- /dev/null +++ b/src/serialize.rs @@ -0,0 +1,294 @@ +//! Structure serialization and deserialization. +//! +//! Provides rkyv-based serialization for `Structure` with both: +//! - `save_structure` / `load_structure`: heap-allocated deserialization +//! 
- `load_structure_mapped`: zero-copy memory-mapped access + +use std::fs::{self, File}; +use std::io::Write; +use std::path::Path; + +use memmap2::Mmap; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::ser::Serializer; +use rkyv::{check_archived_root, Archive, Deserialize, Serialize}; + +use crate::core::{FunctionColumn, ProductStorage, RelationStorage, SortId, Structure, TupleId, VecRelation}; +use crate::id::{get_luid, get_slid, some_luid, some_slid, Luid, NumericId, Slid}; + +// ============================================================================ +// SERIALIZABLE DATA TYPES +// ============================================================================ + +/// Serializable form of a relation +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct RelationData { + pub arity: usize, + pub tuples: Vec>, + pub extent: Vec, +} + +/// Serializable form of a function column +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub enum FunctionColumnData { + Local(Vec>), + External(Vec>), + /// Product domain: maps tuples of sort-local indices to result Slid index, + /// along with the field sort IDs for reconstruction + ProductLocal { + entries: Vec<(Vec, usize)>, + field_sorts: Vec, + }, + /// Product codomain: base domain maps to multiple fields + ProductCodomain { + /// One column per field - each Vec> is indexed by domain sort-local index + field_columns: Vec>>, + field_names: Vec, + field_sorts: Vec, + domain_sort: usize, + }, +} + +/// Serializable form of a Structure +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct StructureData { + pub num_sorts: usize, + pub luids: Vec, + pub sorts: Vec, + pub functions: Vec, + pub relations: Vec, +} + +impl StructureData { + pub fn from_structure(structure: &Structure) -> Self { + let functions = structure + .functions + .iter() + .map(|func_col| match func_col { + FunctionColumn::Local(col) => FunctionColumnData::Local( + col.iter() + 
.map(|&opt| get_slid(opt).map(|s| s.index())) + .collect(), + ), + FunctionColumn::External(col) => FunctionColumnData::External( + col.iter() + .map(|&opt| get_luid(opt).map(|l| l.index())) + .collect(), + ), + FunctionColumn::ProductLocal { + storage, + field_sorts, + } => { + let entries: Vec<(Vec, usize)> = storage + .iter_defined() + .map(|(tuple, result)| (tuple, result.index())) + .collect(); + FunctionColumnData::ProductLocal { + entries, + field_sorts: field_sorts.clone(), + } + } + FunctionColumn::ProductCodomain { + field_columns, + field_names, + field_sorts, + domain_sort, + } => { + let serialized_columns: Vec>> = field_columns + .iter() + .map(|col| { + col.iter() + .map(|&opt| get_slid(opt).map(|s| s.index())) + .collect() + }) + .collect(); + FunctionColumnData::ProductCodomain { + field_columns: serialized_columns, + field_names: field_names.clone(), + field_sorts: field_sorts.clone(), + domain_sort: *domain_sort, + } + } + }) + .collect(); + + let relations = structure + .relations + .iter() + .map(|rel| RelationData { + arity: rel.arity(), + tuples: rel.tuples.clone(), + extent: rel.iter_ids().collect(), + }) + .collect(); + + Self { + num_sorts: structure.num_sorts(), + luids: structure.luids.clone(), + sorts: structure.sorts.clone(), + functions, + relations, + } + } + + pub fn to_structure(&self) -> Structure { + use crate::id::NumericId; + + let mut structure = Structure::new(self.num_sorts); + + for (slid_idx, (&luid, &sort_id)) in self.luids.iter().zip(self.sorts.iter()).enumerate() { + let added_slid = structure.add_element_with_luid(luid, sort_id); + debug_assert_eq!(added_slid, Slid::from_usize(slid_idx)); + } + + structure.functions = self + .functions + .iter() + .map(|func_data| match func_data { + FunctionColumnData::Local(col) => FunctionColumn::Local( + col.iter() + .map(|&opt| opt.map(Slid::from_usize).and_then(some_slid)) + .collect(), + ), + FunctionColumnData::External(col) => FunctionColumn::External( + col.iter() + 
.map(|&opt| opt.map(Luid::from_usize).and_then(some_luid)) + .collect(), + ), + FunctionColumnData::ProductLocal { + entries, + field_sorts, + } => { + let mut storage = ProductStorage::new_general(); + for (tuple, result) in entries { + storage + .set(tuple, Slid::from_usize(*result)) + .expect("no conflicts in serialized data"); + } + FunctionColumn::ProductLocal { + storage, + field_sorts: field_sorts.clone(), + } + } + FunctionColumnData::ProductCodomain { + field_columns, + field_names, + field_sorts, + domain_sort, + } => { + let restored_columns: Vec> = field_columns + .iter() + .map(|col| { + col.iter() + .map(|&opt| opt.map(Slid::from_usize).and_then(some_slid)) + .collect() + }) + .collect(); + FunctionColumn::ProductCodomain { + field_columns: restored_columns, + field_names: field_names.clone(), + field_sorts: field_sorts.clone(), + domain_sort: *domain_sort, + } + } + }) + .collect(); + + structure.relations = self + .relations + .iter() + .map(|rel_data| { + let mut rel = VecRelation::new(rel_data.arity); + for tuple in &rel_data.tuples { + rel.tuple_to_id.insert(tuple.clone(), rel.tuples.len()); + rel.tuples.push(tuple.clone()); + } + for &tuple_id in &rel_data.extent { + rel.extent.insert(tuple_id as u64); + } + rel + }) + .collect(); + + structure + } +} + +// ============================================================================ +// SAVE / LOAD FUNCTIONS +// ============================================================================ + +/// Save a Structure to a file +pub fn save_structure(structure: &Structure, path: &Path) -> Result<(), String> { + let data = StructureData::from_structure(structure); + + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).map_err(|e| format!("Failed to create directory: {}", e))?; + } + + let mut serializer = AllocSerializer::<4096>::default(); + serializer + .serialize_value(&data) + .map_err(|e| format!("Failed to serialize structure: {}", e))?; + let bytes = 
serializer.into_serializer().into_inner(); + + let temp_path = path.with_extension("tmp"); + { + let mut file = + File::create(&temp_path).map_err(|e| format!("Failed to create temp file: {}", e))?; + file.write_all(&bytes) + .map_err(|e| format!("Failed to write file: {}", e))?; + file.sync_all() + .map_err(|e| format!("Failed to sync file: {}", e))?; + } + + fs::rename(&temp_path, path).map_err(|e| format!("Failed to rename file: {}", e))?; + + Ok(()) +} + +/// Load a Structure from a file (deserializes into heap-allocated Structure) +/// +/// Use this when you need a mutable Structure or when access patterns involve +/// heavy computation on the data. For read-only access to large structures, +/// prefer `load_structure_mapped` which is ~100-400x faster. +pub fn load_structure(path: &Path) -> Result { + let file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?; + + let mmap = unsafe { Mmap::map(&file) }.map_err(|e| format!("Failed to mmap file: {}", e))?; + + if mmap.is_empty() { + return Err("Empty structure file".to_string()); + } + + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Failed to validate archive: {}", e))?; + + let data: StructureData = archived + .deserialize(&mut rkyv::Infallible) + .map_err(|_| "Failed to deserialize structure")?; + + Ok(data.to_structure()) +} + +/// Load a Structure from a file with zero-copy access (memory-mapped) +/// +/// This is ~100-400x faster than `load_structure` for large structures because +/// it doesn't deserialize the data - it accesses the archived format directly +/// from the memory map. 
+/// +/// Use this for: +/// - Read-only access to large structures +/// - Fast startup when you just need to query existing data +/// - Reducing memory footprint (only the mmap exists, no heap copies) +/// +/// Trade-offs: +/// - Read-only (cannot modify the structure) +/// - Slightly different API (returns `MappedStructure` instead of `Structure`) +/// - File must remain valid for lifetime of `MappedStructure` +pub fn load_structure_mapped(path: &Path) -> Result { + crate::zerocopy::MappedStructure::open(path) +} diff --git a/src/solver/mod.rs b/src/solver/mod.rs new file mode 100644 index 0000000..3cc87cc --- /dev/null +++ b/src/solver/mod.rs @@ -0,0 +1,415 @@ +//! Solver infrastructure for instance synthesis +//! +//! This module provides the search tree and tactics for finding instances +//! of geometric theories. The architecture is designed for AI-agent-driven +//! search: the agent manipulates an explicit search tree, running automated +//! tactics for bounded time and providing strategic guidance. +//! +//! # Key Concepts +//! +//! - **Search Tree**: Explicit tree of partial models, not implicit in call stack +//! - **Partial Model**: A `Structure` where carriers can grow, functions can become +//! more defined, and relations can have more tuples asserted +//! - **Refinement**: Natural preorder on Structures (really a category of partial +//! models with refinement morphisms) +//! - **Obligation**: When an axiom's premise is satisfied but conclusion isn't, +//! we have an obligation to witness the conclusion (not a failure!) +//! - **Tactic**: Automated search strategy that runs for bounded time +//! - **Agent Loop**: AI decides which nodes to explore, provides hints, estimates +//! success probabilities, allocates resources +//! +//! # The Refinement Order +//! +//! A Structure S₁ refines to S₂ (S₁ ≤ S₂) when: +//! - All carriers in S₁ are subsets of corresponding carriers in S₂ +//! - All defined function values in S₁ are preserved in S₂ +//! 
- All asserted relation tuples in S₁ are preserved in S₂ +//! +//! A search node conjectures: "∃ complete, axiom-satisfying Structure above this one" +//! +//! # Obligations, Equations, and Derivations +//! +//! In geometric logic, axiom consequents are always positive (existentials, +//! disjunctions, atomic relations, equations). The refinement order on partial +//! models includes not just adding facts, but also **quotienting by equations** +//! (merging elements). This means: +//! +//! - **Obligation**: Premise satisfied, conclusion not yet witnessed → need to +//! witness. Can always potentially be done by refinement. +//! +//! - **Pending Equation**: Two terms must be equal. Resolved by merging elements +//! and propagating consequences (congruence closure). +//! +//! - **Unsat**: The ONLY way to get true unsatisfiability is if there exists a +//! **Derivation** of `⊢ False` from the instantiated axioms. This is +//! proof-theoretic: we need to actually derive False, not just have "conflicts". +//! +//! For example, "function f already maps a to b, but we need f(a) = c" is NOT +//! unsat—it's a pending equation `b = c`. We merge b and c, propagate, and only +//! if this leads to deriving False (via some axiom like `R(x), S(x) ⊢ False`) +//! do we have true unsatisfiability. +//! +//! This is essentially SMT solving with EUF (equality + uninterpreted functions) +//! plus geometric theory axioms, where the goal is to either: +//! 1. Find a complete model satisfying all axioms, or +//! 2. Derive `⊢ False` proving no such model exists +//! +//! # Unified Model Enumeration API +//! +//! The high-level API unifies `:solve` and `:query` under a common abstraction: +//! finding maximal elements of the posetal reflection of the category of models. +//! +//! - [`solve`]: Find models from scratch (base = empty structure) +//! - [`query`]: Find extensions of an existing model to a larger theory +//! 
- [`enumerate_models`]: Core unified function (both above are wrappers) +//! +//! ```ignore +//! // Find any model of a theory +//! let result = solve(theory, Budget::quick()); +//! +//! // Extend an existing model to satisfy additional axioms +//! let result = query(base_structure, universe, extended_theory, budget); +//! ``` + +mod tactics; +mod tree; +mod types; + +// Re-export main types +pub use tactics::{AutoTactic, Budget, CheckTactic, EnumerateFunctionTactic, ForwardChainingTactic, PropagateEquationsTactic, Tactic, TacticResult}; +pub use tree::SearchTree; +pub use types::{ + AxiomCheckResult, ConflictClause, CongruenceClosure, EquationReason, NodeDetail, NodeId, + NodeStatus, Obligation, PendingEquation, SearchNode, SearchSummary, +}; + +// Unified model enumeration API (see below) +// - enumerate_models: core unified function +// - solve: convenience for :solve (find models from scratch) +// - query: convenience for :query (extend existing models) +// - EnumerationResult: result type + +// Re-export union-find for convenience +pub use egglog_union_find::UnionFind; + +// ============================================================================ +// UNIFIED MODEL ENUMERATION API +// ============================================================================ + +use std::rc::Rc; +use crate::core::{DerivedSort, ElaboratedTheory, Structure}; +use crate::universe::Universe; + +/// Result of model enumeration. +#[derive(Debug, Clone)] +pub enum EnumerationResult { + /// Found a complete model satisfying all axioms. + Found { + /// The witness structure (model). + model: Structure, + /// Time taken in milliseconds. + time_ms: f64, + }, + /// Proved no model exists (derived False). + Unsat { + /// Time taken in milliseconds. + time_ms: f64, + }, + /// Search incomplete (budget exhausted or still has obligations). + Incomplete { + /// Partial structure so far. + partial: Structure, + /// Time taken in milliseconds. 
+ time_ms: f64, + /// Description of why incomplete. + reason: String, + }, +} + +/// Unified model enumeration: find models of `theory` extending `base`. +/// +/// This is the core API that unifies `:solve` and `:query`: +/// - `:solve T` = `enumerate_models(empty, T, budget)` +/// - `:query M T'` = `enumerate_models(M, T', budget)` where T' extends M's theory +/// +/// # Arguments +/// - `base`: Starting structure (empty for `:solve`, existing model for `:query`) +/// - `universe`: Universe for Luid allocation (should contain Luids from base) +/// - `theory`: The theory to satisfy +/// - `budget`: Resource limits for the search +/// +/// # Returns +/// - `Found` if a complete model was found +/// - `Unsat` if no model exists (derived False) +/// - `Incomplete` if budget exhausted or search blocked +pub fn enumerate_models( + base: Structure, + universe: Universe, + theory: Rc, + budget: Budget, +) -> EnumerationResult { + let start = std::time::Instant::now(); + let sig = &theory.theory.signature; + + // Create search tree from base + let mut tree = SearchTree::from_base(theory.clone(), base, universe); + + // Initialize function and relation storage at root (if not already initialized) + // This preserves any function values that were imported from param instances. 
+ let num_funcs = sig.functions.len(); + let num_rels = sig.relations.len(); + + // Only init functions if not already initialized (or wrong size) + if tree.nodes[0].structure.functions.len() != num_funcs { + let domain_sort_ids: Vec> = sig + .functions + .iter() + .map(|f| match &f.domain { + DerivedSort::Base(sid) => Some(*sid), + DerivedSort::Product(_) => None, + }) + .collect(); + + if tree.init_functions(0, &domain_sort_ids).is_err() { + return EnumerationResult::Incomplete { + partial: tree.nodes[0].structure.clone(), + time_ms: start.elapsed().as_secs_f64() * 1000.0, + reason: "Failed to initialize function storage".to_string(), + }; + } + } + + // Only init relations if not already initialized (or wrong size) + if tree.nodes[0].structure.relations.len() != num_rels { + let arities: Vec = sig + .relations + .iter() + .map(|r| match &r.domain { + DerivedSort::Base(_) => 1, + DerivedSort::Product(fields) => fields.len(), + }) + .collect(); + + if tree.init_relations(0, &arities).is_err() { + return EnumerationResult::Incomplete { + partial: tree.nodes[0].structure.clone(), + time_ms: start.elapsed().as_secs_f64() * 1000.0, + reason: "Failed to initialize relation storage".to_string(), + }; + } + } + + // Run AutoTactic + let result = AutoTactic.run(&mut tree, 0, &budget); + let time_ms = start.elapsed().as_secs_f64() * 1000.0; + + match result { + TacticResult::Solved => EnumerationResult::Found { + model: tree.nodes[0].structure.clone(), + time_ms, + }, + TacticResult::Unsat(_) => EnumerationResult::Unsat { time_ms }, + TacticResult::HasObligations(obs) => EnumerationResult::Incomplete { + partial: tree.nodes[0].structure.clone(), + time_ms, + reason: format!("Has {} unfulfilled obligations", obs.len()), + }, + TacticResult::Progress { steps_taken, .. 
} => EnumerationResult::Incomplete { + partial: tree.nodes[0].structure.clone(), + time_ms, + reason: format!("Made progress ({} steps) but not complete", steps_taken), + }, + TacticResult::Timeout { steps_taken } => EnumerationResult::Incomplete { + partial: tree.nodes[0].structure.clone(), + time_ms, + reason: format!("Timeout after {} steps", steps_taken), + }, + TacticResult::Error(e) => EnumerationResult::Incomplete { + partial: tree.nodes[0].structure.clone(), + time_ms, + reason: format!("Error: {}", e), + }, + } +} + +/// Convenience: solve a theory from scratch (find any model). +/// +/// Equivalent to `enumerate_models(empty_structure, Universe::new(), theory, budget)`. +pub fn solve(theory: Rc, budget: Budget) -> EnumerationResult { + let num_sorts = theory.theory.signature.sorts.len(); + let base = Structure::new(num_sorts); + enumerate_models(base, Universe::new(), theory, budget) +} + +/// Convenience: query/extend an existing model. +/// +/// Equivalent to `enumerate_models(base, universe, extension_theory, budget)`. +pub fn query( + base: Structure, + universe: Universe, + extension_theory: Rc, + budget: Budget, +) -> EnumerationResult { + enumerate_models(base, universe, extension_theory, budget) +} + +#[cfg(test)] +mod unified_api_tests { + use super::*; + use crate::core::{Context, Formula, RelationStorage, Sequent, Signature, Term, Theory}; + + fn make_existential_theory() -> Rc { + // Theory: Node sort, R relation + // Axiom: True |- ∃x:Node. 
R(x) + let mut sig = Signature::new(); + let node = sig.add_sort("Node".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(node)); + + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(node), + Box::new(Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(node)))), + ), + }; + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "ExistsR".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/exists_r".to_string()], + }, + }) + } + + #[test] + fn test_solve_finds_model() { + // solve = enumerate_models with empty base + let theory = make_existential_theory(); + let result = solve(theory, Budget::quick()); + + match result { + EnumerationResult::Found { model, .. } => { + // Should have at least one element with R + assert!(model.carrier_size(0) >= 1); + assert!(!model.relations[0].is_empty()); + } + other => panic!("Expected Found, got {:?}", other), + } + } + + #[test] + fn test_query_extends_base() { + // query = enumerate_models with existing base + let theory = make_existential_theory(); + + // Create base with one element, R not yet holding + let mut universe = Universe::new(); + let mut base = Structure::new(1); + let (_elem, _) = base.add_element(&mut universe, 0); + base.init_relations(&[1]); + + // query should extend the base to satisfy the axiom + let result = query(base, universe, theory, Budget::quick()); + + match result { + EnumerationResult::Found { model, .. 
} => { + // R should now have at least one tuple + assert!(!model.relations[0].is_empty()); + } + other => panic!("Expected Found, got {:?}", other), + } + } + + #[test] + fn test_unification_equivalence() { + // Demonstrate: solve(T) = enumerate_models(empty, T) + let theory = make_existential_theory(); + let budget = Budget::quick(); + + // Method 1: solve + let result1 = solve(theory.clone(), budget.clone()); + + // Method 2: enumerate_models with empty base + let num_sorts = theory.theory.signature.sorts.len(); + let empty_base = Structure::new(num_sorts); + let result2 = enumerate_models(empty_base, Universe::new(), theory, budget); + + // Both should succeed (find a model) + match (&result1, &result2) { + (EnumerationResult::Found { .. }, EnumerationResult::Found { .. }) => { + // Both found models - the unification works! + } + _ => panic!( + "Expected both to find models, got {:?} and {:?}", + result1, result2 + ), + } + } + + #[test] + fn test_solve_unsat_theory() { + // Theory that derives False: forall a:A. |- false + let mut sig = Signature::new(); + let _sort_a = sig.add_sort("A".to_string()); + + // Axiom: forall a:A. |- false + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::False, + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Inconsistent".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/inconsistent".to_string()], + }, + }); + + let result = solve(theory, Budget::quick()); + + match result { + EnumerationResult::Unsat { .. 
} => { + // Correctly detected UNSAT + } + other => panic!("Expected Unsat, got {:?}", other), + } + } + + #[test] + fn test_solve_trivial_theory() { + // Theory with no axioms - should be trivially satisfied by empty structure + let mut sig = Signature::new(); + sig.add_sort("A".to_string()); + sig.add_sort("B".to_string()); + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Trivial".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }); + + let result = solve(theory, Budget::quick()); + + match result { + EnumerationResult::Found { model, .. } => { + // Empty structure is a valid model + assert_eq!(model.carrier_size(0), 0); + assert_eq!(model.carrier_size(1), 0); + } + other => panic!("Expected Found with empty model, got {:?}", other), + } + } +} diff --git a/src/solver/tactics.rs b/src/solver/tactics.rs new file mode 100644 index 0000000..9a25eca --- /dev/null +++ b/src/solver/tactics.rs @@ -0,0 +1,1398 @@ +//! Tactics for automated search. 
+ +use crate::id::{NumericId, Slid}; + +use super::tree::SearchTree; +use super::types::{ConflictClause, NodeId, Obligation}; + +/// Budget for tactic execution +#[derive(Clone, Debug)] +pub struct Budget { + /// Maximum wall-clock time in milliseconds + pub time_ms: u64, + /// Maximum number of refinement steps + pub steps: usize, +} + +impl Budget { + pub fn new(time_ms: u64, steps: usize) -> Self { + Self { time_ms, steps } + } + + /// A short budget for quick checks + pub fn quick() -> Self { + Self { + time_ms: 100, + steps: 100, + } + } + + /// A medium budget for exploratory search + pub fn medium() -> Self { + Self { + time_ms: 1000, + steps: 1000, + } + } + + /// A longer budget for deeper search + pub fn long() -> Self { + Self { + time_ms: 5000, + steps: 10000, + } + } +} + +/// Result of running a tactic +#[derive(Clone, Debug)] +pub enum TacticResult { + /// Found a valid complete instance at this node + Solved, + /// Proved this node has no solution (with optional conflict clause) + /// This only happens when fulfilling obligations would CONFLICT with + /// existing commitments, not merely because axioms aren't yet satisfied. + Unsat(Option), + /// Axioms have unsatisfied consequents that need to be witnessed. + /// This is NOT failure—the agent should fulfill these obligations + /// (add elements, define functions, assert relations) to make progress. + HasObligations(Vec), + /// Made progress, can continue with more budget + Progress { + /// Number of refinement steps taken + steps_taken: usize, + /// Number of branches created + branches_created: usize, + }, + /// Budget exhausted without conclusive result + Timeout { + /// Where we got to + steps_taken: usize, + }, + /// Error during execution + Error(String), +} + +/// A tactic for automated search +/// +/// Tactics implement specific search strategies. They run for bounded time/steps +/// and return a result. The agent orchestrates tactics across the search tree. 
+pub trait Tactic { + /// Run the tactic on a node with the given budget + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult; + + /// Human-readable name for this tactic + fn name(&self) -> &str; +} + +// ============================================================================ +// BUILT-IN TACTICS +// ============================================================================ + +/// Check tactic: check axioms and report obligations +/// +/// In geometric logic, axiom "violations" are really OBLIGATIONS to fulfill. +/// The consequent is always positive, so we can potentially satisfy it by +/// adding elements, defining functions, or asserting relations. +/// +/// This tactic checks current state and returns: +/// - Solved if complete and all axioms satisfied +/// - HasObligations if there are consequents to witness +/// - Progress if incomplete but no current obligations +pub struct CheckTactic; + +impl Tactic for CheckTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, _budget: &Budget) -> TacticResult { + match tree.check_axioms(node) { + Ok(()) => { + // No violations - check if complete + match tree.is_complete(node) { + Ok(true) => { + tree.mark_solved(node); + TacticResult::Solved + } + Ok(false) => { + // Model is incomplete (e.g., undefined functions) but no axiom violations. + // Don't report progress - we need external help (function enumeration) + // or the model can be considered valid with partial functions. + TacticResult::Progress { + steps_taken: 0, + branches_created: 0, + } + } + Err(e) => TacticResult::Error(e), + } + } + Err(violations) => { + // Convert violations to obligations + // Violations mean: premise satisfied, conclusion not yet satisfied + // This is an OBLIGATION to witness the conclusion, not unsat! 
+ let obligations: Vec = violations + .iter() + .flat_map(|(axiom_idx, viols)| { + viols.iter().map(move |v| Obligation { + axiom_idx: *axiom_idx, + // Convert variable assignment to (name, sort_id, slid) + // For now, we don't have sort info in Violation, so approximate + assignment: v + .variable_names + .iter() + .zip(v.assignment.iter()) + .map(|(name, &idx)| (name.clone(), 0, Slid::from_usize(idx))) + // sort_id=0 is placeholder + .collect(), + description: format!( + "Axiom {} needs consequent witnessed for assignment {:?}", + axiom_idx, v.assignment + ), + }) + }) + .collect(); + + TacticResult::HasObligations(obligations) + } + } + } + + fn name(&self) -> &str { + "check" + } +} + +/// Enumeration tactic: try all values for an undefined function +pub struct EnumerateFunctionTactic { + pub func_id: usize, +} + +impl Tactic for EnumerateFunctionTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + let start = std::time::Instant::now(); + let mut steps = 0; + let mut branches = 0; + + let sig = tree.signature().clone(); + let func_sym = match sig.functions.get(self.func_id) { + Some(f) => f, + None => return TacticResult::Error("Invalid function ID".to_string()), + }; + + // Get domain and codomain sorts + let (domain_sort, codomain_sort) = match (&func_sym.domain, &func_sym.codomain) { + (crate::core::DerivedSort::Base(d), crate::core::DerivedSort::Base(c)) => (*d, *c), + _ => return TacticResult::Error("Only base sorts supported for now".to_string()), + }; + + // Find undefined function applications + let node_ref = match tree.get(node) { + Some(n) => n, + None => return TacticResult::Error("Invalid node ID".to_string()), + }; + + if self.func_id >= node_ref.structure.functions.len() { + return TacticResult::Error("Function storage not initialized".to_string()); + } + + // Find first undefined domain element (for local functions) + let mut undefined_domain: Option = None; + for slid_u64 in 
node_ref.structure.carriers[domain_sort].iter() { + let slid = Slid::from_usize(slid_u64 as usize); + let sort_slid = node_ref.structure.sort_local_id(slid); + if node_ref + .structure + .get_function(self.func_id, sort_slid) + .is_none() + { + undefined_domain = Some(slid); + break; + } + } + + let domain_slid = match undefined_domain { + Some(d) => d, + None => { + // All defined - check current state + return CheckTactic.run(tree, node, budget); + } + }; + + // Enumerate codomain values + let codomain_elements: Vec = node_ref.structure.carriers[codomain_sort] + .iter() + .map(|x| Slid::from_usize(x as usize)) + .collect(); + + if codomain_elements.is_empty() { + return TacticResult::Error("Empty codomain - need to add elements first".to_string()); + } + + // Create a branch for each possible value + for &codomain_slid in &codomain_elements { + if start.elapsed().as_millis() as u64 > budget.time_ms || steps >= budget.steps { + return TacticResult::Timeout { steps_taken: steps }; + } + + let child = tree.branch( + node, + Some(format!( + "f{}({})={}", + self.func_id, domain_slid, codomain_slid + )), + ); + + if let Err(e) = tree.define_function(child, self.func_id, domain_slid, codomain_slid) { + tree.mark_unsat( + child, + Some(ConflictClause { + required_elements: vec![], + required_functions: vec![], + required_relations: vec![], + violated_axiom: None, + explanation: Some(e), + }), + ); + } + + steps += 1; + branches += 1; + } + + // Mark parent as non-leaf (it has children now) + // Parent stays Open but is no longer on frontier + + TacticResult::Progress { + steps_taken: steps, + branches_created: branches, + } + } + + fn name(&self) -> &str { + "enumerate_function" + } +} + +/// Forward chaining tactic: automatically fulfill simple obligations. +/// +/// When an axiom's premise is satisfied but conclusion isn't, we have an obligation. 
+/// This tactic automatically fulfills simple obligations: +/// - **Relation assertions**: assert the relation tuple +/// - **Equations**: add to pending equations in congruence closure +/// - **Existentials**: add a fresh witness element (then recurse) +/// - **Disjunctions**: create branches (one per disjunct) +/// - **False**: mark as unsat (derivation of False found!) +/// +/// This is Datalog-style forward chaining for geometric logic. +pub struct ForwardChainingTactic; + +impl Tactic for ForwardChainingTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + use crate::core::Formula; + use crate::tensor::check_theory_axioms; + + let start = std::time::Instant::now(); + let mut steps = 0; + let mut branches = 0; + + // Get current structure and axioms + let axioms = tree.theory.theory.axioms.clone(); + let sig = tree.theory.theory.signature.clone(); + + // Check axioms and get violations + let violations = { + let node_ref = match tree.get(node) { + Some(n) => n, + None => return TacticResult::Error("Invalid node ID".to_string()), + }; + check_theory_axioms(&axioms, &node_ref.structure, &sig) + }; + + if violations.is_empty() { + // No violations - check if complete + return CheckTactic.run(tree, node, budget); + } + + // Process each violation + for (axiom_idx, viols) in violations { + for viol in viols { + if start.elapsed().as_millis() as u64 > budget.time_ms || steps >= budget.steps { + return TacticResult::Timeout { steps_taken: steps }; + } + + let axiom = &axioms[axiom_idx]; + let conclusion = &axiom.conclusion; + + // Build variable assignment map from violation + let assignment: std::collections::HashMap = viol + .variable_names + .iter() + .zip(viol.assignment.iter()) + .map(|(name, &idx)| (name.clone(), idx)) + .collect(); + + // Process the conclusion based on its type + match conclusion { + Formula::False => { + // Found a derivation of False! + // This is true unsatisfiability. 
+ tree.mark_unsat( + node, + Some(ConflictClause { + required_elements: vec![], + required_functions: vec![], + required_relations: vec![], + violated_axiom: Some(axiom_idx), + explanation: Some(format!( + "Axiom {} derives False for assignment {:?}", + axiom_idx, assignment + )), + }), + ); + return TacticResult::Unsat(None); + } + + Formula::Disj(disjuncts) if !disjuncts.is_empty() => { + // Create a branch for each disjunct and process in that branch + for (i, disjunct) in disjuncts.iter().enumerate() { + let child = tree.branch( + node, + Some(format!("axiom{}:disj{}", axiom_idx, i)), + ); + branches += 1; + // Process the disjunct in the child branch + let mut processor = FormulaProcessor::new( + tree, + child, + assignment.clone(), + axiom_idx, + ); + if let Err(e) = processor.process(disjunct) { + return TacticResult::Error(e); + } + steps += processor.steps; + } + } + + Formula::Disj(_) => { + // Empty disjunction - should be handled as False + // For now, skip (shouldn't happen in well-formed theories) + } + + // For Rel, Eq, Exists, Conj, True - use recursive processor + other_formula => { + let mut processor = FormulaProcessor::new( + tree, + node, + assignment.clone(), + axiom_idx, + ); + if let Err(e) = processor.process(other_formula) { + return TacticResult::Error(e); + } + steps += processor.steps; + } + } + } + } + + if steps > 0 || branches > 0 { + TacticResult::Progress { + steps_taken: steps, + branches_created: branches, + } + } else { + // No progress made - return obligations for agent + CheckTactic.run(tree, node, budget) + } + } + + fn name(&self) -> &str { + "forward_chaining" + } +} + +/// Equation propagation tactic: process pending equations in the congruence closure. +/// +/// This tactic: +/// 1. Pops pending equations from the CC queue +/// 2. Merges the equivalence classes +/// 3. Checks for function conflicts (f(a) = x and f(b) = y where a = b implies x = y) +/// 4. 
Adds any new equations discovered via congruence +/// +/// This is a simplified version that doesn't do full congruence closure, +/// but handles the basic case of merging and detecting function conflicts. +pub struct PropagateEquationsTactic; + +impl Tactic for PropagateEquationsTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + let start = std::time::Instant::now(); + let mut steps = 0; + let mut _new_equations = 0; + + // Process pending equations + loop { + if start.elapsed().as_millis() as u64 > budget.time_ms || steps >= budget.steps { + return TacticResult::Timeout { steps_taken: steps }; + } + + // Pop next equation + let eq = { + let node = match tree.get_mut(node) { + Some(n) => n, + None => return TacticResult::Error("Invalid node ID".to_string()), + }; + node.cc.pop_pending() + }; + + let eq = match eq { + Some(e) => e, + None => break, // No more pending equations + }; + + // Merge the equivalence classes + let merged = { + let node = tree.get_mut(node).unwrap(); + node.cc.merge(eq.lhs, eq.rhs) + }; + + if merged { + steps += 1; + + // Check for function conflicts + // For each function f, if f(a) and f(b) are both defined and a = b, + // then we need f(a) = f(b) (congruence) + let sig = tree.signature().clone(); + let conflicts: Vec<(Slid, Slid, usize)> = { + let node = tree.get(node).unwrap(); + let mut conflicts = Vec::new(); + + for func_id in 0..sig.functions.len() { + if func_id >= node.structure.functions.len() { + continue; + } + + // Get values for eq.lhs and eq.rhs + let lhs_sort_slid = node.structure.sort_local_id(eq.lhs); + let rhs_sort_slid = node.structure.sort_local_id(eq.rhs); + + let lhs_val = node.structure.get_function(func_id, lhs_sort_slid); + let rhs_val = node.structure.get_function(func_id, rhs_sort_slid); + + if let (Some(lv), Some(rv)) = (lhs_val, rhs_val) + && lv != rv + { + // Function conflict: f(a) = lv and f(b) = rv, but a = b + // Add equation lv = rv with func_id for 
debugging + conflicts.push((lv, rv, func_id)); + } + } + conflicts + }; + + // Add conflict-induced equations + for (lv, rv, func_id) in conflicts { + tree.add_pending_equation( + node, + lv, + rv, + super::types::EquationReason::Congruence { func_id }, + ); + _new_equations += 1; + } + } + } + + if steps > 0 { + TacticResult::Progress { + steps_taken: steps, + branches_created: 0, + } + } else { + // No pending equations - fall through to check + CheckTactic.run(tree, node, budget) + } + } + + fn name(&self) -> &str { + "propagate_equations" + } +} + +/// Automatic solving tactic: runs forward chaining and equation propagation to fixpoint. +/// +/// This composite tactic: +/// 1. Runs ForwardChainingTactic until no progress +/// 2. Runs PropagateEquationsTactic until no progress +/// 3. Repeats until fixpoint (no more progress from either) +/// +/// This is the main "auto-solve" tactic for geometric logic. +pub struct AutoTactic; + +impl Tactic for AutoTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + let start = std::time::Instant::now(); + let mut total_steps = 0; + let mut total_branches = 0; + let mut iterations = 0; + + loop { + if start.elapsed().as_millis() as u64 > budget.time_ms { + return TacticResult::Timeout { steps_taken: total_steps }; + } + + iterations += 1; + let mut made_progress = false; + + // Run forward chaining + let remaining_budget = Budget { + time_ms: budget.time_ms.saturating_sub(start.elapsed().as_millis() as u64), + steps: budget.steps.saturating_sub(total_steps), + }; + + match ForwardChainingTactic.run(tree, node, &remaining_budget) { + TacticResult::Progress { steps_taken, branches_created } => { + total_steps += steps_taken; + total_branches += branches_created; + if steps_taken > 0 || branches_created > 0 { + made_progress = true; + } + } + TacticResult::Solved => return TacticResult::Solved, + TacticResult::Unsat(clause) => return TacticResult::Unsat(clause), + 
TacticResult::Timeout { steps_taken } => { + total_steps += steps_taken; + return TacticResult::Timeout { steps_taken: total_steps }; + } + TacticResult::Error(e) => return TacticResult::Error(e), + TacticResult::HasObligations(_) => { + // Has obligations but made no progress - continue to propagation + } + } + + // Run equation propagation + let remaining_budget = Budget { + time_ms: budget.time_ms.saturating_sub(start.elapsed().as_millis() as u64), + steps: budget.steps.saturating_sub(total_steps), + }; + + match PropagateEquationsTactic.run(tree, node, &remaining_budget) { + TacticResult::Progress { steps_taken, .. } => { + total_steps += steps_taken; + if steps_taken > 0 { + made_progress = true; + } + } + TacticResult::Solved => return TacticResult::Solved, + TacticResult::Unsat(clause) => return TacticResult::Unsat(clause), + TacticResult::Timeout { steps_taken } => { + total_steps += steps_taken; + return TacticResult::Timeout { steps_taken: total_steps }; + } + TacticResult::Error(e) => return TacticResult::Error(e), + TacticResult::HasObligations(_) => { + // Has obligations but made no progress - continue to next iteration + } + } + + // Check for fixpoint + if !made_progress { + break; + } + + // Safety limit on iterations + if iterations > 1000 { + return TacticResult::Error("AutoTactic exceeded iteration limit".to_string()); + } + } + + TacticResult::Progress { + steps_taken: total_steps, + branches_created: total_branches, + } + } + + fn name(&self) -> &str { + "auto" + } +} + +/// Recursive formula processor for forward chaining. 
+/// +/// Processes a positive geometric formula by: +/// - Asserting relation tuples +/// - Adding pending equations +/// - Adding witness elements for existentials (and recursively processing bodies) +/// - Processing conjuncts +/// - NOT handling disjunctions (those need branching at a higher level) +struct FormulaProcessor<'a> { + tree: &'a mut SearchTree, + node: NodeId, + assignment: std::collections::HashMap, + axiom_idx: usize, + steps: usize, +} + +impl<'a> FormulaProcessor<'a> { + fn new( + tree: &'a mut SearchTree, + node: NodeId, + assignment: std::collections::HashMap, + axiom_idx: usize, + ) -> Self { + Self { + tree, + node, + assignment, + axiom_idx, + steps: 0, + } + } + + /// Process a formula, accumulating steps. Returns Err on failure. + fn process(&mut self, formula: &crate::core::Formula) -> Result<(), String> { + use crate::core::Formula; + + match formula { + Formula::True => { + // Nothing to do + Ok(()) + } + + Formula::Rel(rel_id, term) => { + // Assert relation tuple + let tuple = self.tree.get(self.node).and_then(|n| { + eval_term_to_tuple(term, &self.assignment, &n.structure) + }); + if let Some(tuple) = tuple { + self.tree.assert_relation(self.node, *rel_id, tuple)?; + self.steps += 1; + } + Ok(()) + } + + Formula::Eq(t1, t2) => { + // Add equation to congruence closure if not already equal + let eq_slids = self.tree.get(self.node).and_then(|n| { + let lhs = eval_term_to_slid(t1, &self.assignment, &n.structure)?; + let rhs = eval_term_to_slid(t2, &self.assignment, &n.structure)?; + Some((lhs, rhs)) + }); + if let Some((lhs, rhs)) = eq_slids { + let search_node = self.tree.get_mut(self.node).ok_or("Invalid node")?; + if !search_node.cc.are_equal(lhs, rhs) { + search_node.cc.add_equation( + lhs, + rhs, + super::types::EquationReason::AxiomConsequent { + axiom_idx: self.axiom_idx, + }, + ); + self.steps += 1; + } + } + Ok(()) + } + + Formula::Conj(conjuncts) => { + // Process each conjunct recursively + for conjunct in conjuncts { + 
self.process(conjunct)?; + } + Ok(()) + } + + Formula::Exists(var_name, sort, body) => { + // Add fresh witness and recursively process body + if let crate::core::DerivedSort::Base(sort_id) = sort { + match self.tree.add_element(self.node, *sort_id) { + Ok((slid, _luid)) => { + // Add witness to assignment + self.assignment.insert(var_name.clone(), slid.index()); + self.steps += 1; + // Recursively process body with updated assignment + self.process(body) + } + Err(e) => Err(format!("Failed to add witness: {}", e)), + } + } else { + // Product sort witness - not yet implemented + Ok(()) + } + } + + Formula::False | Formula::Disj(_) => { + // These should be handled at a higher level + // False triggers unsat, Disj triggers branching + Ok(()) + } + } + } +} + +/// Helper: evaluate a term to a single Slid given variable assignment and structure. +/// Returns None if the term contains constructs we can't handle or if evaluation fails. +fn eval_term_to_slid( + term: &crate::core::Term, + assignment: &std::collections::HashMap, + structure: &crate::core::Structure, +) -> Option { + use crate::core::Term; + + match term { + Term::Var(name, _sort) => { + // Simple variable - look up in assignment + assignment.get(name).map(|&idx| Slid::from_usize(idx)) + } + Term::App(func_id, arg) => { + // Function application: evaluate arg, then look up function value + let arg_slid = eval_term_to_slid(arg, assignment, structure)?; + let sort_slid = structure.sort_local_id(arg_slid); + structure.get_function(*func_id, sort_slid) + } + Term::Record(_fields) => { + // Records evaluate to product elements - not a single Slid + // Would need product element lookup + None + } + Term::Project(base, field_name) => { + // Projection: evaluate base (must be a record element), then project + // This would require looking up the product element's components + let _ = (base, field_name); + None // Not yet implemented - needs product element storage + } + } +} + +/// Helper: evaluate a term to a 
tuple of Slids given variable assignment. +/// Used for relation assertions where the domain may be a product. +fn eval_term_to_tuple( + term: &crate::core::Term, + assignment: &std::collections::HashMap, + structure: &crate::core::Structure, +) -> Option> { + use crate::core::Term; + + match term { + Term::Var(name, _sort) => { + // Simple variable - look up in assignment + assignment.get(name).map(|&idx| vec![Slid::from_usize(idx)]) + } + Term::Record(fields) => { + // Record term - collect all field values + let mut tuple = Vec::new(); + for (_, field_term) in fields { + match eval_term_to_tuple(field_term, assignment, structure) { + Some(mut field_tuple) => tuple.append(&mut field_tuple), + None => return None, + } + } + Some(tuple) + } + Term::App(_func_id, _arg) => { + // Function application - evaluate to single value, wrap in vec + eval_term_to_slid(term, assignment, structure).map(|s| vec![s]) + } + Term::Project(_, _) => { + // Projection - would need product element storage + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{DerivedSort, ElaboratedTheory, Signature, Theory}; + use crate::id::Slid; + use std::rc::Rc; + + fn make_simple_theory() -> Rc { + // A simple theory with one sort and one function + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + sig.add_function( + "f".to_string(), + DerivedSort::Base(node_id), + DerivedSort::Base(node_id), + ); + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Simple".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }) + } + + #[test] + fn test_search_tree_creation() { + let theory = make_simple_theory(); + let tree = SearchTree::new(theory); + + assert_eq!(tree.nodes.len(), 1); + assert_eq!(tree.root(), 0); + assert_eq!(tree.frontier(), vec![0]); + } + + #[test] + fn test_branching() { + let theory = make_simple_theory(); + let mut tree = SearchTree::new(theory); + + let child1 = 
tree.branch(0, Some("child1".to_string())); + let child2 = tree.branch(0, Some("child2".to_string())); + + assert_eq!(tree.nodes.len(), 3); + assert_eq!(tree.nodes[0].children, vec![child1, child2]); + assert_eq!(tree.nodes[child1].parent, Some(0)); + assert_eq!(tree.nodes[child2].parent, Some(0)); + + // Frontier should now be the children + let frontier = tree.frontier(); + assert!(frontier.contains(&child1)); + assert!(frontier.contains(&child2)); + assert!(!frontier.contains(&0)); // Parent no longer on frontier (has children) + } + + #[test] + fn test_add_elements() { + let theory = make_simple_theory(); + let mut tree = SearchTree::new(theory); + + // Add elements to root + let (slid1, _luid1) = tree.add_element(0, 0).unwrap(); + let (slid2, _luid2) = tree.add_element(0, 0).unwrap(); + + assert_eq!(slid1, Slid::from_usize(0)); + assert_eq!(slid2, Slid::from_usize(1)); + assert_eq!(tree.nodes[0].structure.carrier_size(0), 2); + } + + #[test] + fn test_check_tactic() { + let theory = make_simple_theory(); + let mut tree = SearchTree::new(theory); + + // Empty structure should be "incomplete" but no obligations (no axioms) + let result = CheckTactic.run(&mut tree, 0, &Budget::quick()); + + // No axioms means no obligations, but also not complete (no elements, no function values) + match result { + TacticResult::Progress { .. 
} => {} // Expected + other => panic!("Unexpected result: {:?}", other), + } + } + + #[test] + fn test_summary() { + let theory = make_simple_theory(); + let mut tree = SearchTree::new(theory); + + tree.branch(0, Some("a".to_string())); + tree.branch(0, Some("b".to_string())); + + let summary = tree.summary(5); + assert_eq!(summary.total_nodes, 3); + assert_eq!(summary.frontier_size, 2); + assert_eq!(summary.solved_count, 0); + } + + #[test] + fn test_union_find_with_slid() { + use egglog_union_find::UnionFind; + + // Helper for cleaner syntax + fn s(n: usize) -> Slid { + Slid::from_usize(n) + } + + // Verify egglog's union-find works with our Slid type (which is usize) + let mut uf: UnionFind = UnionFind::default(); + + // Union some elements + let (parent, child) = uf.union(s(0), s(1)); + assert_eq!(parent, s(0)); // union-by-min: smaller id becomes parent + assert_eq!(child, s(1)); + + // Find should return canonical representative + assert_eq!(uf.find(s(0)), s(0)); + assert_eq!(uf.find(s(1)), s(0)); + + // Add more elements and union + let (parent2, child2) = uf.union(s(2), s(3)); + assert_eq!(parent2, s(2)); + assert_eq!(child2, s(3)); + + // Union the two equivalence classes + let (parent3, child3) = uf.union(s(1), s(3)); + // Now 0, 1, 2, 3 should all be in same class with 0 as root + assert_eq!(parent3, s(0)); // find(1) = 0, find(3) = 2, min(0,2) = 0 + assert_eq!(child3, s(2)); + + assert_eq!(uf.find(s(0)), s(0)); + assert_eq!(uf.find(s(1)), s(0)); + assert_eq!(uf.find(s(2)), s(0)); + assert_eq!(uf.find(s(3)), s(0)); + } + + #[test] + fn test_forward_chaining_tactic() { + // Create a theory with no axioms - forward chaining should just fall through + let theory = make_simple_theory(); + let mut tree = SearchTree::new(theory); + + // On an empty structure with no axioms, forward chaining should report progress + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + // No axioms means no violations, should fall through to CheckTactic + 
match result { + TacticResult::Progress { .. } => {} // Expected - incomplete but no obligations + other => panic!("Expected Progress, got {:?}", other), + } + } + + #[test] + fn test_forward_chaining_detects_false() { + use crate::core::{Context, Formula, Sequent}; + + // Create a theory with an axiom: True |- False + // This means any model is immediately unsat + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::False, + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Inconsistent".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/inconsistent".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + + // Forward chaining should detect the derivation of False + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Unsat(_) => {} // Expected - True |- False is violated + other => panic!("Expected Unsat, got {:?}", other), + } + } + + #[test] + fn test_forward_chaining_adds_equations() { + use crate::core::{Context, Formula, Sequent, Term}; + + // Create a theory with an axiom: ∀x:Node, y:Node. 
True |- x = y + // (Every two nodes are equal) + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + + let ctx = Context::new() + .extend("x".to_string(), DerivedSort::Base(0)) + .extend("y".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: ctx, + premise: Formula::True, + conclusion: Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "AllEqual".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/all_equal".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + + // Add two elements + let (a, _) = tree.add_element(0, 0).unwrap(); + let (b, _) = tree.add_element(0, 0).unwrap(); + assert_ne!(a, b); + + // Forward chaining should detect the equation obligation and add pending equations + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { steps_taken, .. } => { + assert!(steps_taken > 0, "Should have made progress"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // Check that pending equations were added to congruence closure + let node = tree.get(0).unwrap(); + assert!(!node.cc.pending.is_empty(), "Should have pending equations"); + } + + #[test] + fn test_propagate_equations_merges() { + use crate::core::{Context, Formula, Sequent, Term}; + + // Create a theory with an axiom: ∀x:Node, y:Node. 
True |- x = y + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + + let ctx = Context::new() + .extend("x".to_string(), DerivedSort::Base(0)) + .extend("y".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: ctx, + premise: Formula::True, + conclusion: Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "AllEqual".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/all_equal".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + + // Add two elements + let (a, _) = tree.add_element(0, 0).unwrap(); + let (b, _) = tree.add_element(0, 0).unwrap(); + + // First run forward chaining to add equations + ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + // Verify equations are pending + assert!(!tree.get(0).unwrap().cc.pending.is_empty()); + + // Run equation propagation + let result = PropagateEquationsTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { steps_taken, .. 
} => { + assert!(steps_taken > 0, "Should have processed equations"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // Check that a and b are now in the same equivalence class + let node = tree.get_mut(0).unwrap(); + assert!(node.cc.are_equal(a, b), "a and b should be equal after propagation"); + } + + #[test] + fn test_auto_tactic() { + use crate::core::{Context, Formula, Sequent, Term}; + + // Create a theory where all elements are equal + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + + let ctx = Context::new() + .extend("x".to_string(), DerivedSort::Base(0)) + .extend("y".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: ctx, + premise: Formula::True, + conclusion: Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "AllEqual".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/all_equal".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + + // Add three elements + let (a, _) = tree.add_element(0, 0).unwrap(); + let (b, _) = tree.add_element(0, 0).unwrap(); + let (c, _) = tree.add_element(0, 0).unwrap(); + + // Run AutoTactic - should do forward chaining + propagation to fixpoint + let result = AutoTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { steps_taken, .. 
} => { + assert!(steps_taken > 0, "Should have made progress"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // All three should be in the same equivalence class + let node = tree.get_mut(0).unwrap(); + assert!(node.cc.are_equal(a, b), "a and b should be equal"); + assert!(node.cc.are_equal(b, c), "b and c should be equal"); + assert!(node.cc.are_equal(a, c), "a and c should be equal (transitively)"); + } + + #[test] + fn test_existential_body_processing() { + use crate::core::{Context, Formula, RelationStorage, Sequent, Term}; + + // Create a theory with: + // - Sort: Node + // - Relation: R : Node -> Prop + // - Axiom: True |- ∃x:Node. R(x) + // This should add a witness and assert R(witness) + + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel( + 0, // R + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "ExistsR".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/exists_r".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + tree.init_relations(0, &[1]).unwrap(); // R has arity 1 + + // Initially no elements + assert_eq!(tree.get(0).unwrap().structure.carrier_size(0), 0); + + // Run forward chaining + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { steps_taken, .. 
} => { + assert!(steps_taken >= 2, "Should have added witness AND asserted R"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // Should now have one element (the witness) + let node = tree.get(0).unwrap(); + assert_eq!(node.structure.carrier_size(0), 1, "Should have one witness"); + + // R(witness) should be asserted + let witness = Slid::from_usize(0); + assert!( + node.structure.relations[0].contains(&[witness]), + "R(witness) should be asserted" + ); + } + + #[test] + fn test_nested_existential_body() { + use crate::core::{Context, Formula, RelationStorage, Sequent, Term}; + + // Create a theory with: + // - Sort: Node + // - Relation: E : Node × Node -> Prop + // - Axiom: True |- ∃x:Node. ∃y:Node. E(x, y) + // This should add two witnesses and assert E(w1, w2) + + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + // E : Node × Node -> Prop (binary relation as product domain) + sig.add_relation( + "E".to_string(), + DerivedSort::Product(vec![ + ("0".to_string(), DerivedSort::Base(0)), + ("1".to_string(), DerivedSort::Base(0)), + ]), + ); + + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Exists( + "y".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel( + 0, // E + Term::Record(vec![ + ("0".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("1".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + )), + )), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "ExistsEdge".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/exists_edge".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + tree.init_relations(0, &[2]).unwrap(); // E has arity 2 + + // Run forward chaining + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + 
TacticResult::Progress { steps_taken, .. } => { + assert!(steps_taken >= 3, "Should have added 2 witnesses AND asserted E"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // Should have two elements + let node = tree.get(0).unwrap(); + assert_eq!(node.structure.carrier_size(0), 2, "Should have two witnesses"); + + // E(w1, w2) should be asserted + let w1 = Slid::from_usize(0); + let w2 = Slid::from_usize(1); + assert!( + node.structure.relations[0].contains(&[w1, w2]), + "E(w1, w2) should be asserted" + ); + } + + #[test] + fn test_from_base_preserves_structure() { + // Test that from_base preserves the base structure's elements and facts + use crate::core::Structure; + use crate::universe::Universe; + + let theory = make_simple_theory(); + + // Create a base structure with some elements + let mut universe = Universe::new(); + let mut base = Structure::new(1); + let (elem_a, _) = base.add_element(&mut universe, 0); + let (elem_b, _) = base.add_element(&mut universe, 0); + + // Initialize function storage and define f(a) = b + base.init_functions(&[Some(0)]); + base.define_function(0, elem_a, elem_b).unwrap(); + + // Create search tree from base + let tree = SearchTree::from_base(theory, base, universe); + + // The root should preserve the base structure + let root = tree.get(0).unwrap(); + assert_eq!(root.structure.carrier_size(0), 2, "Should have 2 elements from base"); + let sort_slid_a = root.structure.sort_local_id(elem_a); + assert_eq!( + root.structure.get_function(0, sort_slid_a), + Some(elem_b), + "f(a) = b should be preserved" + ); + } + + #[test] + fn test_from_base_solver_can_extend() { + // Test that the solver can extend a base structure to satisfy axioms + use crate::core::{Context, Formula, RelationStorage, Sequent, Structure, Term}; + use crate::universe::Universe; + + // Theory: Node sort with relation R : Node -> Prop + // Axiom: ∀x:Node. ∃y:Node. 
R(y) + // (every existing element implies existence of some R-element) + let mut sig = Signature::new(); + let node = sig.add_sort("Node".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(node)); + + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(node))], + }, + premise: Formula::True, + conclusion: Formula::Exists( + "y".to_string(), + DerivedSort::Base(node), + Box::new(Formula::Rel( + 0, // R + Term::Var("y".to_string(), DerivedSort::Base(node)), + )), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "ExistsR".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/exists_r".to_string()], + }, + }); + + // Create base structure with one element, R not yet holding + let mut universe = Universe::new(); + let mut base = Structure::new(1); + let (_elem_a, _) = base.add_element(&mut universe, 0); + base.init_relations(&[1]); // R has arity 1 + + // Create search tree from base + let mut tree = SearchTree::from_base(theory, base, universe); + + // Verify starting state: one element, R is empty + assert_eq!(tree.get(0).unwrap().structure.carrier_size(0), 1); + assert!(tree.get(0).unwrap().structure.relations[0].is_empty()); + + // Run forward chaining - should create witness for R(y) + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { .. } => { + let node = tree.get(0).unwrap(); + // Should have at least one R-element now + assert!( + !node.structure.relations[0].is_empty(), + "R should have at least one tuple after forward chaining" + ); + } + other => panic!("Expected Progress, got {:?}", other), + } + } +} diff --git a/src/solver/tree.rs b/src/solver/tree.rs new file mode 100644 index 0000000..d2fb15b --- /dev/null +++ b/src/solver/tree.rs @@ -0,0 +1,465 @@ +//! Search tree for instance synthesis. 
+
+use std::rc::Rc;
+
+use crate::core::{ElaboratedTheory, RelationStorage, Signature, Structure};
+use crate::id::{Luid, Slid, Uuid};
+use crate::tensor::{CheckResult, Violation};
+use crate::universe::Universe;
+
+use super::types::{
+    ConflictClause, CongruenceClosure, NodeDetail, NodeId, NodeStatus, SearchNode, SearchSummary,
+};
+
+/// The search tree
+#[derive(Debug)]
+pub struct SearchTree {
+    /// All nodes, indexed by NodeId
+    pub(crate) nodes: Vec<SearchNode>,
+    /// The theory we're trying to instantiate
+    pub theory: Rc<ElaboratedTheory>,
+    /// Universe for Luid allocation
+    pub universe: Universe,
+}
+
+impl SearchTree {
+    /// Create a new search tree for instantiating a theory
+    ///
+    /// The root node contains an empty Structure with the right number of
+    /// sorts but no elements.
+    ///
+    /// This is equivalent to `SearchTree::from_base(theory, empty_structure)`.
+    /// Use this for `:solve` (finding models from scratch).
+    pub fn new(theory: Rc<ElaboratedTheory>) -> Self {
+        let num_sorts = theory.theory.signature.sorts.len();
+        let root_structure = Structure::new(num_sorts);
+        Self::from_base_inner(theory, root_structure, Universe::new())
+    }
+
+    /// Create a search tree starting from an existing base structure.
+    ///
+    /// This enables the unified model-finding API:
+    /// - `:solve T` = `SearchTree::new(T)` = find models of T from scratch
+    /// - `:query M T'` = `SearchTree::from_base(T', M)` = find extensions of M to T'
+    ///
+    /// The base structure's elements, function values, and relation tuples are
+    /// preserved as "frozen" facts. The solver will only add new facts, not remove
+    /// existing ones (the refinement order).
+    ///
+    /// # Arguments
+    /// - `theory`: The theory to satisfy (may extend the base structure's theory)
+    /// - `base`: The starting structure (may already have elements, functions, relations)
+    /// - `universe`: The universe for Luid allocation (should contain Luids from base)
+    ///
+    /// # Panics
+    /// Panics if the base structure has more sorts than the theory signature.
+    pub fn from_base(theory: Rc<ElaboratedTheory>, base: Structure, universe: Universe) -> Self {
+        let num_sorts = theory.theory.signature.sorts.len();
+        assert!(
+            base.carriers.len() <= num_sorts,
+            "Base structure has {} sorts but theory only has {}",
+            base.carriers.len(),
+            num_sorts
+        );
+        Self::from_base_inner(theory, base, universe)
+    }
+
+    /// Internal constructor shared by `new` and `from_base`.
+    fn from_base_inner(theory: Rc<ElaboratedTheory>, root_structure: Structure, universe: Universe) -> Self {
+        let root = SearchNode {
+            id: 0,
+            parent: None,
+            children: Vec::new(),
+            structure: root_structure,
+            cc: CongruenceClosure::new(),
+            status: NodeStatus::Open,
+            p_success: 0.5, // Prior: 50% chance of solution existing
+            conflicts: Vec::new(),
+            label: Some("root".to_string()),
+        };
+
+        Self {
+            nodes: vec![root],
+            theory,
+            universe,
+        }
+    }
+
+    /// Get the root node ID
+    pub fn root(&self) -> NodeId {
+        0
+    }
+
+    /// Get a node by ID
+    pub fn get(&self, id: NodeId) -> Option<&SearchNode> {
+        self.nodes.get(id)
+    }
+
+    /// Get a mutable reference to a node
+    pub fn get_mut(&mut self, id: NodeId) -> Option<&mut SearchNode> {
+        self.nodes.get_mut(id)
+    }
+
+    /// Get the signature of the theory
+    pub fn signature(&self) -> &Signature {
+        &self.theory.theory.signature
+    }
+
+    /// Get all open frontier nodes
+    pub fn frontier(&self) -> Vec<NodeId> {
+        self.nodes
+            .iter()
+            .filter(|n| n.status == NodeStatus::Open && n.children.is_empty())
+            .map(|n| n.id)
+            .collect()
+    }
+
+    /// Get frontier nodes sorted by p_success (descending)
+    pub fn frontier_by_probability(&self) -> Vec<NodeId> {
+        let mut frontier = self.frontier();
+        frontier.sort_by(|&a, &b| {
+            let pa = self.nodes[a].p_success;
+            let pb = self.nodes[b].p_success;
+            pb.partial_cmp(&pa).unwrap_or(std::cmp::Ordering::Equal)
+        });
+        frontier
+    }
+
+    /// Create a child node by cloning the parent's structure
+    ///
+    /// Returns the new node's ID. The child starts with the same structure
+    /// as the parent (will be refined by subsequent operations).
+    pub fn branch(&mut self, parent: NodeId, label: Option<String>) -> NodeId {
+        let parent_node = &self.nodes[parent];
+        let child_structure = parent_node.structure.clone();
+        let child_cc = parent_node.cc.clone();
+        let child_p = parent_node.p_success;
+
+        let child_id = self.nodes.len();
+        let child = SearchNode {
+            id: child_id,
+            parent: Some(parent),
+            children: Vec::new(),
+            structure: child_structure,
+            cc: child_cc,
+            status: NodeStatus::Open,
+            p_success: child_p,
+            conflicts: Vec::new(),
+            label,
+        };
+
+        self.nodes.push(child);
+        self.nodes[parent].children.push(child_id);
+        child_id
+    }
+
+    /// Mark a node as solved (found valid instance)
+    pub fn mark_solved(&mut self, id: NodeId) {
+        if let Some(node) = self.nodes.get_mut(id) {
+            node.status = NodeStatus::Solved;
+        }
+    }
+
+    /// Mark a node as unsatisfiable
+    pub fn mark_unsat(&mut self, id: NodeId, conflict: Option<ConflictClause>) {
+        if let Some(node) = self.nodes.get_mut(id) {
+            node.status = NodeStatus::Unsat;
+            if let Some(c) = conflict {
+                node.conflicts.push(c);
+            }
+        }
+    }
+
+    /// Mark a node as pruned (agent decided not to explore)
+    pub fn mark_pruned(&mut self, id: NodeId) {
+        if let Some(node) = self.nodes.get_mut(id) {
+            node.status = NodeStatus::Pruned;
+        }
+    }
+
+    /// Update a node's success probability estimate
+    pub fn set_probability(&mut self, id: NodeId, p: f64) {
+        if let Some(node) = self.nodes.get_mut(id) {
+            node.p_success = p.clamp(0.0, 1.0);
+        }
+    }
+
+    /// Check if any node has been solved
+    pub fn has_solution(&self) -> Option<NodeId> {
+        self.nodes
+            .iter()
+            .find(|n| n.status == NodeStatus::Solved)
+            .map(|n| n.id)
+    }
+
+    /// Get the path from root to a node (list of NodeIds)
+    pub fn path_to(&self, id: NodeId) -> Vec<NodeId> {
+        let mut path = Vec::new();
+        let mut current = Some(id);
+        while let Some(nid) = current {
+            path.push(nid);
+            current = self.nodes[nid].parent;
+        }
+        path.reverse();
+        path
+    }
+}
+
+// ============================================================================
+// REFINEMENT OPERATIONS
+// ============================================================================
+
+/// Operations for refining a partial model (moving up in the refinement order)
+impl SearchTree {
+    /// Add a new element to a sort in a node's structure
+    ///
+    /// Returns the (Slid, Luid) of the new element.
+    pub fn add_element(&mut self, node: NodeId, sort_id: usize) -> Result<(Slid, Luid), String> {
+        let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?;
+        if node.status != NodeStatus::Open {
+            return Err("Cannot refine a non-open node".to_string());
+        }
+        Ok(node.structure.add_element(&mut self.universe, sort_id))
+    }
+
+    /// Add a new element with a specific UUID
+    pub fn add_element_with_uuid(
+        &mut self,
+        node: NodeId,
+        uuid: Uuid,
+        sort_id: usize,
+    ) -> Result<(Slid, Luid), String> {
+        let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?;
+        if node.status != NodeStatus::Open {
+            return Err("Cannot refine a non-open node".to_string());
+        }
+        Ok(node
+            .structure
+            .add_element_with_uuid(&mut self.universe, uuid, sort_id))
+    }
+
+    /// Define a function value: f(domain) = codomain
+    ///
+    /// The function must not already be defined at this domain element
+    /// (that would be a conflict, not a refinement).
+    pub fn define_function(
+        &mut self,
+        node: NodeId,
+        func_id: usize,
+        domain_slid: Slid,
+        codomain_slid: Slid,
+    ) -> Result<(), String> {
+        let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?;
+        if node.status != NodeStatus::Open {
+            return Err("Cannot refine a non-open node".to_string());
+        }
+        node.structure
+            .define_function(func_id, domain_slid, codomain_slid)
+    }
+
+    /// Assert a relation tuple: R(tuple) = true
+    pub fn assert_relation(
+        &mut self,
+        node: NodeId,
+        rel_id: usize,
+        tuple: Vec<Slid>,
+    ) -> Result<bool, String> {
+        let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?;
+        if node.status != NodeStatus::Open {
+            return Err("Cannot refine a non-open node".to_string());
+        }
+        Ok(node.structure.assert_relation(rel_id, tuple))
+    }
+
+    /// Initialize function storage for a node (call after adding elements)
+    pub fn init_functions(
+        &mut self,
+        node: NodeId,
+        domain_sort_ids: &[Option<usize>],
+    ) -> Result<(), String> {
+        let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?;
+        node.structure.init_functions(domain_sort_ids);
+        Ok(())
+    }
+
+    /// Initialize relation storage for a node
+    pub fn init_relations(&mut self, node: NodeId, arities: &[usize]) -> Result<(), String> {
+        let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?;
+        node.structure.init_relations(arities);
+        Ok(())
+    }
+
+    /// Add a pending equation to a node's congruence closure
+    ///
+    /// Equations arise from axiom consequents, function conflicts, etc.
+    /// They are processed later during propagation.
+    pub fn add_pending_equation(
+        &mut self,
+        node: NodeId,
+        lhs: Slid,
+        rhs: Slid,
+        reason: super::types::EquationReason,
+    ) {
+        if let Some(node) = self.nodes.get_mut(node) {
+            node.cc.add_equation(lhs, rhs, reason);
+        }
+    }
+}
+
+// ============================================================================
+// CONSTRAINT CHECKING
+// ============================================================================
+
+impl SearchTree {
+    /// Check all axioms against a node's current structure
+    ///
+    /// Returns Ok(()) if all axioms are satisfied, or Err with violations.
+    pub fn check_axioms(&self, node: NodeId) -> Result<(), Vec<(usize, Vec<Violation>)>> {
+        let node = self.nodes.get(node).ok_or_else(Vec::new)?;
+        let violations = crate::tensor::check_theory_axioms(
+            &self.theory.theory.axioms,
+            &node.structure,
+            &self.theory.theory.signature,
+        );
+        if violations.is_empty() {
+            Ok(())
+        } else {
+            Err(violations)
+        }
+    }
+
+    /// Check a single axiom
+    pub fn check_axiom(&self, node: NodeId, axiom_idx: usize) -> CheckResult {
+        let node = match self.nodes.get(node) {
+            Some(n) => n,
+            None => return CheckResult::Satisfied, // Invalid node = vacuously true?
+        };
+        let axiom = match self.theory.theory.axioms.get(axiom_idx) {
+            Some(a) => a,
+            None => return CheckResult::Satisfied,
+        };
+        // Return Satisfied on compile error (unsupported patterns handled elsewhere)
+        crate::tensor::check_sequent(axiom, &node.structure, &self.theory.theory.signature)
+            .unwrap_or(CheckResult::Satisfied)
+    }
+
+    /// Check if a structure is "complete" (all functions total, all axioms satisfied)
+    ///
+    /// A complete structure is a valid model of the theory.
+ pub fn is_complete(&self, node: NodeId) -> Result { + let node = self.nodes.get(node).ok_or("Invalid node ID")?; + let sig = &self.theory.theory.signature; + + // Check all functions are total + for (func_id, func_sym) in sig.functions.iter().enumerate() { + if func_id >= node.structure.functions.len() { + return Ok(false); // Function storage not initialized + } + + // Get domain cardinality (works for base and product sorts) + let domain_size = func_sym.domain.cardinality(&node.structure); + + // Check all domain elements have values (local functions only for now) + let func_col = &node.structure.functions[func_id]; + if func_col.len() < domain_size { + return Ok(false); + } + if let Some(local_col) = func_col.as_local() { + for opt in local_col { + if opt.is_none() { + return Ok(false); + } + } + } + } + + // Check all axioms + match self.check_axioms(node.id) { + Ok(()) => Ok(true), + Err(_) => Ok(false), + } + } +} + +// ============================================================================ +// AGENT INTERFACE +// ============================================================================ + +impl SearchTree { + /// Get a summary of the search state + pub fn summary(&self, top_k: usize) -> SearchSummary { + let frontier = self.frontier_by_probability(); + let top_frontier: Vec<_> = frontier + .iter() + .take(top_k) + .map(|&id| { + let node = &self.nodes[id]; + (id, node.p_success, node.label.clone()) + }) + .collect(); + + SearchSummary { + total_nodes: self.nodes.len(), + frontier_size: frontier.len(), + solved_count: self + .nodes + .iter() + .filter(|n| n.status == NodeStatus::Solved) + .count(), + unsat_count: self + .nodes + .iter() + .filter(|n| n.status == NodeStatus::Unsat) + .count(), + top_frontier, + } + } + + /// Get detailed info about a node (for agent inspection) + pub fn node_detail(&self, id: NodeId) -> Option { + let node = self.nodes.get(id)?; + Some(NodeDetail { + id: node.id, + parent: node.parent, + children: 
node.children.clone(), + status: node.status.clone(), + p_success: node.p_success, + label: node.label.clone(), + carrier_sizes: node + .structure + .carriers + .iter() + .map(|c| c.len() as usize) + .collect(), + num_function_values: node + .structure + .functions + .iter() + .map(|f| match f { + crate::core::FunctionColumn::Local(col) => { + col.iter().filter(|opt| opt.is_some()).count() + } + crate::core::FunctionColumn::External(col) => { + col.iter().filter(|opt| opt.is_some()).count() + } + crate::core::FunctionColumn::ProductLocal { storage, .. } => { + storage.defined_count() + } + crate::core::FunctionColumn::ProductCodomain { field_columns, .. } => { + // Count elements where ALL fields are defined + if field_columns.is_empty() { + 0 + } else { + let len = field_columns[0].len(); + (0..len) + .filter(|&i| field_columns.iter().all(|col| col.get(i).is_some_and(|opt| opt.is_some()))) + .count() + } + } + }) + .collect(), + num_relation_tuples: node.structure.relations.iter().map(|r| r.len()).collect(), + conflicts: node.conflicts.clone(), + }) + } +} diff --git a/src/solver/types.rs b/src/solver/types.rs new file mode 100644 index 0000000..7c3f37e --- /dev/null +++ b/src/solver/types.rs @@ -0,0 +1,131 @@ +//! Core types for the solver infrastructure. 
+ +use crate::core::Structure; +use crate::id::{Luid, Slid}; + +// Re-export congruence closure types from shared module +pub use crate::cc::{CongruenceClosure, EquationReason, PendingEquation}; + +/// Unique identifier for a search node +pub type NodeId = usize; + +/// A node in the search tree +#[derive(Clone, Debug)] +pub struct SearchNode { + /// Unique ID for this node + pub id: NodeId, + /// Parent node (None for root) + pub parent: Option, + /// Children (branches from this node) + pub children: Vec, + /// The partial model at this node + pub structure: Structure, + /// Congruence closure for tracking element equivalences + pub cc: CongruenceClosure, + /// Status of this node + pub status: NodeStatus, + /// Agent's estimate of success probability (0.0 to 1.0) + pub p_success: f64, + /// Conflict clauses learned at or below this node + pub conflicts: Vec, + /// Debug/display name for this node + pub label: Option, +} + +/// Status of a search node +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum NodeStatus { + /// Still exploring (frontier node) + Open, + /// Found a valid complete instance + Solved, + /// Proved unsatisfiable from this point + Unsat, + /// Agent decided not to explore further + Pruned, +} + +/// A learned conflict clause (derivation of False) +/// +/// Records a combination of commitments from which `⊢ False` was derived. +/// Used for CDCL-style pruning: if a node's commitments subsume a conflict +/// clause, that node can be immediately marked Unsat (since False is derivable). +/// +/// Note: This represents a PROOF of unsatisfiability, not mere "conflicts". +/// Even apparent conflicts (like function defined with two different values) +/// just create pending equations—only if propagating those equations leads +/// to deriving False do we have a true conflict clause. 
+#[derive(Clone, Debug)] +pub struct ConflictClause { + /// Elements that must exist (sort_id, luid) + pub required_elements: Vec<(usize, Luid)>, + /// Function values that must hold (func_id, domain_luid, codomain_luid) + pub required_functions: Vec<(usize, Luid, Luid)>, + /// Relation tuples that must be asserted (rel_id, tuple as Luids) + pub required_relations: Vec<(usize, Vec)>, + /// Which axiom was violated (index into theory's axiom list) + pub violated_axiom: Option, + /// Human-readable explanation + pub explanation: Option, +} + +/// An obligation to fulfill +/// +/// Geometric logic consequents are positive (existentials, disjunctions, relations). +/// When an axiom's premise is satisfied but conclusion isn't, we have an OBLIGATION +/// to make the conclusion true. This can always potentially be done by refinement +/// (adding elements, defining functions, asserting relations). +/// +/// Only when fulfilling the obligation would CONFLICT with existing commitments +/// is the node truly unsatisfiable. 
+#[derive(Clone, Debug)] +pub struct Obligation { + /// Which axiom generated this obligation + pub axiom_idx: usize, + /// The variable assignment where premise holds but conclusion doesn't + /// Maps variable name to (sort_id, slid) in the current structure + pub assignment: Vec<(String, usize, Slid)>, + /// Human-readable description of what needs to be witnessed + pub description: String, +} + +/// Result of checking axioms: either all satisfied, or obligations remain +#[derive(Clone, Debug)] +pub enum AxiomCheckResult { + /// All axioms satisfied for all substitutions + AllSatisfied, + /// Some axioms have unsatisfied consequents (obligations to fulfill) + Obligations(Vec), +} + +/// Summary of the current search state (for agent inspection) +#[derive(Debug)] +pub struct SearchSummary { + /// Total nodes in tree + pub total_nodes: usize, + /// Open frontier nodes + pub frontier_size: usize, + /// Solved nodes + pub solved_count: usize, + /// Unsat nodes + pub unsat_count: usize, + /// Top-k frontier nodes by probability + pub top_frontier: Vec<(NodeId, f64, Option)>, +} + +/// Detailed information about a search node +#[derive(Debug)] +pub struct NodeDetail { + pub id: NodeId, + pub parent: Option, + pub children: Vec, + pub status: NodeStatus, + pub p_success: f64, + pub label: Option, + pub carrier_sizes: Vec, + pub num_function_values: Vec, + pub num_relation_tuples: Vec, + pub conflicts: Vec, +} + +// Congruence closure types and tests are now in crate::cc diff --git a/src/store/append.rs b/src/store/append.rs new file mode 100644 index 0000000..4d705c6 --- /dev/null +++ b/src/store/append.rs @@ -0,0 +1,31 @@ +//! Low-level append operations for the Store. +//! +//! These are the primitive operations that all higher-level operations use. +//! Note: We use a trait to document the interface, but the actual implementations +//! are on Store directly to avoid borrow checker issues. + +use crate::id::Slid; + +/// Low-level operations on the meta structure. 
+/// +/// This trait documents the interface that Store implements for low-level +/// element manipulation. The actual implementations are on Store directly. +pub trait AppendOps { + /// Add an element to a sort in the meta structure with a simple name + fn add_element(&mut self, sort_id: usize, name: &str) -> Slid; + + /// Add an element with a qualified name path + fn add_element_qualified(&mut self, sort_id: usize, path: Vec) -> Slid; + + /// Define a function value in the meta structure + fn define_func(&mut self, func_id: usize, domain: Slid, codomain: Slid) -> Result<(), String>; + + /// Get a function value from the meta structure + fn get_func(&self, func_id: usize, domain: Slid) -> Option; + + /// Get all elements of a sort + fn elements_of_sort(&self, sort_id: usize) -> Vec; + + /// Get the name of an element + fn get_element_name(&self, slid: Slid) -> String; +} diff --git a/src/store/batch.rs b/src/store/batch.rs new file mode 100644 index 0000000..bb550a8 --- /dev/null +++ b/src/store/batch.rs @@ -0,0 +1,355 @@ +//! Atomic batch creation for elements. +//! +//! This module enforces the Monotonic Submodel Property by requiring all facts +//! involving an element to be defined atomically at element creation time. +//! +//! # Design Principles +//! +//! 1. **All facts defined at creation**: When element `a` is created, all facts +//! involving `a` (function values `f(a)=b`, relation tuples `R(a,c)`) must be +//! defined in the same atomic batch. +//! +//! 2. **No post-hoc fact addition**: After an element's batch is committed, no new +//! facts involving that element can be added. This ensures existing submodels +//! remain valid as new elements are added. +//! +//! 3. **Relations are boolean functions**: Relations `R: A × B → Bool` are treated +//! as total functions. When element `a` is created, all `R(a, _)` and `R(_, a)` +//! values are implicitly `false` unless explicitly asserted as `true`. 
+ +use crate::id::{NumericId, Slid}; + +use super::Store; + +/// An atomic batch of changes for creating a single new element. +/// +/// All facts involving the new element must be defined in this batch. +/// After the batch is committed, no new facts can be added. +#[derive(Debug, Clone)] +pub struct ElementBatch { + /// The instance this element belongs to + pub instance: Slid, + + /// The sort (from the theory) of this element + pub sort: Slid, + + /// Human-readable name for the element + pub name: String, + + /// Function values where this element is in the domain: f(elem) = value + pub func_vals: Vec<(Slid, Slid)>, // (func, codomain_value) + + /// Relation assertions where this element appears: R(..., elem, ...) = true + /// Only the TRUE tuples are listed; everything else is implicitly false. + pub rel_tuples: Vec<(Slid, Slid)>, // (rel, arg) - for unary relations or when elem is the arg +} + +impl ElementBatch { + /// Create an empty/invalid batch (for use with mem::replace) + fn empty() -> Self { + Self { + instance: Slid::from_usize(0), + sort: Slid::from_usize(0), + name: String::new(), + func_vals: Vec::new(), + rel_tuples: Vec::new(), + } + } +} + +impl ElementBatch { + /// Create a new element batch + pub fn new(instance: Slid, sort: Slid, name: impl Into) -> Self { + Self { + instance, + sort, + name: name.into(), + func_vals: Vec::new(), + rel_tuples: Vec::new(), + } + } + + /// Add a function value: f(this_element) = value + pub fn with_func(mut self, func: Slid, value: Slid) -> Self { + self.func_vals.push((func, value)); + self + } + + /// Add a relation tuple: R(this_element) = true (for unary relations) + /// or R(arg) = true where this element is part of arg + pub fn with_rel(mut self, rel: Slid, arg: Slid) -> Self { + self.rel_tuples.push((rel, arg)); + self + } + + /// Define a function value: f(this_element) = value + pub fn define_func(&mut self, func: Slid, value: Slid) { + self.func_vals.push((func, value)); + } + + /// Assert a 
relation tuple as true + pub fn assert_rel(&mut self, rel: Slid, arg: Slid) { + self.rel_tuples.push((rel, arg)); + } +} + +/// Builder for creating elements with all their facts defined atomically. +/// +/// This enforces the Monotonic Submodel Property by ensuring all facts +/// are defined before the element is committed. +pub struct ElementBuilder<'a> { + store: &'a mut Store, + batch: ElementBatch, + committed: bool, +} + +impl<'a> ElementBuilder<'a> { + /// Create a new element builder + pub fn new(store: &'a mut Store, instance: Slid, sort: Slid, name: impl Into) -> Self { + Self { + store, + batch: ElementBatch::new(instance, sort, name), + committed: false, + } + } + + /// Define a function value: f(this_element) = value + pub fn define_func(&mut self, func: Slid, value: Slid) -> &mut Self { + self.batch.define_func(func, value); + self + } + + /// Assert a relation tuple as true: R(arg) = true + pub fn assert_rel(&mut self, rel: Slid, arg: Slid) -> &mut Self { + self.batch.assert_rel(rel, arg); + self + } + + /// Commit the element batch and return the new element's Slid. + /// + /// This atomically creates the element and all its facts. + /// After this, no new facts involving this element can be added. + pub fn commit(mut self) -> Result { + self.committed = true; + let batch = std::mem::replace(&mut self.batch, ElementBatch::empty()); + self.store.add_element_batch(batch) + } +} + +impl<'a> Drop for ElementBuilder<'a> { + fn drop(&mut self) { + if !self.committed { + // Log a warning if the builder was dropped without committing + // In debug builds, this could panic to catch bugs + #[cfg(debug_assertions)] + eprintln!( + "Warning: ElementBuilder for '{}' was dropped without committing", + self.batch.name + ); + } + } +} + +impl Store { + /// Create an element builder for atomic element creation. 
+ /// + /// # Example + /// + /// ```ignore + /// let elem = store.build_element(instance, sort, "my_element") + /// .define_func(f, target) + /// .assert_rel(r, arg) + /// .commit()?; + /// ``` + pub fn build_element( + &mut self, + instance: Slid, + sort: Slid, + name: impl Into, + ) -> ElementBuilder<'_> { + ElementBuilder::new(self, instance, sort, name) + } + + /// Add an element with all its facts atomically. + /// + /// This is the low-level API; prefer `build_element()` for a builder pattern. + pub fn add_element_batch(&mut self, batch: ElementBatch) -> Result { + // 1. Create the element + let elem_slid = self.add_elem(batch.instance, batch.sort, &batch.name)?; + + // 2. Add all function values + for (func, value) in batch.func_vals { + self.add_func_val(batch.instance, func, elem_slid, value)?; + } + + // 3. Add all relation tuples (sparse: only the true ones) + for (rel, arg) in batch.rel_tuples { + self.add_rel_tuple(batch.instance, rel, arg)?; + } + + Ok(elem_slid) + } + + /// Create multiple elements atomically within a closure. + /// + /// This allows defining elements that reference each other within the same batch. + /// + /// # Example + /// + /// ```ignore + /// store.create_elements(instance, |ctx| { + /// let a = ctx.add_element(sort_a, "a")?; + /// let b = ctx.add_element(sort_b, "b")?; + /// + /// ctx.define_func(f, a, b)?; // f(a) = b + /// ctx.assert_rel(r, a)?; // R(a) = true + /// + /// Ok(vec![a, b]) + /// })?; + /// ``` + pub fn create_elements(&mut self, instance: Slid, f: F) -> Result + where + F: FnOnce(&mut ElementCreationContext<'_>) -> Result, + { + let mut ctx = ElementCreationContext::new(self, instance); + let result = f(&mut ctx)?; + ctx.commit()?; + Ok(result) + } +} + +/// Context for creating multiple elements atomically. +/// +/// All elements and facts created within this context are committed together. 
+pub struct ElementCreationContext<'a> { + store: &'a mut Store, + instance: Slid, + /// Elements created but not yet committed to GeologMeta + pending_elements: Vec<(Slid, Slid, String)>, // (sort, slid, name) + /// Function values to add + pending_func_vals: Vec<(Slid, Slid, Slid)>, // (func, arg, result) + /// Relation tuples to add + pending_rel_tuples: Vec<(Slid, Slid)>, // (rel, arg) + committed: bool, +} + +impl<'a> ElementCreationContext<'a> { + fn new(store: &'a mut Store, instance: Slid) -> Self { + Self { + store, + instance, + pending_elements: Vec::new(), + pending_func_vals: Vec::new(), + pending_rel_tuples: Vec::new(), + committed: false, + } + } + + /// Add a new element (returns Slid immediately for use in defining facts) + pub fn add_element(&mut self, sort: Slid, name: impl Into) -> Result { + let name = name.into(); + let elem_slid = self.store.add_elem(self.instance, sort, &name)?; + self.pending_elements.push((sort, elem_slid, name)); + Ok(elem_slid) + } + + /// Define a function value: f(arg) = result + pub fn define_func(&mut self, func: Slid, arg: Slid, result: Slid) -> Result<(), String> { + self.pending_func_vals.push((func, arg, result)); + Ok(()) + } + + /// Assert a relation tuple as true: R(arg) = true + pub fn assert_rel(&mut self, rel: Slid, arg: Slid) -> Result<(), String> { + self.pending_rel_tuples.push((rel, arg)); + Ok(()) + } + + /// Commit all pending elements and facts + fn commit(&mut self) -> Result<(), String> { + // Add all function values + for (func, arg, result) in std::mem::take(&mut self.pending_func_vals) { + self.store.add_func_val(self.instance, func, arg, result)?; + } + + // Add all relation tuples + for (rel, arg) in std::mem::take(&mut self.pending_rel_tuples) { + self.store.add_rel_tuple(self.instance, rel, arg)?; + } + + self.committed = true; + Ok(()) + } +} + +impl<'a> Drop for ElementCreationContext<'a> { + fn drop(&mut self) { + if !self.committed && !self.pending_elements.is_empty() { + 
#[cfg(debug_assertions)] + eprintln!( + "Warning: ElementCreationContext with {} pending elements was dropped without committing", + self.pending_elements.len() + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_element_batch_builder() { + let mut store = Store::new(); + + // Create a theory with a sort + let theory = store.create_theory("TestTheory").unwrap(); + let sort = store.add_sort(theory, "Node").unwrap(); + let sort_ds = store.make_base_dsort(sort).unwrap(); + + // Create a function + let _func = store.add_function(theory, "label", sort_ds, sort_ds).unwrap(); + + // Create an instance + let instance = store.create_instance("TestInstance", theory).unwrap(); + + // Create an element using the batch API + let elem = store + .build_element(instance, sort, "node1") + .commit() + .unwrap(); + + // Verify element was created + let view = store.materialize(instance); + assert!(view.elements.contains(&elem)); + } + + #[test] + fn test_create_elements_context() { + let mut store = Store::new(); + + // Create a theory with a sort and relation + let theory = store.create_theory("TestTheory").unwrap(); + let sort = store.add_sort(theory, "Node").unwrap(); + let sort_ds = store.make_base_dsort(sort).unwrap(); + let rel = store.add_relation(theory, "connected", sort_ds).unwrap(); + + // Create an instance + let instance = store.create_instance("TestInstance", theory).unwrap(); + + // Create multiple elements atomically + let (a, b) = store + .create_elements(instance, |ctx| { + let a = ctx.add_element(sort, "a")?; + let b = ctx.add_element(sort, "b")?; + ctx.assert_rel(rel, a)?; + Ok((a, b)) + }) + .unwrap(); + + // Verify elements were created + let view = store.materialize(instance); + assert!(view.elements.contains(&a)); + assert!(view.elements.contains(&b)); + } +} diff --git a/src/store/bootstrap_queries.rs b/src/store/bootstrap_queries.rs new file mode 100644 index 0000000..91bbfa1 --- /dev/null +++ b/src/store/bootstrap_queries.rs 
@@ -0,0 +1,1017 @@ +//! Bootstrap query methods for GeologMeta. +//! +//! These methods provide typed query APIs for GeologMeta. They now delegate +//! to the compiled query engine (see query/store_queries.rs) for the core +//! scan+filter operations, with additional lookups for complex fields. +//! +//! TODO(geolog-ubi): Further integrate with the full query engine. + +use std::collections::HashMap; + +use crate::core::{Context, DerivedSort, ElaboratedTheory, Formula, Sequent, Signature, Term, Theory}; +use crate::id::{NumericId, Slid}; + +use super::append::AppendOps; +use super::Store; + +/// Remap a DerivedSort from Slid indices to sort indices. +/// +/// During reconstruction, DerivedSort::Base contains Slid.index() values +/// (from resolve_dsort). This function maps them to proper sort indices +/// using the provided mapping. +fn remap_derived_sort( + ds: &DerivedSort, + srt_slid_to_idx: &HashMap, +) -> DerivedSort { + match ds { + DerivedSort::Base(slid_idx) => { + // The slid_idx is a Slid.index() from resolve_dsort + // Map it to a sort index + if let Some(&sort_idx) = srt_slid_to_idx.get(slid_idx) { + DerivedSort::Base(sort_idx) + } else { + // Fallback: assume it's already a sort index + DerivedSort::Base(*slid_idx) + } + } + DerivedSort::Product(fields) => { + let remapped: Vec<_> = fields + .iter() + .map(|(name, field_ds)| { + (name.clone(), remap_derived_sort(field_ds, srt_slid_to_idx)) + }) + .collect(); + DerivedSort::Product(remapped) + } + } +} + +/// Information about a sort in a theory +#[derive(Debug, Clone)] +pub struct SortInfo { + pub name: String, + pub slid: Slid, +} + +/// Information about a function in a theory +#[derive(Debug, Clone)] +pub struct FuncInfo { + pub name: String, + pub slid: Slid, + pub domain: DerivedSort, + pub codomain: DerivedSort, +} + +/// Information about a relation in a theory +#[derive(Debug, Clone)] +pub struct RelInfo { + pub name: String, + pub slid: Slid, + pub domain: DerivedSort, +} + +/// Information 
about a sequent (axiom) in a theory +#[derive(Debug, Clone)] +pub struct SequentInfo { + pub name: String, + pub slid: Slid, + pub premise_slid: Option, + pub conclusion_slid: Option, +} + +/// Information about a context variable in a sequent +#[derive(Debug, Clone)] +pub struct CtxVarInfo { + pub slid: Slid, + pub binder_slid: Option, +} + +impl Store { + /// Query all sorts belonging to a theory. + /// + /// Returns (name, slid) for each Srt where Srt/theory == theory_slid. + /// Delegates to the compiled query engine. + pub fn query_theory_sorts(&self, theory_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_theory_sorts_compiled(theory_slid) + } + + /// Query all functions belonging to a theory. + /// + /// Returns FuncInfo for each Func where Func/theory == theory_slid. + /// Delegates to the compiled query engine. + pub fn query_theory_funcs(&self, theory_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_theory_funcs_compiled(theory_slid) + } + + /// Query all relations belonging to a theory. + /// + /// Returns RelInfo for each Rel where Rel/theory == theory_slid. + /// Delegates to the compiled query engine. + pub fn query_theory_rels(&self, theory_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_theory_rels_compiled(theory_slid) + } + + /// Look up a sort by name within a theory. + pub fn lookup_sort_by_name(&self, theory_slid: Slid, name: &str) -> Option { + self.query_theory_sorts(theory_slid) + .into_iter() + .find(|s| s.name == name) + .map(|s| s.slid) + } + + /// Look up a function by name within a theory. + pub fn lookup_func_by_name(&self, theory_slid: Slid, name: &str) -> Option { + self.query_theory_funcs(theory_slid) + .into_iter() + .find(|f| f.name == name) + .map(|f| f.slid) + } + + /// Look up a relation by name within a theory. 
+ pub fn lookup_rel_by_name(&self, theory_slid: Slid, name: &str) -> Option { + self.query_theory_rels(theory_slid) + .into_iter() + .find(|r| r.name == name) + .map(|r| r.slid) + } + + /// Query all sequents (axioms) belonging to a theory. + pub fn query_theory_sequents(&self, theory_slid: Slid) -> Vec { + let Some(sequent_sort) = self.sort_ids.sequent else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.sequent_theory else { + return vec![]; + }; + + let mut results = Vec::new(); + for sequent_slid in self.elements_of_sort(sequent_sort) { + if self.get_func(theory_func, sequent_slid) == Some(theory_slid) { + let name = self.get_element_name(sequent_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + let premise_slid = self + .func_ids + .sequent_premise + .and_then(|f| self.get_func(f, sequent_slid)); + let conclusion_slid = self + .func_ids + .sequent_conclusion + .and_then(|f| self.get_func(f, sequent_slid)); + + results.push(SequentInfo { + name: short_name, + slid: sequent_slid, + premise_slid, + conclusion_slid, + }); + } + } + results + } + + /// Query context variables for a sequent. + fn query_sequent_ctx_vars(&self, sequent_slid: Slid) -> Vec { + let Some(ctx_var_sort) = self.sort_ids.ctx_var else { + return vec![]; + }; + let Some(sequent_func) = self.func_ids.ctx_var_sequent else { + return vec![]; + }; + + let mut results = Vec::new(); + for ctx_var_slid in self.elements_of_sort(ctx_var_sort) { + if self.get_func(sequent_func, ctx_var_slid) == Some(sequent_slid) { + let binder_slid = self + .func_ids + .ctx_var_binder + .and_then(|f| self.get_func(f, ctx_var_slid)); + + results.push(CtxVarInfo { + slid: ctx_var_slid, + binder_slid, + }); + } + } + results + } + + /// Get the binder's type (DSort slid). + fn get_binder_type(&self, binder_slid: Slid) -> Option { + self.func_ids + .binder_type + .and_then(|f| self.get_func(f, binder_slid)) + } + + /// Reconstruct a Term from its Term slid. 
+ fn reconstruct_term( + &self, + term_slid: Slid, + binder_to_var: &HashMap, + func_to_idx: &HashMap, + srt_slid_to_idx: &HashMap, + ) -> Option { + // Check VarT + if let Some(var_t_sort) = self.sort_ids.var_t { + for var_t_slid in self.elements_of_sort(var_t_sort) { + if let Some(term_func) = self.func_ids.var_t_term + && self.get_func(term_func, var_t_slid) == Some(term_slid) { + // Found a VarT for this term + if let Some(binder_func) = self.func_ids.var_t_binder + && let Some(binder_slid) = self.get_func(binder_func, var_t_slid) + && let Some((var_name, var_sort)) = binder_to_var.get(&binder_slid) { + return Some(Term::Var(var_name.clone(), var_sort.clone())); + } + } + } + } + + // Check AppT + if let Some(app_t_sort) = self.sort_ids.app_t { + for app_t_slid in self.elements_of_sort(app_t_sort) { + if let Some(term_func) = self.func_ids.app_t_term + && self.get_func(term_func, app_t_slid) == Some(term_slid) { + // Found an AppT for this term + let func_slid = self + .func_ids + .app_t_func + .and_then(|f| self.get_func(f, app_t_slid))?; + let func_idx = *func_to_idx.get(&func_slid)?; + + let arg_term_slid = self + .func_ids + .app_t_arg + .and_then(|f| self.get_func(f, app_t_slid))?; + let arg = self.reconstruct_term( + arg_term_slid, + binder_to_var, + func_to_idx, + srt_slid_to_idx, + )?; + + return Some(Term::App(func_idx, Box::new(arg))); + } + } + } + + // Check RecordT + if let Some(record_t_sort) = self.sort_ids.record_t { + for record_t_slid in self.elements_of_sort(record_t_sort) { + if let Some(term_func) = self.func_ids.record_t_term + && self.get_func(term_func, record_t_slid) == Some(term_slid) { + // Found a RecordT for this term - collect entries + let mut fields = Vec::new(); + if let Some(rec_entry_sort) = self.sort_ids.rec_entry { + for rec_entry_slid in self.elements_of_sort(rec_entry_sort) { + if let Some(record_func) = self.func_ids.rec_entry_record + && self.get_func(record_func, rec_entry_slid) + == Some(record_t_slid) + { + // Get 
field name (from Field) + let field_name = self + .func_ids + .rec_entry_field + .and_then(|f| self.get_func(f, rec_entry_slid)) + .map(|field_slid| { + let name = self.get_element_name(field_slid); + name.rsplit('/').next().unwrap_or(&name).to_string() + }) + .unwrap_or_default(); + + // Get value term + if let Some(val_slid) = self + .func_ids + .rec_entry_val + .and_then(|f| self.get_func(f, rec_entry_slid)) + && let Some(val_term) = self.reconstruct_term( + val_slid, + binder_to_var, + func_to_idx, + srt_slid_to_idx, + ) { + fields.push((field_name, val_term)); + } + } + } + } + return Some(Term::Record(fields)); + } + } + } + + // Check ProjT + if let Some(proj_t_sort) = self.sort_ids.proj_t { + for proj_t_slid in self.elements_of_sort(proj_t_sort) { + if let Some(term_func) = self.func_ids.proj_t_term + && self.get_func(term_func, proj_t_slid) == Some(term_slid) { + // Get base term + let base_slid = self + .func_ids + .proj_t_base + .and_then(|f| self.get_func(f, proj_t_slid))?; + let base = + self.reconstruct_term(base_slid, binder_to_var, func_to_idx, srt_slid_to_idx)?; + + // Get field name + let field_name = self + .func_ids + .proj_t_field + .and_then(|f| self.get_func(f, proj_t_slid)) + .map(|field_slid| { + let name = self.get_element_name(field_slid); + name.rsplit('/').next().unwrap_or(&name).to_string() + }) + .unwrap_or_default(); + + return Some(Term::Project(Box::new(base), field_name)); + } + } + } + + None + } + + /// Reconstruct a Formula from its Formula slid. 
+ fn reconstruct_formula( + &self, + formula_slid: Slid, + binder_to_var: &mut HashMap, + func_to_idx: &HashMap, + rel_to_idx: &HashMap, + srt_slid_to_idx: &HashMap, + ) -> Option { + // Check TrueF + if let Some(true_f_sort) = self.sort_ids.true_f { + for true_f_slid in self.elements_of_sort(true_f_sort) { + if let Some(formula_func) = self.func_ids.true_f_formula + && self.get_func(formula_func, true_f_slid) == Some(formula_slid) { + return Some(Formula::True); + } + } + } + + // Check FalseF + if let Some(false_f_sort) = self.sort_ids.false_f { + for false_f_slid in self.elements_of_sort(false_f_sort) { + if let Some(formula_func) = self.func_ids.false_f_formula + && self.get_func(formula_func, false_f_slid) == Some(formula_slid) { + return Some(Formula::False); + } + } + } + + // Check EqF + if let Some(eq_f_sort) = self.sort_ids.eq_f { + for eq_f_slid in self.elements_of_sort(eq_f_sort) { + if let Some(formula_func) = self.func_ids.eq_f_formula + && self.get_func(formula_func, eq_f_slid) == Some(formula_slid) { + let lhs_slid = self + .func_ids + .eq_f_lhs + .and_then(|f| self.get_func(f, eq_f_slid))?; + let rhs_slid = self + .func_ids + .eq_f_rhs + .and_then(|f| self.get_func(f, eq_f_slid))?; + + let lhs = + self.reconstruct_term(lhs_slid, binder_to_var, func_to_idx, srt_slid_to_idx)?; + let rhs = + self.reconstruct_term(rhs_slid, binder_to_var, func_to_idx, srt_slid_to_idx)?; + + return Some(Formula::Eq(lhs, rhs)); + } + } + } + + // Check RelF + if let Some(rel_f_sort) = self.sort_ids.rel_f { + for rel_f_slid in self.elements_of_sort(rel_f_sort) { + if let Some(formula_func) = self.func_ids.rel_f_formula + && self.get_func(formula_func, rel_f_slid) == Some(formula_slid) { + let rel_slid = self + .func_ids + .rel_f_rel + .and_then(|f| self.get_func(f, rel_f_slid))?; + let rel_idx = *rel_to_idx.get(&rel_slid)?; + + let arg_slid = self + .func_ids + .rel_f_arg + .and_then(|f| self.get_func(f, rel_f_slid))?; + let arg = + self.reconstruct_term(arg_slid, 
binder_to_var, func_to_idx, srt_slid_to_idx)?; + + return Some(Formula::Rel(rel_idx, arg)); + } + } + } + + // Check ConjF + if let Some(conj_f_sort) = self.sort_ids.conj_f { + for conj_f_slid in self.elements_of_sort(conj_f_sort) { + if let Some(formula_func) = self.func_ids.conj_f_formula + && self.get_func(formula_func, conj_f_slid) == Some(formula_slid) { + // Collect conjuncts from ConjArm + let mut conjuncts = Vec::new(); + if let Some(conj_arm_sort) = self.sort_ids.conj_arm { + for arm_slid in self.elements_of_sort(conj_arm_sort) { + if let Some(conj_func) = self.func_ids.conj_arm_conj + && self.get_func(conj_func, arm_slid) == Some(conj_f_slid) + && let Some(child_slid) = self + .func_ids + .conj_arm_child + .and_then(|f| self.get_func(f, arm_slid)) + && let Some(child) = self.reconstruct_formula( + child_slid, + binder_to_var, + func_to_idx, + rel_to_idx, + srt_slid_to_idx, + ) { + conjuncts.push(child); + } + } + } + return Some(Formula::Conj(conjuncts)); + } + } + } + + // Check DisjF + if let Some(disj_f_sort) = self.sort_ids.disj_f { + for disj_f_slid in self.elements_of_sort(disj_f_sort) { + if let Some(formula_func) = self.func_ids.disj_f_formula + && self.get_func(formula_func, disj_f_slid) == Some(formula_slid) { + // Collect disjuncts from DisjArm + let mut disjuncts = Vec::new(); + if let Some(disj_arm_sort) = self.sort_ids.disj_arm { + for arm_slid in self.elements_of_sort(disj_arm_sort) { + if let Some(disj_func) = self.func_ids.disj_arm_disj + && self.get_func(disj_func, arm_slid) == Some(disj_f_slid) + && let Some(child_slid) = self + .func_ids + .disj_arm_child + .and_then(|f| self.get_func(f, arm_slid)) + && let Some(child) = self.reconstruct_formula( + child_slid, + binder_to_var, + func_to_idx, + rel_to_idx, + srt_slid_to_idx, + ) { + disjuncts.push(child); + } + } + } + return Some(Formula::Disj(disjuncts)); + } + } + } + + // Check ExistsF + if let Some(exists_f_sort) = self.sort_ids.exists_f { + for exists_f_slid in 
self.elements_of_sort(exists_f_sort) { + if let Some(formula_func) = self.func_ids.exists_f_formula + && self.get_func(formula_func, exists_f_slid) == Some(formula_slid) { + // Get the binder + let binder_slid = self + .func_ids + .exists_f_binder + .and_then(|f| self.get_func(f, exists_f_slid))?; + + // Get binder type + let dsort_slid = self.get_binder_type(binder_slid)?; + let dsort_raw = self.resolve_dsort(dsort_slid); + let dsort = remap_derived_sort(&dsort_raw, srt_slid_to_idx); + + // Get var name from binder element name + let binder_name = self.get_element_name(binder_slid); + let var_name = binder_name + .strip_prefix("binder_") + .unwrap_or(&binder_name) + .to_string(); + + // Add to binder mapping for body reconstruction + binder_to_var.insert(binder_slid, (var_name.clone(), dsort.clone())); + + // Reconstruct body + let body_slid = self + .func_ids + .exists_f_body + .and_then(|f| self.get_func(f, exists_f_slid))?; + let body = self.reconstruct_formula( + body_slid, + binder_to_var, + func_to_idx, + rel_to_idx, + srt_slid_to_idx, + )?; + + return Some(Formula::Exists(var_name, dsort, Box::new(body))); + } + } + } + + None + } + + /// Reconstruct an axiom (Sequent) from its SequentInfo. 
+ fn reconstruct_axiom( + &self, + info: &SequentInfo, + func_to_idx: &HashMap, + rel_to_idx: &HashMap, + srt_slid_to_idx: &HashMap, + ) -> Option { + // Build binder mapping from context variables + let mut binder_to_var: HashMap = HashMap::new(); + let mut context = Context::new(); + + let ctx_vars = self.query_sequent_ctx_vars(info.slid); + for cv in ctx_vars { + if let Some(binder_slid) = cv.binder_slid { + // Get binder type + if let Some(dsort_slid) = self.get_binder_type(binder_slid) { + let dsort_raw = self.resolve_dsort(dsort_slid); + let dsort = remap_derived_sort(&dsort_raw, srt_slid_to_idx); + + // Get var name from binder element name + let binder_name = self.get_element_name(binder_slid); + let var_name = binder_name + .strip_prefix("binder_") + .unwrap_or(&binder_name) + .to_string(); + + binder_to_var.insert(binder_slid, (var_name.clone(), dsort.clone())); + context = context.extend(var_name, dsort); + } + } + } + + // Reconstruct premise + let premise = info.premise_slid.and_then(|slid| { + self.reconstruct_formula(slid, &mut binder_to_var, func_to_idx, rel_to_idx, srt_slid_to_idx) + })?; + + // Reconstruct conclusion + let conclusion = info.conclusion_slid.and_then(|slid| { + self.reconstruct_formula(slid, &mut binder_to_var, func_to_idx, rel_to_idx, srt_slid_to_idx) + })?; + + Some(Sequent { + context, + premise, + conclusion, + }) + } + + /// Resolve a DSort slid to a DerivedSort. + /// + /// DSorts in GeologMeta are represented as either BaseDS or ProdDS elements. + /// This traverses the structure to build the corresponding DerivedSort. 
+ pub fn resolve_dsort(&self, dsort_slid: Slid) -> DerivedSort { + // Check if it's a BaseDS + if let Some(base_ds_sort) = self.sort_ids.base_ds + && let Some(srt_func) = self.func_ids.base_ds_srt { + // Check all BaseDS elements to find one whose dsort matches + for base_slid in self.elements_of_sort(base_ds_sort) { + if let Some(dsort_func) = self.func_ids.base_ds_dsort + && self.get_func(dsort_func, base_slid) == Some(dsort_slid) { + // Found the BaseDS, get its Srt + if let Some(srt_slid) = self.get_func(srt_func, base_slid) { + // We need to map srt_slid to a sort index... + // This is tricky without knowing the theory context. + // For bootstrap, we store the slid index and resolve later. + return DerivedSort::Base(srt_slid.index()); + } + } + } + } + + // Check if it's a ProdDS + if let Some(prod_ds_sort) = self.sort_ids.prod_ds { + for prod_slid in self.elements_of_sort(prod_ds_sort) { + if let Some(dsort_func) = self.func_ids.prod_ds_dsort + && self.get_func(dsort_func, prod_slid) == Some(dsort_slid) { + // Found the ProdDS, get its fields + let fields = self.query_prod_fields(prod_slid); + return DerivedSort::Product(fields); + } + } + } + + // Fallback: empty product (unit type) + DerivedSort::Product(vec![]) + } + + /// Query the fields of a product DSort. 
+ fn query_prod_fields(&self, prod_slid: Slid) -> Vec<(String, DerivedSort)> { + let Some(field_sort) = self.sort_ids.field else { + return vec![]; + }; + let Some(prod_func) = self.func_ids.field_prod else { + return vec![]; + }; + let Some(type_func) = self.func_ids.field_type else { + return vec![]; + }; + + let mut fields = Vec::new(); + for field_slid in self.elements_of_sort(field_sort) { + if self.get_func(prod_func, field_slid) == Some(prod_slid) { + let name = self.get_element_name(field_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + let field_type = self + .get_func(type_func, field_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + + fields.push((short_name, field_type)); + } + } + fields + } + + /// Get all theory names that are committed (visible from HEAD). + pub fn query_committed_theories(&self) -> Vec<(String, Slid)> { + use super::BindingKind; + self.list_bindings() + .into_iter() + .filter_map(|(name, kind, slid)| { + if kind == BindingKind::Theory { + Some((name, slid)) + } else { + None + } + }) + .collect() + } + + /// Get all instance names that are committed (visible from HEAD). + pub fn query_committed_instances(&self) -> Vec<(String, Slid)> { + use super::BindingKind; + self.list_bindings() + .into_iter() + .filter_map(|(name, kind, slid)| { + if kind == BindingKind::Instance { + Some((name, slid)) + } else { + None + } + }) + .collect() + } + + /// Get all theories in GeologMeta (regardless of commit status). + /// + /// This is useful for reconstruction when loading from disk, + /// where we want to restore all data, not just committed data. 
+ pub fn query_all_theories(&self) -> Vec<(String, Slid)> { + let Some(theory_sort) = self.sort_ids.theory else { + return vec![]; + }; + + self.elements_of_sort(theory_sort) + .into_iter() + .map(|slid| { + let name = self.get_element_name(slid); + (name, slid) + }) + .collect() + } + + /// Get all instances in GeologMeta (regardless of commit status). + /// + /// This is useful for reconstruction when loading from disk, + /// where we want to restore all data, not just committed data. + pub fn query_all_instances(&self) -> Vec<(String, Slid)> { + let Some(instance_sort) = self.sort_ids.instance else { + return vec![]; + }; + + self.elements_of_sort(instance_sort) + .into_iter() + .map(|slid| { + let name = self.get_element_name(slid); + (name, slid) + }) + .collect() + } + + /// Reconstruct an ElaboratedTheory from persisted GeologMeta data. + /// + /// This is a bootstrap method that will be replaced by proper query engine. + /// It rebuilds the in-memory ElaboratedTheory representation from the + /// persisted sorts, functions, and relations. 
+ pub fn reconstruct_theory(&self, theory_slid: Slid) -> Option { + let theory_name = self.get_element_name(theory_slid); + + // Query sorts, functions, relations + let sort_infos = self.query_theory_sorts(theory_slid); + let func_infos = self.query_theory_funcs(theory_slid); + let rel_infos = self.query_theory_rels(theory_slid); + + // Build Srt Slid -> sort index mapping for resolving DerivedSorts + let mut srt_slid_to_idx: std::collections::HashMap = + std::collections::HashMap::new(); + for (idx, info) in sort_infos.iter().enumerate() { + srt_slid_to_idx.insert(info.slid.index(), idx); + } + + // Build signature using its constructor methods + let mut signature = Signature::new(); + + // Add sorts + for info in &sort_infos { + signature.add_sort(info.name.clone()); + } + + // Add functions with remapped DerivedSorts + for info in &func_infos { + let domain = remap_derived_sort(&info.domain, &srt_slid_to_idx); + let codomain = remap_derived_sort(&info.codomain, &srt_slid_to_idx); + signature.add_function(info.name.clone(), domain, codomain); + } + + // Add relations with remapped DerivedSorts + for info in &rel_infos { + let domain = remap_derived_sort(&info.domain, &srt_slid_to_idx); + signature.add_relation(info.name.clone(), domain); + } + + // Build Func Slid -> func index mapping + let func_to_idx: HashMap = func_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Build Rel Slid -> rel index mapping + let rel_to_idx: HashMap = rel_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Query and reconstruct axioms + let sequent_infos = self.query_theory_sequents(theory_slid); + let mut axioms = Vec::new(); + let mut axiom_names = Vec::new(); + + for info in &sequent_infos { + if let Some(axiom) = self.reconstruct_axiom(info, &func_to_idx, &rel_to_idx, &srt_slid_to_idx) { + axiom_names.push(info.name.clone()); + axioms.push(axiom); + } + } + + let theory = Theory { + name: theory_name, 
+ signature, + axioms, + axiom_names, + }; + + Some(ElaboratedTheory { + params: vec![], // TODO: persist and reconstruct params + theory, + }) + } + + /// Reconstruct all persisted theories. + /// + /// Returns a map from theory name to ElaboratedTheory. + pub fn reconstruct_all_theories( + &self, + ) -> std::collections::HashMap> { + let mut result = std::collections::HashMap::new(); + // Use query_all_theories to restore ALL theories from disk, + // not just committed ones + for (name, slid) in self.query_all_theories() { + if let Some(theory) = self.reconstruct_theory(slid) { + result.insert(name, std::rc::Rc::new(theory)); + } + } + result + } + + // ======================================================================== + // Instance queries and reconstruction + // ======================================================================== + + /// Query all elements belonging to an instance. + /// Delegates to the compiled query engine. + pub fn query_instance_elems(&self, instance_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_instance_elems_compiled(instance_slid) + } + + /// Query all function values in an instance. + /// Delegates to the compiled query engine. + pub fn query_instance_func_vals(&self, instance_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_instance_func_vals_compiled(instance_slid) + } + + /// Query all relation tuples in an instance. + /// Delegates to the compiled query engine. + pub fn query_instance_rel_tuples(&self, instance_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_instance_rel_tuples_compiled(instance_slid) + } + + /// Reconstruct an instance (Structure + metadata) from persisted GeologMeta data. 
+ pub fn reconstruct_instance( + &self, + instance_slid: Slid, + ) -> Option { + let theory_slid = self.get_instance_theory(instance_slid)?; + let theory = self.reconstruct_theory(theory_slid)?; + + let instance_name = self.get_element_name(instance_slid); + let num_sorts = theory.theory.signature.sorts.len(); + + // Query elements + let elem_infos = self.query_instance_elems(instance_slid); + let sort_infos = self.query_theory_sorts(theory_slid); + + // Build Srt Slid -> sort index mapping + let srt_to_idx: HashMap = sort_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Build Elem Slid -> Structure Slid mapping + // Structure Slids are assigned sequentially as we add elements + let mut elem_to_structure_slid: HashMap = HashMap::new(); + let mut structure = crate::core::Structure::new(num_sorts); + let mut element_names: HashMap = HashMap::new(); + + // Group elements by sort and add to structure + for elem_info in &elem_infos { + if let Some(srt_slid) = elem_info.srt_slid + && let Some(&sort_idx) = srt_to_idx.get(&srt_slid) { + // Add element to structure + let (structure_slid, _luid) = + structure.add_element(&mut crate::universe::Universe::new(), sort_idx); + elem_to_structure_slid.insert(elem_info.slid, structure_slid); + element_names.insert(structure_slid, elem_info.name.clone()); + } + } + + // Build srt_slid -> sort index mapping for remapping DerivedSorts + let srt_slid_to_idx: HashMap = sort_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid.index(), idx)) + .collect(); + + // Initialize functions + let func_infos = self.query_theory_funcs(theory_slid); + let domain_sort_ids: Vec> = func_infos + .iter() + .map(|f| { + // Remap the domain from Slid indices to sort indices + let remapped = remap_derived_sort(&f.domain, &srt_slid_to_idx); + match remapped { + DerivedSort::Base(idx) => Some(idx), + DerivedSort::Product(_) => None, + } + }) + .collect(); + structure.init_functions(&domain_sort_ids); + + // 
Initialize relations + let rel_infos = self.query_theory_rels(theory_slid); + let arities: Vec = rel_infos + .iter() + .map(|r| { + // Remap to get correct arity + let remapped = remap_derived_sort(&r.domain, &srt_slid_to_idx); + remapped.arity() + }) + .collect(); + structure.init_relations(&arities); + + // Build Func Slid -> func index mapping + let func_to_idx: HashMap = func_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Populate function values + let func_vals = self.query_instance_func_vals(instance_slid); + for fv in func_vals { + if let (Some(func_slid), Some(arg_slid), Some(result_slid)) = + (fv.func_slid, fv.arg_slid, fv.result_slid) + && let Some(&func_idx) = func_to_idx.get(&func_slid) + && let (Some(&arg_struct), Some(&result_struct)) = ( + elem_to_structure_slid.get(&arg_slid), + elem_to_structure_slid.get(&result_slid), + ) { + let _ = structure.define_function(func_idx, arg_struct, result_struct); + } + } + + // Populate relation tuples from columnar batches + // Build UUID -> Structure Slid mapping for elements + let elem_uuid_to_structure: HashMap = elem_infos + .iter() + .filter_map(|info| { + let uuid = self.get_element_uuid(info.slid); + elem_to_structure_slid.get(&info.slid).map(|&s| (uuid, s)) + }) + .collect(); + + // Build Rel UUID -> rel index mapping + let rel_uuid_to_idx: HashMap = rel_infos + .iter() + .enumerate() + .map(|(idx, info)| (self.get_element_uuid(info.slid), idx)) + .collect(); + + // Load columnar batches for this instance + let instance_uuid = self.get_element_uuid(instance_slid); + if let Ok(batches) = self.load_instance_data_batches(instance_uuid) { + for batch in batches { + for rel_batch in &batch.relation_tuples { + if let Some(&rel_idx) = rel_uuid_to_idx.get(&rel_batch.rel) { + // Convert each tuple's UUIDs to Structure Slids + for tuple_uuids in rel_batch.iter() { + let tuple_slids: Vec = tuple_uuids + .iter() + .filter_map(|uuid| 
elem_uuid_to_structure.get(uuid).copied()) + .collect(); + + // Only assert if all elements were found + if tuple_slids.len() == tuple_uuids.len() { + structure.assert_relation(rel_idx, tuple_slids); + } + } + } + } + } + } + + Some(ReconstructedInstance { + name: instance_name, + theory_name: theory.theory.name.clone(), + structure, + element_names, + }) + } + + /// Reconstruct all persisted instances. + pub fn reconstruct_all_instances(&self) -> HashMap { + let mut result = HashMap::new(); + // Use query_all_instances to restore ALL instances from disk, + // not just committed ones + for (name, slid) in self.query_all_instances() { + if let Some(instance) = self.reconstruct_instance(slid) { + result.insert(name, instance); + } + } + result + } +} + +/// Information about an element in an instance +#[derive(Debug, Clone)] +pub struct ElemInfo { + pub name: String, + pub slid: Slid, + pub srt_slid: Option, +} + +/// Information about a function value +#[derive(Debug, Clone)] +pub struct FuncValInfo { + pub slid: Slid, + pub func_slid: Option, + pub arg_slid: Option, + pub result_slid: Option, +} + +/// Information about a relation tuple +#[derive(Debug, Clone)] +pub struct RelTupleInfo { + pub slid: Slid, + pub rel_slid: Option, + pub arg_slid: Option, +} + +/// A reconstructed instance with its structure and metadata +#[derive(Debug)] +pub struct ReconstructedInstance { + pub name: String, + pub theory_name: String, + pub structure: crate::core::Structure, + pub element_names: HashMap, +} diff --git a/src/store/columnar.rs b/src/store/columnar.rs new file mode 100644 index 0000000..f160e46 --- /dev/null +++ b/src/store/columnar.rs @@ -0,0 +1,208 @@ +//! Columnar batch format for efficient storage and wire transfer. +//! +//! This module defines the physical representation for instance-level data +//! (elements, function values, relation tuples). The logical model is still +//! GeologMeta (with Elem, FuncVal, RelTupleArg sorts), but the physical +//! 
encoding uses columnar batches for efficiency. +//! +//! # EDB vs IDB Batches +//! +//! Batches are tagged as either EDB (extensional) or IDB (intensional): +//! +//! - **EDB batches**: User-declared facts. Persisted locally AND transmitted over wire. +//! - **IDB batches**: Chase-derived facts. Persisted locally but NOT transmitted over wire. +//! +//! When receiving patches over the network, only EDB batches are included. +//! The recipient runs the chase locally to regenerate IDB tuples. +//! +//! Each patch can have up to 2 batches per instance: +//! - One EDB batch (if user manually added tuples) +//! - One IDB batch (if chase produced conclusions) + +use rkyv::{Archive, Deserialize, Serialize}; + +use crate::id::Uuid; + +/// Distinguishes between user-declared (EDB) and chase-derived (IDB) data. +/// +/// This determines whether the batch is transmitted over the wire during sync. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)] +#[archive(check_bytes)] +pub enum BatchKind { + /// Extensional database: user-declared facts. + /// Persisted locally AND transmitted over wire. + #[default] + Edb, + /// Intensional database: chase-derived facts. + /// Persisted locally but NOT transmitted over wire. + Idb, +} + +/// A batch of elements added to an instance. +/// +/// Logically equivalent to a collection of Elem elements in GeologMeta. +#[derive(Archive, Serialize, Deserialize, Debug, Clone)] +#[archive(check_bytes)] +pub struct ElementBatch { + /// Which instance these elements belong to + pub instance: Uuid, + /// Sort UUID for each element (parallel array) + pub sorts: Vec, + /// UUID for each element (parallel array, same length as sorts) + pub elements: Vec, +} + +/// A batch of function values in an instance. +/// +/// Logically equivalent to a collection of FuncVal elements in GeologMeta. 
#[derive(Archive, Serialize, Deserialize, Debug, Clone)]
#[archive(check_bytes)]
pub struct FunctionValueBatch {
    /// Which instance these function values belong to
    pub instance: Uuid,
    /// Which function
    pub func: Uuid,
    /// Domain elements (parallel array)
    pub args: Vec<Uuid>,
    /// Codomain elements (parallel array, same length as `args`)
    pub results: Vec<Uuid>,
}

/// A batch of relation tuples in an instance.
///
/// Logically equivalent to a collection of RelTuple + RelTupleArg elements
/// in GeologMeta, but stored columnar for efficiency.
///
/// For a relation `R : [from: A, to: B] -> Prop`, this stores:
/// - columns[0] = all "from" field values (UUIDs of A elements)
/// - columns[1] = all "to" field values (UUIDs of B elements)
///
/// Row i represents the tuple (columns[0][i], columns[1][i]).
#[derive(Archive, Serialize, Deserialize, Debug, Clone)]
#[archive(check_bytes)]
pub struct RelationTupleBatch {
    /// Which instance these tuples belong to
    pub instance: Uuid,
    /// Which relation
    pub rel: Uuid,
    /// Field UUIDs for each column (from the relation's domain ProdDS/Field)
    pub field_ids: Vec<Uuid>,
    /// Columnar data: columns[field_idx][row_idx] = element UUID.
    /// All columns have the same length (number of tuples).
    pub columns: Vec<Vec<Uuid>>,
}

impl RelationTupleBatch {
    /// Create an empty batch for `rel`, with one empty column per field.
    pub fn new(instance: Uuid, rel: Uuid, field_ids: Vec<Uuid>) -> Self {
        let columns = vec![Vec::new(); field_ids.len()];
        Self {
            instance,
            rel,
            field_ids,
            columns,
        }
    }

    /// Append one tuple (one value per column).
    ///
    /// Panics if `tuple.len()` differs from the number of columns.
    pub fn push(&mut self, tuple: &[Uuid]) {
        assert_eq!(tuple.len(), self.columns.len(), "tuple arity mismatch");
        for (column, &value) in self.columns.iter_mut().zip(tuple) {
            column.push(value);
        }
    }

    /// Number of tuples (rows) in this batch.
    pub fn len(&self) -> usize {
        match self.columns.first() {
            Some(column) => column.len(),
            None => 0,
        }
    }

    /// Whether the batch holds no tuples.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Iterate over rows, materializing each tuple as a `Vec<Uuid>`.
    pub fn iter(&self) -> impl Iterator<Item = Vec<Uuid>> + '_ {
        (0..self.len()).map(|row| self.columns.iter().map(|column| column[row]).collect())
    }
}

/// A complete instance data snapshot in columnar format.
///
/// This is the efficient representation for storage and wire transfer.
/// Logically equivalent to the Elem, FuncVal, RelTuple, RelTupleArg
/// portions of a GeologMeta instance.
#[derive(Archive, Serialize, Deserialize, Debug, Clone, Default)]
#[archive(check_bytes)]
pub struct InstanceDataBatch {
    /// Whether this batch contains EDB (user-declared) or IDB (chase-derived) data.
    /// IDB batches are persisted locally but NOT transmitted over wire.
+ pub kind: BatchKind, + /// All element additions + pub elements: Vec, + /// All function value definitions + pub function_values: Vec, + /// All relation tuple assertions + pub relation_tuples: Vec, +} + +impl InstanceDataBatch { + /// Create a new empty EDB batch (default for user-declared data) + pub fn new() -> Self { + Self::default() + } + + /// Create a new empty IDB batch (for chase-derived data) + pub fn new_idb() -> Self { + Self { + kind: BatchKind::Idb, + ..Default::default() + } + } + + /// Check if this batch should be transmitted over the wire + pub fn is_wire_transmittable(&self) -> bool { + self.kind == BatchKind::Edb + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_relation_tuple_batch() { + let instance = Uuid::nil(); + let rel = Uuid::nil(); + let field_a = Uuid::nil(); + let field_b = Uuid::nil(); + + let mut batch = RelationTupleBatch::new( + instance, + rel, + vec![field_a, field_b], + ); + + assert!(batch.is_empty()); + + // Add some tuples + let elem1 = Uuid::nil(); + let elem2 = Uuid::nil(); + let elem3 = Uuid::nil(); + + batch.push(&[elem1, elem2]); + batch.push(&[elem2, elem3]); + batch.push(&[elem1, elem3]); + + assert_eq!(batch.len(), 3); + + let tuples: Vec<_> = batch.iter().collect(); + assert_eq!(tuples.len(), 3); + assert_eq!(tuples[0], vec![elem1, elem2]); + assert_eq!(tuples[1], vec![elem2, elem3]); + assert_eq!(tuples[2], vec![elem1, elem3]); + } +} diff --git a/src/store/commit.rs b/src/store/commit.rs new file mode 100644 index 0000000..83cd78b --- /dev/null +++ b/src/store/commit.rs @@ -0,0 +1,209 @@ +//! Commit operations for the Store. +//! +//! Version control through commits and name bindings. 
+ +use crate::id::{NumericId, Slid}; + +use super::append::AppendOps; +use super::{BindingKind, Store}; + +impl Store { + /// Create a new commit + pub fn commit(&mut self, message: Option<&str>) -> Result { + let sort_id = self.sort_ids.commit.ok_or("Commit sort not found")?; + let commit_slid = self.add_element(sort_id, message.unwrap_or("commit")); + + // Set parent if there's a head + if let Some(head) = self.head { + let parent_func = self.func_ids.commit_parent.ok_or("Commit/parent not found")?; + self.define_func(parent_func, commit_slid, head)?; + } + + // Create NameBindings for all uncommitted changes + let nb_sort = self.sort_ids.name_binding.ok_or("NameBinding sort not found")?; + let commit_func = self.func_ids.name_binding_commit.ok_or("NameBinding/commit not found")?; + let theory_func = self.func_ids.name_binding_theory.ok_or("NameBinding/theory not found")?; + let instance_func = self.func_ids.name_binding_instance.ok_or("NameBinding/instance not found")?; + + // Collect uncommitted to avoid borrow issues + let uncommitted: Vec<_> = self.uncommitted.drain().collect(); + for (name, binding) in uncommitted { + let nb_slid = self.add_element(nb_sort, &format!("nb_{}_{}", name, commit_slid.index())); + self.define_func(commit_func, nb_slid, commit_slid)?; + + match binding.kind { + BindingKind::Theory => { + self.define_func(theory_func, nb_slid, binding.target)?; + } + BindingKind::Instance => { + self.define_func(instance_func, nb_slid, binding.target)?; + } + } + } + + // Update head + self.head = Some(commit_slid); + + // Auto-save + self.save()?; + + Ok(commit_slid) + } + + /// Get the current binding for a name (from HEAD commit or uncommitted) + pub fn resolve_name(&self, name: &str) -> Option<(Slid, BindingKind)> { + // Check uncommitted first + if let Some(binding) = self.uncommitted.get(name) { + return Some((binding.target, binding.kind)); + } + + // Search through name bindings from HEAD backwards (if we have commits) + if let (Some(head), 
Some(nb_sort), Some(commit_func), Some(theory_func), Some(instance_func)) = ( + self.head, + self.sort_ids.name_binding, + self.func_ids.name_binding_commit, + self.func_ids.name_binding_theory, + self.func_ids.name_binding_instance, + ) { + let mut current = Some(head); + while let Some(commit) = current { + // Find all NameBindings for this commit + for nb_slid in self.elements_of_sort(nb_sort) { + if self.get_func(commit_func, nb_slid) == Some(commit) { + // Check if this binding is for our name + let nb_name = self.get_element_name(nb_slid); + if nb_name.starts_with(&format!("nb_{}_", name)) { + // Found it! Return the target + if let Some(theory) = self.get_func(theory_func, nb_slid) { + return Some((theory, BindingKind::Theory)); + } + if let Some(instance) = self.get_func(instance_func, nb_slid) { + return Some((instance, BindingKind::Instance)); + } + } + } + } + + // Move to parent commit + if let Some(parent_func) = self.func_ids.commit_parent { + current = self.get_func(parent_func, commit); + } else { + break; + } + } + } + + // Fallback: search directly in meta Structure for uncommitted theories/instances + // This handles the case where data exists in meta.bin but no commit was made yet + if let Some(theory_sort) = self.sort_ids.theory { + for slid in self.elements_of_sort(theory_sort) { + if self.get_element_name(slid) == name { + return Some((slid, BindingKind::Theory)); + } + } + } + if let Some(instance_sort) = self.sort_ids.instance { + for slid in self.elements_of_sort(instance_sort) { + if self.get_element_name(slid) == name { + return Some((slid, BindingKind::Instance)); + } + } + } + + None + } + + /// Get all commits in order (oldest to newest) + pub fn commit_history(&self) -> Vec { + let Some(head) = self.head else { + return vec![]; + }; + + let mut chain = Vec::new(); + let mut current = Some(head); + + while let Some(commit) = current { + chain.push(commit); + current = self + .func_ids + .commit_parent + .and_then(|f| self.get_func(f, 
commit)); + } + + chain.reverse(); + chain + } + + /// List all committed bindings (theories and instances) + /// + /// Returns (name, kind, target_slid) for each binding visible from HEAD. + /// Names may appear multiple times if rebound in different commits. + pub fn list_bindings(&self) -> Vec<(String, BindingKind, Slid)> { + let Some(head) = self.head else { + return vec![]; + }; + + let Some(nb_sort) = self.sort_ids.name_binding else { + return vec![]; + }; + let Some(commit_func) = self.func_ids.name_binding_commit else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.name_binding_theory else { + return vec![]; + }; + let Some(instance_func) = self.func_ids.name_binding_instance else { + return vec![]; + }; + + let mut bindings = Vec::new(); + let mut seen_names = std::collections::HashSet::new(); + + // Walk commits from head backwards + let mut current = Some(head); + while let Some(commit) = current { + // Find all NameBindings for this commit + for nb_slid in self.elements_of_sort(nb_sort) { + if self.get_func(commit_func, nb_slid) == Some(commit) { + // Extract name from "nb_{name}_{commit_id}" + let nb_name = self.get_element_name(nb_slid); + if let Some(name) = extract_binding_name(&nb_name) { + // Only include first (most recent) binding for each name + if seen_names.insert(name.clone()) { + if let Some(theory) = self.get_func(theory_func, nb_slid) { + bindings.push((name, BindingKind::Theory, theory)); + } else if let Some(instance) = self.get_func(instance_func, nb_slid) { + bindings.push((name, BindingKind::Instance, instance)); + } + } + } + } + } + + // Move to parent commit + current = self + .func_ids + .commit_parent + .and_then(|f| self.get_func(f, commit)); + } + + bindings + } +} + +/// Extract the name from a binding element name like "nb_Graph_2" +fn extract_binding_name(nb_name: &str) -> Option { + // Format: "nb_{name}_{commit_id}" + if !nb_name.starts_with("nb_") { + return None; + } + let rest = &nb_name[3..]; // Skip 
"nb_" + // Find the last underscore (before commit_id) + if let Some(last_underscore) = rest.rfind('_') { + // Verify the part after underscore is a number + if rest[last_underscore + 1..].parse::().is_ok() { + return Some(rest[..last_underscore].to_string()); + } + } + None +} diff --git a/src/store/instance.rs b/src/store/instance.rs new file mode 100644 index 0000000..d5204fd --- /dev/null +++ b/src/store/instance.rs @@ -0,0 +1,356 @@ +//! Instance operations for the Store. +//! +//! Creating, extending, and modifying instances in the GeologMeta structure. + +use std::collections::HashMap; + +use crate::core::{RelationStorage, Structure}; +use crate::id::{NumericId, Slid, Uuid}; + +use super::append::AppendOps; +use super::columnar::{InstanceDataBatch, RelationTupleBatch}; +use super::{BindingKind, Store, UncommittedBinding}; + +impl Store { + /// Create a new instance (version 0, no parent) + pub fn create_instance(&mut self, name: &str, theory: Slid) -> Result { + let sort_id = self.sort_ids.instance.ok_or("Instance sort not found")?; + let instance_slid = self.add_element(sort_id, name); + + // Set theory + let func_id = self.func_ids.instance_theory.ok_or("Instance/theory not found")?; + self.define_func(func_id, instance_slid, theory)?; + + // Register uncommitted binding + self.uncommitted.insert( + name.to_string(), + UncommittedBinding { + target: instance_slid, + kind: BindingKind::Instance, + }, + ); + + Ok(instance_slid) + } + + /// Create a new version of an existing instance + pub fn extend_instance(&mut self, parent: Slid, name: &str) -> Result { + let sort_id = self.sort_ids.instance.ok_or("Instance sort not found")?; + + // Get the theory from the parent + let theory_func = self.func_ids.instance_theory.ok_or("Instance/theory not found")?; + let theory = self.get_func(theory_func, parent).ok_or("Parent has no theory")?; + + let instance_slid = self.add_element( + sort_id, + &format!("{}@v{}", name, self.meta.carriers[sort_id].len()), + ); + + // 
Set parent and theory + let parent_func = self.func_ids.instance_parent.ok_or("Instance/parent not found")?; + self.define_func(parent_func, instance_slid, parent)?; + self.define_func(theory_func, instance_slid, theory)?; + + // Update uncommitted binding + self.uncommitted.insert( + name.to_string(), + UncommittedBinding { + target: instance_slid, + kind: BindingKind::Instance, + }, + ); + + Ok(instance_slid) + } + + /// Add an element to an instance + pub fn add_elem(&mut self, instance: Slid, srt: Slid, name: &str) -> Result { + let sort_id = self.sort_ids.elem.ok_or("Elem sort not found")?; + let elem_slid = self.add_element_qualified( + sort_id, + vec![self.get_element_name(instance), name.to_string()], + ); + + let instance_func = self.func_ids.elem_instance.ok_or("Elem/instance not found")?; + let sort_func = self.func_ids.elem_sort.ok_or("Elem/sort not found")?; + + self.define_func(instance_func, elem_slid, instance)?; + self.define_func(sort_func, elem_slid, srt)?; + + Ok(elem_slid) + } + + /// Retract an element from an instance + pub fn retract_elem(&mut self, instance: Slid, elem: Slid) -> Result { + let sort_id = self.sort_ids.elem_retract.ok_or("ElemRetract sort not found")?; + let retract_slid = self.add_element(sort_id, &format!("retract_{}", self.get_element_name(elem))); + + let instance_func = self.func_ids.elem_retract_instance.ok_or("ElemRetract/instance not found")?; + let elem_func = self.func_ids.elem_retract_elem.ok_or("ElemRetract/elem not found")?; + + self.define_func(instance_func, retract_slid, instance)?; + self.define_func(elem_func, retract_slid, elem)?; + + Ok(retract_slid) + } + + /// Define a function value in an instance + pub fn add_func_val( + &mut self, + instance: Slid, + func: Slid, + arg: Slid, + result: Slid, + ) -> Result { + let sort_id = self.sort_ids.func_val.ok_or("FuncVal sort not found")?; + let fv_slid = self.add_element( + sort_id, + &format!("fv_{}_{}", self.get_element_name(func), self.get_element_name(arg)), 
+ ); + + let instance_func = self.func_ids.func_val_instance.ok_or("FuncVal/instance not found")?; + let func_func = self.func_ids.func_val_func.ok_or("FuncVal/func not found")?; + let arg_func = self.func_ids.func_val_arg.ok_or("FuncVal/arg not found")?; + let result_func = self.func_ids.func_val_result.ok_or("FuncVal/result not found")?; + + self.define_func(instance_func, fv_slid, instance)?; + self.define_func(func_func, fv_slid, func)?; + self.define_func(arg_func, fv_slid, arg)?; + self.define_func(result_func, fv_slid, result)?; + + Ok(fv_slid) + } + + // NOTE: No retract_func_val - function values are IMMUTABLE (Monotonic Submodel Property) + + /// Assert a relation tuple in an instance. + /// + /// NOTE: This is a legacy stub. Relation tuples should be persisted via columnar + /// batches (see `store::columnar`). This method is kept for API compatibility + /// but silently succeeds without persisting to storage. + /// + /// TODO: Migrate callers to use columnar batch persistence. + #[allow(unused_variables)] + pub fn add_rel_tuple(&mut self, instance: Slid, rel: Slid, arg: Slid) -> Result { + // Relation tuples are now stored in columnar batches, not as individual + // GeologMeta elements. This method is a no-op that returns a dummy Slid. + // + // The actual persistence should happen via InstanceDataBatch in columnar.rs. + // For now, return the arg as a placeholder to avoid breaking callers. + Ok(arg) + } + + // NOTE: No retract_rel_tuple - relation tuples are IMMUTABLE (Monotonic Submodel Property) + + /// Persist all instance data (elements, function values, relation tuples) to GeologMeta. + /// + /// This takes a Structure and persists its contents to the Store, creating Elem, + /// FuncVal, and RelTuple elements in GeologMeta. + /// + /// Returns a mapping from Structure Slids to GeologMeta Elem Slids. 
+ pub fn persist_instance_data( + &mut self, + instance_slid: Slid, + theory_slid: Slid, + structure: &Structure, + element_names: &HashMap, + ) -> Result { + // Get theory's sorts to map sort indices to Srt Slids + let sort_infos = self.query_theory_sorts(theory_slid); + let func_infos = self.query_theory_funcs(theory_slid); + let rel_infos = self.query_theory_rels(theory_slid); + + // Build sort index -> Srt Slid mapping + let sort_idx_to_srt: HashMap = sort_infos + .iter() + .enumerate() + .map(|(idx, info)| (idx, info.slid)) + .collect(); + + // Build func index -> Func Slid mapping + let func_idx_to_func: HashMap = func_infos + .iter() + .enumerate() + .map(|(idx, info)| (idx, info.slid)) + .collect(); + + // Build rel index -> Rel Slid mapping + let rel_idx_to_rel: HashMap = rel_infos + .iter() + .enumerate() + .map(|(idx, info)| (idx, info.slid)) + .collect(); + + // Mapping from Structure Slid to GeologMeta Elem Slid + let mut elem_slid_map: HashMap = HashMap::new(); + + // 1. Persist all elements + for (sort_idx, carrier) in structure.carriers.iter().enumerate() { + let srt_slid = sort_idx_to_srt + .get(&sort_idx) + .copied() + .ok_or_else(|| format!("Unknown sort index: {}", sort_idx))?; + + for structure_slid_u64 in carrier.iter() { + let structure_slid = Slid::from_usize(structure_slid_u64 as usize); + let elem_name = element_names + .get(&structure_slid) + .map(|s| s.as_str()) + .unwrap_or_else(|| "elem"); + + let elem_slid = self.add_elem(instance_slid, srt_slid, elem_name)?; + elem_slid_map.insert(structure_slid, elem_slid); + } + } + + // 2. 
Persist function values + // For now, only handle base domain functions (not product domains) + for (func_idx, func_col) in structure.functions.iter().enumerate() { + let func_slid = match func_idx_to_func.get(&func_idx) { + Some(s) => *s, + None => continue, // Skip if no corresponding Func in theory + }; + + match func_col { + crate::core::FunctionColumn::Local(values) => { + for (local_idx, opt_result) in values.iter().enumerate() { + if let Some(result_slid) = crate::id::get_slid(*opt_result) { + // Find the structure Slid for this local index + // The local index corresponds to position in the domain sort's carrier + if let Some(domain_sort_idx) = self.get_func_domain_sort(func_slid) + && let Some(carrier) = structure.carriers.get(domain_sort_idx) + && let Some(arg_u64) = carrier.iter().nth(local_idx) { + let arg_slid = Slid::from_usize(arg_u64 as usize); + if let (Some(&arg_elem), Some(&result_elem)) = + (elem_slid_map.get(&arg_slid), elem_slid_map.get(&result_slid)) + { + self.add_func_val(instance_slid, func_slid, arg_elem, result_elem)?; + } + } + } + } + } + crate::core::FunctionColumn::External(_) => { + // External functions reference elements from other instances + // TODO: Handle external references + } + crate::core::FunctionColumn::ProductLocal { .. } => { + // Product domain functions need special handling + // TODO: Handle product domains + } + crate::core::FunctionColumn::ProductCodomain { .. } => { + // Product codomain functions need special handling + // TODO: Handle product codomains (store each field value) + } + } + } + + // 3. 
Persist relation tuples via columnar batches + // Build InstanceDataBatch with all relation tuples + let mut batch = InstanceDataBatch::new(); + + // Get instance UUID for the batch + let instance_uuid = self.get_element_uuid(instance_slid); + + // Build a map from Structure Slid to element UUID + let struct_slid_to_uuid: HashMap = elem_slid_map + .iter() + .map(|(&struct_slid, &elem_slid)| { + (struct_slid, self.get_element_uuid(elem_slid)) + }) + .collect(); + + for (rel_idx, relation) in structure.relations.iter().enumerate() { + let rel_slid = match rel_idx_to_rel.get(&rel_idx) { + Some(s) => *s, + None => continue, + }; + + if relation.is_empty() { + continue; + } + + // Get the relation UUID + let rel_uuid = self.get_element_uuid(rel_slid); + + // Get field UUIDs for this relation's domain + let rel_info = rel_infos.get(rel_idx); + let arity = rel_info.map(|r| r.domain.arity()).unwrap_or(1); + + // For field_ids, we use the field UUIDs from the relation's domain + // For now, use placeholder UUIDs since we need to query Field elements + // TODO: Query Field elements from GeologMeta for proper UUIDs + let field_ids: Vec = (0..arity).map(|_| Uuid::nil()).collect(); + + let mut rel_batch = RelationTupleBatch::new( + instance_uuid, + rel_uuid, + field_ids, + ); + + // Add all tuples + for tuple in relation.iter() { + // Convert Structure Slids to UUIDs + let uuid_tuple: Vec = tuple + .iter() + .filter_map(|struct_slid| struct_slid_to_uuid.get(struct_slid).copied()) + .collect(); + + if uuid_tuple.len() == tuple.len() { + rel_batch.push(&uuid_tuple); + } + } + + if !rel_batch.is_empty() { + batch.relation_tuples.push(rel_batch); + } + } + + // Save the batch if we have any relation tuples + if !batch.relation_tuples.is_empty() { + // Determine version number (count existing batches for this instance) + let existing_batches = self.load_instance_data_batches(instance_uuid) + .unwrap_or_default(); + let version = existing_batches.len() as u64; + + 
self.save_instance_data_batch(instance_uuid, version, &batch)?; + } + + Ok(InstancePersistResult { elem_slid_map }) + } + + /// Helper to get the domain sort index for a function. + fn get_func_domain_sort(&self, func_slid: Slid) -> Option { + let dom_func = self.func_ids.func_dom?; + let dsort_slid = self.get_func(dom_func, func_slid)?; + + // Check if it's a base dsort + let base_ds_sort = self.sort_ids.base_ds?; + let srt_func = self.func_ids.base_ds_srt?; + let dsort_func = self.func_ids.base_ds_dsort?; + + for base_slid in self.elements_of_sort(base_ds_sort) { + if self.get_func(dsort_func, base_slid) == Some(dsort_slid) + && let Some(srt_slid) = self.get_func(srt_func, base_slid) { + // Find this Srt's index in the theory + let srt_theory_func = self.func_ids.srt_theory?; + if let Some(theory_slid) = self.get_func(srt_theory_func, srt_slid) { + let sorts = self.query_theory_sorts(theory_slid); + for (idx, info) in sorts.iter().enumerate() { + if info.slid == srt_slid { + return Some(idx); + } + } + } + } + } + None + } +} + +/// Result of persisting instance data to GeologMeta. +#[derive(Debug)] +pub struct InstancePersistResult { + /// Mapping from Structure Slids to GeologMeta Elem Slids + pub elem_slid_map: HashMap, +} diff --git a/src/store/materialize.rs b/src/store/materialize.rs new file mode 100644 index 0000000..b060571 --- /dev/null +++ b/src/store/materialize.rs @@ -0,0 +1,238 @@ +//! Materialized views for the Store. +//! +//! A MaterializedView is an indexed snapshot of an instance at a specific version, +//! computed by walking the version chain and applying all additions/retractions. + +use std::collections::{HashMap, HashSet}; + +use crate::id::{NumericId, Slid}; + +use super::append::AppendOps; +use super::Store; + +/// A materialized view of an instance at a specific version. +/// +/// This is the "rendered" state of an instance after applying all patches +/// from the root to a particular version. 
It can be incrementally updated +/// when a new child version is created. +#[derive(Clone, Debug)] +pub struct MaterializedView { + /// The instance version this view is materialized at + pub instance: Slid, + + /// Live elements (not tombstoned) + pub elements: HashSet, + + /// Live relation tuples: tuple_slid -> (rel, arg) + pub rel_tuples: HashMap, + + /// Live function values: fv_slid -> (func, arg, result) + pub func_vals: HashMap, + + /// Element tombstones (for delta computation) + /// NOTE: Only elements can be tombstoned; FuncVals and RelTuples are immutable + pub elem_tombstones: HashSet, +} + +impl MaterializedView { + /// Create an empty materialized view + pub fn empty(instance: Slid) -> Self { + Self { + instance, + elements: HashSet::new(), + rel_tuples: HashMap::new(), + func_vals: HashMap::new(), + elem_tombstones: HashSet::new(), + } + } + + /// Get the number of live elements + pub fn element_count(&self) -> usize { + self.elements.len() + } + + /// Check if an element is live + pub fn has_element(&self, elem: Slid) -> bool { + self.elements.contains(&elem) + } + + /// Check if a relation tuple is live + pub fn has_rel_tuple(&self, tuple: Slid) -> bool { + self.rel_tuples.contains_key(&tuple) + } + + /// Get all elements of a particular sort (requires Store for lookup) + pub fn elements_of_sort<'a>( + &'a self, + store: &'a Store, + sort: Slid, + ) -> impl Iterator + 'a { + self.elements.iter().copied().filter(move |&elem| { + store + .func_ids + .elem_sort + .and_then(|f| store.get_func(f, elem)) + .map(|s| s == sort) + .unwrap_or(false) + }) + } + + /// Get all relation tuples for a particular relation + pub fn tuples_of_relation(&self, rel: Slid) -> impl Iterator + '_ { + self.rel_tuples + .iter() + .filter(move |(_, (r, _))| *r == rel) + .map(|(&tuple_slid, (_, arg))| (tuple_slid, *arg)) + } + + /// Get all function values for a particular function + pub fn values_of_function(&self, func: Slid) -> impl Iterator + '_ { + self.func_vals + 
.iter() + .filter(move |(_, (f, _, _))| *f == func) + .map(|(_, (_, arg, result))| (*arg, *result)) + } +} + +impl Store { + /// Materialize an instance from scratch by walking the parent chain. + /// + /// This collects all additions and retractions from root to the specified + /// version, producing a complete view of the instance state. + pub fn materialize(&self, instance: Slid) -> MaterializedView { + let mut view = MaterializedView::empty(instance); + + // Collect version chain (from instance back to root) + let mut chain = Vec::new(); + let mut version = Some(instance); + while let Some(v) = version { + chain.push(v); + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + // Process from oldest to newest (reverse the chain) + for v in chain.into_iter().rev() { + self.apply_version_delta(&mut view, v); + } + + view.instance = instance; + view + } + + /// Apply the delta from a single instance version to a materialized view. + /// + /// This is the core of incremental materialization: given a view at version N, + /// we can efficiently update it to version N+1 by applying only the changes + /// introduced in N+1. + pub fn apply_version_delta(&self, view: &mut MaterializedView, version: Slid) { + // 1. Process element additions + if let Some(elem_sort) = self.sort_ids.elem + && let Some(instance_func) = self.func_ids.elem_instance { + for elem in self.elements_of_sort(elem_sort) { + if self.get_func(instance_func, elem) == Some(version) { + // Don't add if already tombstoned + if !view.elem_tombstones.contains(&elem) { + view.elements.insert(elem); + } + } + } + } + + // 2. 
Process element retractions + if let Some(retract_sort) = self.sort_ids.elem_retract + && let Some(instance_func) = self.func_ids.elem_retract_instance + && let Some(elem_func) = self.func_ids.elem_retract_elem { + for retract in self.elements_of_sort(retract_sort) { + if self.get_func(instance_func, retract) == Some(version) + && let Some(elem) = self.get_func(elem_func, retract) { + view.elements.remove(&elem); + view.elem_tombstones.insert(elem); + } + } + } + + // 3. Process relation tuple additions + // NOTE: Relation tuples are now stored in columnar batches (see `store::columnar`), + // not as individual GeologMeta elements. This section is a no-op until + // columnar batch loading is implemented. + // + // TODO: Load relation tuples from columnar batches into view.rel_tuples + + // 4. Process function value additions (IMMUTABLE - no retractions) + if let Some(fv_sort) = self.sort_ids.func_val + && let (Some(instance_func), Some(func_func), Some(arg_func), Some(result_func)) = ( + self.func_ids.func_val_instance, + self.func_ids.func_val_func, + self.func_ids.func_val_arg, + self.func_ids.func_val_result, + ) { + for fv in self.elements_of_sort(fv_sort) { + if self.get_func(instance_func, fv) == Some(version) + && let (Some(func), Some(arg), Some(result)) = ( + self.get_func(func_func, fv), + self.get_func(arg_func, fv), + self.get_func(result_func, fv), + ) { + view.func_vals.insert(fv, (func, arg, result)); + } + } + } + } + + /// Incrementally update a materialized view to a new version. + /// + /// The new version must be a direct child of the view's current version, + /// or this will return an error. 
+ pub fn update_view( + &self, + view: &mut MaterializedView, + new_version: Slid, + ) -> Result<(), String> { + // Verify that new_version is a direct child of view.instance + let parent = self + .func_ids + .instance_parent + .and_then(|f| self.get_func(f, new_version)); + + if parent != Some(view.instance) { + return Err(format!( + "Cannot incrementally update: {} is not a direct child of {}", + new_version.index(), + view.instance.index() + )); + } + + // Apply the delta + self.apply_version_delta(view, new_version); + view.instance = new_version; + + Ok(()) + } + + /// Create a new instance version extending an existing view, and update the view. + /// + /// This is the preferred way to modify instances: create the extension, + /// add elements/tuples/values to it, then update the view. + pub fn extend_instance_with_view( + &mut self, + view: &mut MaterializedView, + name: &str, + ) -> Result { + let new_version = self.extend_instance(view.instance, name)?; + + // The view can be updated after mutations are done + // For now, just update the instance reference + view.instance = new_version; + + Ok(new_version) + } + + /// Materialize and cache a view for an instance. + /// + /// This stores the view in a view cache for efficient reuse. + /// The cache is invalidated when the instance is extended. + pub fn get_or_create_view(&mut self, instance: Slid) -> MaterializedView { + // For now, just materialize (cache can be added later) + self.materialize(instance) + } +} diff --git a/src/store/mod.rs b/src/store/mod.rs new file mode 100644 index 0000000..1f2c874 --- /dev/null +++ b/src/store/mod.rs @@ -0,0 +1,585 @@ +//! Append-only store for GeologMeta elements. +//! +//! This module provides the foundation for geolog's persistent, versioned data model. +//! All data (theories, instances, elements, function values, relation tuples) is stored +//! as elements in a single GeologMeta Structure that is append-only. +//! +//! # Key design principles +//! +//! 
- **Append-only**: Elements are never deleted, only tombstoned +//! - **Patch-based versioning**: Each theory/instance version is a delta from its parent +//! - **Incremental materialization**: Views are updated efficiently as patches arrive +//! - **Eternal format**: Once GeologMeta schema is v1.0, it never changes +//! +//! # Module structure +//! +//! - [`schema`]: Cached sort and function IDs from GeologMeta +//! - [`append`]: Low-level element append operations +//! - [`theory`]: Theory CRUD (create, extend, add sorts/functions/relations) +//! - [`instance`]: Instance CRUD (create, extend, add elements, retractions) +//! - [`commit`]: Version control (commits, name bindings, history) +//! - [`query`]: Query operations (walking version chains) +//! - [`materialize`]: Materialized views for fast indexed access + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use crate::core::{DerivedSort, ElaboratedTheory, Structure}; +use crate::id::{NumericId, Slid}; +use crate::meta::geolog_meta; +use crate::naming::NamingIndex; +use crate::universe::Universe; + +pub mod append; +pub mod batch; +pub mod bootstrap_queries; +pub mod columnar; +pub mod commit; +pub mod instance; +pub mod materialize; +pub mod query; +pub mod schema; +pub mod theory; + +pub use batch::{ElementBatch, ElementBuilder, ElementCreationContext}; +pub use materialize::MaterializedView; +pub use schema::{FuncIds, SortIds}; + +// ============================================================================ +// STORE +// ============================================================================ + +/// The append-only store: a single GeologMeta Structure plus indexing. +/// +/// This is the "source of truth" for all geolog data. Theories and instances +/// are represented as elements within this structure, along with their +/// components (sorts, functions, relations, elements, values, etc.). 
+pub struct Store { + /// The GeologMeta instance containing all data + pub meta: Structure, + + /// The GeologMeta theory (for signature lookups) + pub meta_theory: Arc, + + /// Universe for UUID <-> Luid mapping + pub universe: Universe, + + /// Human-readable names for UUIDs + pub naming: NamingIndex, + + /// Current HEAD commit (None if no commits yet) + pub head: Option, + + /// Uncommitted changes (name -> target slid) + /// These become NameBindings on commit + pub uncommitted: HashMap, + + /// Cached sort IDs for quick lookup + pub(crate) sort_ids: SortIds, + + /// Cached function IDs for quick lookup + pub(crate) func_ids: FuncIds, + + /// Path for persistence (None = in-memory only) + pub path: Option, + + /// Whether there are unsaved changes + dirty: bool, +} + +/// An uncommitted name binding +#[derive(Debug, Clone)] +pub struct UncommittedBinding { + /// The target (Theory or Instance slid in meta) + pub target: Slid, + /// Whether this binds to a theory or instance + pub kind: BindingKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BindingKind { + Theory, + Instance, +} + +// ============================================================================ +// APPEND TRAIT IMPLEMENTATION +// ============================================================================ + +impl append::AppendOps for Store { + fn add_element(&mut self, sort_id: usize, name: &str) -> Slid { + let (slid, luid) = self.meta.add_element(&mut self.universe, sort_id); + let uuid = self.universe.get(luid).expect("freshly created luid should have uuid"); + self.naming.insert(uuid, vec![name.to_string()]); + self.dirty = true; + slid + } + + fn add_element_qualified(&mut self, sort_id: usize, path: Vec) -> Slid { + let (slid, luid) = self.meta.add_element(&mut self.universe, sort_id); + let uuid = self.universe.get(luid).expect("freshly created luid should have uuid"); + self.naming.insert(uuid, path); + self.dirty = true; + slid + } + + fn define_func(&mut self, 
func_id: usize, domain: Slid, codomain: Slid) -> Result<(), String> { + self.meta.define_function(func_id, domain, codomain)?; + self.dirty = true; + Ok(()) + } + + fn get_func(&self, func_id: usize, domain: Slid) -> Option { + let sort_slid = self.meta.sort_local_id(domain); + self.meta.get_function(func_id, sort_slid) + } + + fn elements_of_sort(&self, sort_id: usize) -> Vec { + if sort_id >= self.meta.carriers.len() { + return vec![]; + } + self.meta.carriers[sort_id] + .iter() + .map(|x| Slid::from_usize(x as usize)) + .collect() + } + + fn get_element_name(&self, slid: Slid) -> String { + let luid = self.meta.get_luid(slid); + if let Some(uuid) = self.universe.get(luid) { + self.naming.display_name(&uuid) + } else { + format!("#{}", slid.index()) + } + } +} + +// ============================================================================ +// STORE IMPL +// ============================================================================ + +impl Store { + /// Create a new empty store + pub fn new() -> Self { + let meta_theory = geolog_meta(); + let num_sorts = meta_theory.theory.signature.sorts.len(); + let mut meta = Structure::new(num_sorts); + + // Initialize function storage for all functions in GeologMeta + let domain_sort_ids: Vec> = meta_theory + .theory + .signature + .functions + .iter() + .map(|f| match &f.domain { + DerivedSort::Base(sort_id) => Some(*sort_id), + DerivedSort::Product(_) => None, + }) + .collect(); + meta.init_functions(&domain_sort_ids); + + // Initialize relation storage + let arities: Vec = meta_theory + .theory + .signature + .relations + .iter() + .map(|r| match &r.domain { + DerivedSort::Base(_) => 1, + DerivedSort::Product(fields) => fields.len(), + }) + .collect(); + meta.init_relations(&arities); + + let sort_ids = SortIds::from_theory(&meta_theory); + let func_ids = FuncIds::from_theory(&meta_theory); + + Self { + meta, + meta_theory, + universe: Universe::new(), + naming: NamingIndex::new(), + head: None, + uncommitted: 
HashMap::new(), + sort_ids, + func_ids, + path: None, + dirty: false, + } + } + + /// Create a store with a persistence path + pub fn with_path(path: impl Into) -> Self { + let path = path.into(); + + // Create directory if needed + let _ = std::fs::create_dir_all(&path); + + // Create store with paths for all components + let mut store = Self::new(); + store.path = Some(path.clone()); + store.universe = Universe::with_path(path.join("universe")); + store.naming = NamingIndex::with_path(path.join("naming")); + store + } + + /// Load a store from disk, or create new if doesn't exist + pub fn load_or_create(path: impl Into) -> Self { + let path = path.into(); + if path.exists() { + Self::load(&path).unwrap_or_else(|_| Self::with_path(path)) + } else { + Self::with_path(path) + } + } + + /// Load a store from disk + pub fn load(path: &Path) -> Result { + // Load meta structure + let meta_path = path.join("meta.bin"); + let meta = crate::serialize::load_structure(&meta_path)?; + + // Load universe + let universe_path = path.join("universe"); + let universe = Universe::load(&universe_path)?; + + // Load naming + let naming_path = path.join("naming"); + let naming = NamingIndex::load(&naming_path)?; + + // Load HEAD commit reference + let head_path = path.join("HEAD"); + let head = if head_path.exists() { + let content = std::fs::read_to_string(&head_path) + .map_err(|e| format!("Failed to read HEAD: {}", e))?; + let index: usize = content + .trim() + .parse() + .map_err(|e| format!("Invalid HEAD format: {}", e))?; + Some(Slid::from_usize(index)) + } else { + None + }; + + // Get meta theory and build IDs (same as new()) + let meta_theory = geolog_meta(); + let sort_ids = SortIds::from_theory(&meta_theory); + let func_ids = FuncIds::from_theory(&meta_theory); + + Ok(Self { + meta, + meta_theory, + universe, + naming, + head, + uncommitted: HashMap::new(), + sort_ids, + func_ids, + path: Some(path.to_path_buf()), + dirty: false, + }) + } + + /// Save the store to disk + 
pub fn save(&mut self) -> Result<(), String> { + if !self.dirty { + return Ok(()); + } + + let Some(path) = &self.path else { + return Ok(()); // In-memory store, nothing to save + }; + + // Ensure parent directory exists + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create directory: {}", e))?; + } + + // Save universe + self.universe.save()?; + + // Save naming + self.naming.save()?; + + // Save meta structure + let meta_path = path.join("meta.bin"); + crate::serialize::save_structure(&self.meta, &meta_path)?; + + // Save head commit reference + if let Some(head) = self.head { + let head_path = path.join("HEAD"); + std::fs::write(&head_path, format!("{}", head.index())) + .map_err(|e| format!("Failed to write HEAD: {}", e))?; + } + + self.dirty = false; + Ok(()) + } + + /// Check if the store has uncommitted changes + pub fn is_dirty(&self) -> bool { + self.dirty || !self.uncommitted.is_empty() + } + + /// Get the number of elements in the meta structure + pub fn len(&self) -> usize { + self.meta.len() + } + + /// Check if the store is empty + pub fn is_empty(&self) -> bool { + self.meta.is_empty() + } + + // ======================================================================== + // COLUMNAR BATCH STORAGE + // ======================================================================== + + /// Get the directory for instance data (columnar batches) + fn instance_data_dir(&self) -> Option { + self.path.as_ref().map(|p| p.join("instance_data")) + } + + /// Save instance data batch for a specific patch version. + /// + /// Each patch can have up to 2 batches per instance: + /// - One EDB batch (user-declared facts) + /// - One IDB batch (chase-derived facts) + /// + /// The batch kind is encoded in the filename to allow both to coexist. 
+ pub fn save_instance_data_batch( + &self, + instance_uuid: crate::id::Uuid, + patch_version: u64, + batch: &columnar::InstanceDataBatch, + ) -> Result<(), String> { + use rkyv::ser::serializers::AllocSerializer; + use rkyv::ser::Serializer; + + let Some(dir) = self.instance_data_dir() else { + return Ok(()); // In-memory store, nothing to save + }; + + // Ensure directory exists + std::fs::create_dir_all(&dir) + .map_err(|e| format!("Failed to create instance_data dir: {}", e))?; + + // Serialize batch with rkyv + let mut serializer = AllocSerializer::<4096>::default(); + serializer.serialize_value(batch) + .map_err(|e| format!("Failed to serialize instance data batch: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + // Write to file named by instance UUID, patch version, and batch kind + // EDB batches: {uuid}_v{version}_edb.batch.bin + // IDB batches: {uuid}_v{version}_idb.batch.bin + let kind_suffix = match batch.kind { + columnar::BatchKind::Edb => "edb", + columnar::BatchKind::Idb => "idb", + }; + let filename = format!("{}_v{}_{}.batch.bin", instance_uuid, patch_version, kind_suffix); + let file_path = dir.join(filename); + std::fs::write(&file_path, &bytes) + .map_err(|e| format!("Failed to write instance data batch: {}", e))?; + + Ok(()) + } + + /// Load all instance data batches for an instance (across all patch versions). + /// + /// Returns batches in version order so they can be applied sequentially. + /// Both EDB and IDB batches are loaded; use `batch.kind` to filter if needed. 
+ pub fn load_instance_data_batches( + &self, + instance_uuid: crate::id::Uuid, + ) -> Result, String> { + use rkyv::Deserialize; + + let Some(dir) = self.instance_data_dir() else { + return Ok(vec![]); // In-memory store, no data + }; + + if !dir.exists() { + return Ok(vec![]); + } + + // (version, is_idb, batch) - sort so EDB comes before IDB at same version + let mut version_batches: Vec<(u64, bool, columnar::InstanceDataBatch)> = Vec::new(); + let prefix = format!("{}_v", instance_uuid); + + // Read all matching batch files + let entries = std::fs::read_dir(&dir) + .map_err(|e| format!("Failed to read instance_data dir: {}", e))?; + + for entry in entries { + let entry = entry.map_err(|e| format!("Failed to read dir entry: {}", e))?; + let path = entry.path(); + + if let Some(name) = path.file_name().and_then(|n| n.to_str()) + && name.starts_with(&prefix) && name.ends_with(".batch.bin") { + // Parse filename: {uuid}_v{version}_{edb|idb}.batch.bin + // or legacy format: {uuid}_v{version}.batch.bin + let suffix = name + .strip_prefix(&prefix) + .and_then(|s| s.strip_suffix(".batch.bin")) + .ok_or_else(|| format!("Invalid batch filename: {}", name))?; + + // Check for new format with _edb or _idb suffix + let (version_str, is_idb) = if let Some(v) = suffix.strip_suffix("_edb") { + (v, false) + } else if let Some(v) = suffix.strip_suffix("_idb") { + (v, true) + } else { + // Legacy format without kind suffix - assume EDB + (suffix, false) + }; + + let version: u64 = version_str.parse() + .map_err(|_| format!("Invalid version in filename: {}", name))?; + + let bytes = std::fs::read(&path) + .map_err(|e| format!("Failed to read batch {}: {}", name, e))?; + + let archived = rkyv::check_archived_root::(&bytes) + .map_err(|e| format!("Failed to validate batch {}: {}", name, e))?; + + let batch: columnar::InstanceDataBatch = archived.deserialize(&mut rkyv::Infallible) + .map_err(|_| format!("Failed to deserialize batch {}", name))?; + + version_batches.push((version, 
is_idb, batch)); + } + } + + // Sort by version, then EDB before IDB at same version + version_batches.sort_by_key(|(v, is_idb, _)| (*v, *is_idb)); + Ok(version_batches.into_iter().map(|(_, _, b)| b).collect()) + } + + /// Load only EDB (wire-transmittable) batches for an instance. + /// + /// This is what would be sent over the network during sync. + pub fn load_edb_batches( + &self, + instance_uuid: crate::id::Uuid, + ) -> Result, String> { + let all = self.load_instance_data_batches(instance_uuid)?; + Ok(all.into_iter().filter(|b| b.is_wire_transmittable()).collect()) + } +} + +impl Default for Store { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_store() { + let store = Store::new(); + assert!(store.head.is_none()); + assert!(store.uncommitted.is_empty()); + } + + #[test] + fn test_create_theory() { + let mut store = Store::new(); + let _theory = store.create_theory("TestTheory").unwrap(); + assert!(store.uncommitted.contains_key("TestTheory")); + } + + #[test] + fn test_create_instance() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let _instance = store.create_instance("TestInstance", theory).unwrap(); + assert!(store.uncommitted.contains_key("TestInstance")); + } + + #[test] + fn test_commit() { + let mut store = Store::new(); + let _theory = store.create_theory("TestTheory").unwrap(); + let commit = store.commit(Some("Initial commit")).unwrap(); + assert_eq!(store.head, Some(commit)); + assert!(store.uncommitted.is_empty()); + } + + #[test] + fn test_materialize_empty_instance() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let instance = store.create_instance("TestInstance", theory).unwrap(); + + let view = 
store.materialize(instance); + assert_eq!(view.instance, instance); + assert!(view.elements.is_empty()); + assert!(view.rel_tuples.is_empty()); + assert!(view.func_vals.is_empty()); + } + + #[test] + fn test_materialize_with_elements() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let instance = store.create_instance("TestInstance", theory).unwrap(); + + // We'd need a sort in the theory to add elements, so this test is limited + let view = store.materialize(instance); + assert_eq!(view.instance, instance); + } + + #[test] + fn test_incremental_view_update() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let v1 = store.create_instance("TestInstance", theory).unwrap(); + + let mut view = store.materialize(v1); + assert_eq!(view.instance, v1); + + // Extend the instance + let v2 = store.extend_instance(v1, "TestInstance_v2").unwrap(); + + // Update view incrementally + let result = store.update_view(&mut view, v2); + assert!(result.is_ok()); + assert_eq!(view.instance, v2); + } + + #[test] + fn test_incremental_update_invalid_parent() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let v1 = store.create_instance("Instance1", theory).unwrap(); + let v2 = store.create_instance("Instance2", theory).unwrap(); + + let mut view = store.materialize(v1); + + // v2 is not a child of v1, so this should fail + let result = store.update_view(&mut view, v2); + assert!(result.is_err()); + } + + #[test] + fn test_commit_history() { + let mut store = Store::new(); + let _theory = store.create_theory("TestTheory").unwrap(); + let c1 = store.commit(Some("First")).unwrap(); + + store.create_theory("Theory2").unwrap(); + let c2 = store.commit(Some("Second")).unwrap(); + + let history = store.commit_history(); + assert_eq!(history, vec![c1, c2]); + } +} diff --git a/src/store/query.rs b/src/store/query.rs new file mode 100644 index 
0000000..3033279 --- /dev/null +++ b/src/store/query.rs @@ -0,0 +1,127 @@ +//! Query operations for the Store. +//! +//! Walking instance version chains to collect elements, function values, and relation tuples. +//! +//! NOTE: FuncVals and RelTuples are IMMUTABLE (Monotonic Submodel Property). +//! Only elements can be retracted. + +use std::collections::HashSet; + +use crate::id::Slid; + +use super::append::AppendOps; +use super::Store; + +impl Store { + /// Get all elements of an instance (including from parent chain) + pub fn get_instance_elements(&self, instance: Slid) -> Vec { + let mut elements = Vec::new(); + let mut retractions = HashSet::new(); + + // Collect retractions first (from all versions in chain) + let mut version = Some(instance); + while let Some(v) = version { + if let Some(retract_sort) = self.sort_ids.elem_retract + && let Some(instance_func) = self.func_ids.elem_retract_instance + && let Some(elem_func) = self.func_ids.elem_retract_elem { + for retract in self.elements_of_sort(retract_sort) { + if self.get_func(instance_func, retract) == Some(v) + && let Some(elem) = self.get_func(elem_func, retract) { + retractions.insert(elem); + } + } + } + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + // Now collect elements (filtering out retracted ones) + let mut version = Some(instance); + while let Some(v) = version { + if let Some(elem_sort) = self.sort_ids.elem + && let Some(instance_func) = self.func_ids.elem_instance { + for elem in self.elements_of_sort(elem_sort) { + if self.get_func(instance_func, elem) == Some(v) + && !retractions.contains(&elem) { + elements.push(elem); + } + } + } + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + elements + } + + /// Get all relation tuples of an instance (including from parent chain) + /// + /// NOTE: Relation tuples are now stored in columnar batches (see `store::columnar`), + /// not as individual GeologMeta elements. 
This function returns empty until + /// columnar batch loading is implemented. + /// + /// TODO: Implement columnar batch loading for relation tuples. + pub fn get_instance_rel_tuples(&self, _instance: Slid) -> Vec<(Slid, Slid, Slid)> { + // Relation tuples are stored in columnar batches, not GeologMeta elements. + // Return empty until columnar batch loading is implemented. + vec![] + } + + /// Get all function values of an instance (including from parent chain) + /// + /// Returns (fv_slid, func_slid, arg_slid, result_slid) tuples. + /// NOTE: FuncVals are IMMUTABLE - no retractions (Monotonic Submodel Property) + pub fn get_instance_func_vals(&self, instance: Slid) -> Vec<(Slid, Slid, Slid, Slid)> { + let mut vals = Vec::new(); + + // Collect function values from all versions in the chain + let mut version = Some(instance); + while let Some(v) = version { + if let Some(fv_sort) = self.sort_ids.func_val + && let ( + Some(instance_func), + Some(func_func), + Some(arg_func), + Some(result_func), + ) = ( + self.func_ids.func_val_instance, + self.func_ids.func_val_func, + self.func_ids.func_val_arg, + self.func_ids.func_val_result, + ) { + for fv in self.elements_of_sort(fv_sort) { + if self.get_func(instance_func, fv) == Some(v) + && let (Some(func), Some(arg), Some(result)) = ( + self.get_func(func_func, fv), + self.get_func(arg_func, fv), + self.get_func(result_func, fv), + ) { + vals.push((fv, func, arg, result)); + } + } + } + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + vals + } + + /// Get the theory for an instance + pub fn get_instance_theory(&self, instance: Slid) -> Option { + self.func_ids + .instance_theory + .and_then(|f| self.get_func(f, instance)) + } + + /// Get the parent of an instance (for versioning) + pub fn get_instance_parent(&self, instance: Slid) -> Option { + self.func_ids + .instance_parent + .and_then(|f| self.get_func(f, instance)) + } + + /// Get an element's sort + pub fn get_elem_sort(&self, elem: 
Slid) -> Option<Slid> {
        self.func_ids
            .elem_sort
            .and_then(|f| self.get_func(f, elem))
    }
}
diff --git a/src/store/schema.rs b/src/store/schema.rs
new file mode 100644
index 0000000..cf2819c
--- /dev/null
+++ b/src/store/schema.rs
@@ -0,0 +1,345 @@
//! Schema ID caching for GeologMeta.
//!
//! Caches sort and function IDs from the GeologMeta signature for fast lookup.

use crate::core::ElaboratedTheory;

/// Cached sort IDs from GeologMeta
///
/// NOTE(review): the id type is assumed to be `usize` (signature-level index
/// returned by `Signature::lookup_sort`) — confirm against `crate::core`.
#[derive(Default)]
pub struct SortIds {
    pub theory: Option<usize>,
    pub instance: Option<usize>,
    pub commit: Option<usize>,
    pub name_binding: Option<usize>,
    pub srt: Option<usize>,
    pub func: Option<usize>,
    pub rel: Option<usize>,
    pub elem: Option<usize>,
    pub elem_retract: Option<usize>,
    pub func_val: Option<usize>,
    pub rel_tuple: Option<usize>,
    pub rel_tuple_arg: Option<usize>,
    // NOTE: No func_val_retract or rel_tuple_retract - these are immutable (Monotonic Submodel Property)
    pub sequent: Option<usize>,
    pub param: Option<usize>,
    pub dsort: Option<usize>,
    pub base_ds: Option<usize>,
    pub prod_ds: Option<usize>,
    pub field: Option<usize>,
    pub binder: Option<usize>,
    pub term: Option<usize>,
    pub formula: Option<usize>,

    // Context variables (for sequent universal quantification)
    pub ctx_var: Option<usize>,

    // Term subtypes
    pub var_t: Option<usize>,
    pub app_t: Option<usize>,
    pub record_t: Option<usize>,
    pub rec_entry: Option<usize>,
    pub proj_t: Option<usize>,

    // Formula subtypes
    pub true_f: Option<usize>,
    pub false_f: Option<usize>,
    pub eq_f: Option<usize>,
    pub rel_f: Option<usize>,
    pub conj_f: Option<usize>,
    pub conj_arm: Option<usize>,
    pub disj_f: Option<usize>,
    pub disj_arm: Option<usize>,
    pub exists_f: Option<usize>,

    // Node (for ancestry/scoping - may not be needed for persistence)
    pub node: Option<usize>,
}

/// Cached function IDs from GeologMeta
///
/// NOTE(review): the id type is assumed to be `usize` (signature-level index
/// returned by `Signature::lookup_func`) — confirm against `crate::core`.
#[derive(Default)]
pub struct FuncIds {
    // Theory functions
    pub theory_parent: Option<usize>,

    // Instance functions
    pub instance_parent: Option<usize>,
    pub instance_theory: Option<usize>,

    // Commit functions
    pub commit_parent: Option<usize>,

    // NameBinding functions
    pub name_binding_commit: Option<usize>,
    pub name_binding_theory: Option<usize>,
    pub name_binding_instance: Option<usize>,

    // Elem functions
    pub elem_instance: Option<usize>,
    pub elem_sort: Option<usize>,

    // ElemRetract functions
    pub elem_retract_instance: Option<usize>,
    pub elem_retract_elem: Option<usize>,

    // FuncVal functions (IMMUTABLE - no retract)
    pub func_val_instance: Option<usize>,
    pub func_val_func: Option<usize>,
    pub func_val_arg: Option<usize>,
    pub func_val_result: Option<usize>,

    // RelTuple functions (IMMUTABLE - no retract)
    pub rel_tuple_instance: Option<usize>,
    pub rel_tuple_rel: Option<usize>,

    // RelTupleArg functions (uniform for all relations, even unary)
    pub rel_tuple_arg_tuple: Option<usize>,
    pub rel_tuple_arg_elem: Option<usize>,
    pub rel_tuple_arg_position: Option<usize>,

    // Srt functions
    pub srt_theory: Option<usize>,

    // Func functions
    pub func_theory: Option<usize>,
    pub func_dom: Option<usize>,
    pub func_cod: Option<usize>,

    // Rel functions
    pub rel_theory: Option<usize>,
    pub rel_dom: Option<usize>,

    // DSort functions
    pub base_ds_dsort: Option<usize>,
    pub base_ds_srt: Option<usize>,
    pub prod_ds_dsort: Option<usize>,
    pub field_prod: Option<usize>,
    pub field_type: Option<usize>,

    // Sequent functions
    pub sequent_theory: Option<usize>,
    pub sequent_premise: Option<usize>,
    pub sequent_conclusion: Option<usize>,

    // CtxVar functions (sequent-level universal quantification)
    pub ctx_var_sequent: Option<usize>,
    pub ctx_var_binder: Option<usize>,

    // Binder functions
    pub binder_type: Option<usize>,

    // Term/Formula to Node embeddings
    pub term_node: Option<usize>,
    pub formula_node: Option<usize>,

    // VarT functions
    pub var_t_term: Option<usize>,
    pub var_t_binder: Option<usize>,

    // AppT functions
    pub app_t_term: Option<usize>,
    pub app_t_func: Option<usize>,
    pub app_t_arg: Option<usize>,

    // RecordT functions
    pub record_t_term: Option<usize>,

    // RecEntry functions
    pub rec_entry_record: Option<usize>,
    pub rec_entry_val: Option<usize>,
    pub rec_entry_field: Option<usize>,

    // ProjT functions
    pub proj_t_term: Option<usize>,
    pub proj_t_base: Option<usize>,
    pub proj_t_field: Option<usize>,

    // TrueF/FalseF functions
    pub true_f_formula: Option<usize>,
    pub false_f_formula: Option<usize>,

    // EqF functions
    pub eq_f_formula: Option<usize>,
    pub eq_f_lhs: Option<usize>,
    pub eq_f_rhs: Option<usize>,

    // RelF functions
    pub rel_f_formula: Option<usize>,
    pub rel_f_arg: Option<usize>,
    pub rel_f_rel: Option<usize>,

    // ConjF functions
    pub conj_f_formula: Option<usize>,

    // ConjArm functions
    pub conj_arm_conj: Option<usize>,
    pub conj_arm_child: Option<usize>,

    // DisjF functions
    pub disj_f_formula: Option<usize>,

    // DisjArm functions
    pub disj_arm_disj: Option<usize>,
    pub disj_arm_child: Option<usize>,

    // ExistsF functions
    pub exists_f_formula: Option<usize>,
    pub exists_f_binder: Option<usize>,
    pub exists_f_body: Option<usize>,
}

impl SortIds {
    /// Populate sort IDs from a GeologMeta theory.
    ///
    /// Each field is `None` when the corresponding sort name is absent from
    /// the theory's signature.
    pub fn from_theory(theory: &ElaboratedTheory) -> Self {
        let sig = &theory.theory.signature;
        Self {
            theory: sig.lookup_sort("Theory"),
            instance: sig.lookup_sort("Instance"),
            commit: sig.lookup_sort("Commit"),
            name_binding: sig.lookup_sort("NameBinding"),
            srt: sig.lookup_sort("Srt"),
            func: sig.lookup_sort("Func"),
            rel: sig.lookup_sort("Rel"),
            elem: sig.lookup_sort("Elem"),
            elem_retract: sig.lookup_sort("ElemRetract"),
            func_val: sig.lookup_sort("FuncVal"),
            rel_tuple: sig.lookup_sort("RelTuple"),
            rel_tuple_arg: sig.lookup_sort("RelTupleArg"),
            sequent: sig.lookup_sort("Sequent"),
            param: sig.lookup_sort("Param"),
            dsort: sig.lookup_sort("DSort"),
            base_ds: sig.lookup_sort("BaseDS"),
            prod_ds: sig.lookup_sort("ProdDS"),
            field: sig.lookup_sort("Field"),
            binder: sig.lookup_sort("Binder"),
            term: sig.lookup_sort("Term"),
            formula: sig.lookup_sort("Formula"),
            ctx_var: sig.lookup_sort("CtxVar"),
            var_t: sig.lookup_sort("VarT"),
            app_t: sig.lookup_sort("AppT"),
            record_t: sig.lookup_sort("RecordT"),
            rec_entry: sig.lookup_sort("RecEntry"),
            proj_t: sig.lookup_sort("ProjT"),
            true_f: sig.lookup_sort("TrueF"),
            false_f: sig.lookup_sort("FalseF"),
            eq_f: sig.lookup_sort("EqF"),
            rel_f: sig.lookup_sort("RelF"),
            conj_f: sig.lookup_sort("ConjF"),
            conj_arm: sig.lookup_sort("ConjArm"),
            disj_f: sig.lookup_sort("DisjF"),
            disj_arm: sig.lookup_sort("DisjArm"),
            exists_f: sig.lookup_sort("ExistsF"),
            node: sig.lookup_sort("Node"),
        }
    }
}

impl FuncIds {
    /// Populate function IDs from a GeologMeta theory.
    ///
    /// Each field is `None` when the corresponding "Sort/func" name is absent
    /// from the theory's signature.
    pub fn from_theory(theory: &ElaboratedTheory) -> Self {
        let sig = &theory.theory.signature;
        Self {
            theory_parent: sig.lookup_func("Theory/parent"),
            instance_parent: sig.lookup_func("Instance/parent"),
            instance_theory: sig.lookup_func("Instance/theory"),
            commit_parent: sig.lookup_func("Commit/parent"),
            name_binding_commit: sig.lookup_func("NameBinding/commit"),
            name_binding_theory: sig.lookup_func("NameBinding/theory"),
            name_binding_instance: sig.lookup_func("NameBinding/instance"),
            elem_instance: sig.lookup_func("Elem/instance"),
            elem_sort: sig.lookup_func("Elem/sort"),
            elem_retract_instance: sig.lookup_func("ElemRetract/instance"),
            elem_retract_elem: sig.lookup_func("ElemRetract/elem"),
            func_val_instance: sig.lookup_func("FuncVal/instance"),
            func_val_func: sig.lookup_func("FuncVal/func"),
            func_val_arg: sig.lookup_func("FuncVal/arg"),
            func_val_result: sig.lookup_func("FuncVal/result"),
            rel_tuple_instance: sig.lookup_func("RelTuple/instance"),
            rel_tuple_rel: sig.lookup_func("RelTuple/rel"),
            rel_tuple_arg_tuple: sig.lookup_func("RelTupleArg/tuple"),
            rel_tuple_arg_elem: sig.lookup_func("RelTupleArg/elem"),
            rel_tuple_arg_position: sig.lookup_func("RelTupleArg/position"),
            srt_theory: sig.lookup_func("Srt/theory"),
            func_theory: sig.lookup_func("Func/theory"),
            func_dom: sig.lookup_func("Func/dom"),
            func_cod: sig.lookup_func("Func/cod"),
            rel_theory: sig.lookup_func("Rel/theory"),
            rel_dom: sig.lookup_func("Rel/dom"),
            base_ds_dsort: sig.lookup_func("BaseDS/dsort"),
            base_ds_srt: sig.lookup_func("BaseDS/srt"),
            prod_ds_dsort: sig.lookup_func("ProdDS/dsort"),
            field_prod: sig.lookup_func("Field/prod"),
            field_type: sig.lookup_func("Field/type"),

            // Sequent functions
            sequent_theory: sig.lookup_func("Sequent/theory"),
            sequent_premise: sig.lookup_func("Sequent/premise"),
            sequent_conclusion: sig.lookup_func("Sequent/conclusion"),

            // CtxVar functions
            ctx_var_sequent: sig.lookup_func("CtxVar/sequent"),
            ctx_var_binder: sig.lookup_func("CtxVar/binder"),

            // Binder functions
            binder_type: sig.lookup_func("Binder/type"),

            // Term/Formula to Node embeddings
            term_node: sig.lookup_func("Term/node"),
            formula_node: sig.lookup_func("Formula/node"),

            // VarT functions
            var_t_term: sig.lookup_func("VarT/term"),
            var_t_binder: sig.lookup_func("VarT/binder"),

            // AppT functions
            app_t_term: sig.lookup_func("AppT/term"),
            app_t_func: sig.lookup_func("AppT/func"),
            app_t_arg: sig.lookup_func("AppT/arg"),

            // RecordT functions
            record_t_term: sig.lookup_func("RecordT/term"),

            // RecEntry functions
            rec_entry_record: sig.lookup_func("RecEntry/record"),
            rec_entry_val: sig.lookup_func("RecEntry/val"),
            rec_entry_field: sig.lookup_func("RecEntry/field"),

            // ProjT functions
            proj_t_term: sig.lookup_func("ProjT/term"),
            proj_t_base: sig.lookup_func("ProjT/base"),
            proj_t_field: sig.lookup_func("ProjT/field"),

            // TrueF/FalseF functions
            true_f_formula: sig.lookup_func("TrueF/formula"),
            false_f_formula: sig.lookup_func("FalseF/formula"),

            // EqF functions
            eq_f_formula: sig.lookup_func("EqF/formula"),
            eq_f_lhs: sig.lookup_func("EqF/lhs"),
            eq_f_rhs: sig.lookup_func("EqF/rhs"),

            // RelF functions
            rel_f_formula: sig.lookup_func("RelF/formula"),
            rel_f_arg: sig.lookup_func("RelF/arg"),
            rel_f_rel: sig.lookup_func("RelF/rel"),

            // ConjF functions
            conj_f_formula: sig.lookup_func("ConjF/formula"),

            // ConjArm functions
            conj_arm_conj: sig.lookup_func("ConjArm/conj"),
            conj_arm_child: sig.lookup_func("ConjArm/child"),

            // DisjF functions
            disj_f_formula: sig.lookup_func("DisjF/formula"),

            // DisjArm functions
            disj_arm_disj: sig.lookup_func("DisjArm/disj"),
            disj_arm_child:
sig.lookup_func("DisjArm/child"),

            // ExistsF functions
            exists_f_formula: sig.lookup_func("ExistsF/formula"),
            exists_f_binder: sig.lookup_func("ExistsF/binder"),
            exists_f_body: sig.lookup_func("ExistsF/body"),
        }
    }
}
diff --git a/src/store/theory.rs b/src/store/theory.rs
new file mode 100644
index 0000000..3f66577
--- /dev/null
+++ b/src/store/theory.rs
@@ -0,0 +1,750 @@
//! Theory operations for the Store.
//!
//! Creating, extending, and modifying theories in the GeologMeta structure.

use std::collections::HashMap;

use crate::core::{DerivedSort, Formula, Sequent, Signature, Term};
use crate::id::Slid;

use super::append::AppendOps;
use super::{BindingKind, Store, UncommittedBinding};

impl Store {
    /// Create a new theory (version 0, no parent).
    ///
    /// Registers an uncommitted name binding so the theory is addressable by
    /// `name` before the next commit.
    pub fn create_theory(&mut self, name: &str) -> Result<Slid, String> {
        let sort_id = self.sort_ids.theory.ok_or("Theory sort not found")?;
        let theory_slid = self.add_element(sort_id, name);

        // Register uncommitted binding
        self.uncommitted.insert(
            name.to_string(),
            UncommittedBinding {
                target: theory_slid,
                kind: BindingKind::Theory,
            },
        );

        Ok(theory_slid)
    }

    /// Create a new version of an existing theory.
    ///
    /// The new version element is named `name@vN` (N = current carrier size)
    /// and linked to `parent` via `Theory/parent`; the uncommitted binding for
    /// `name` is repointed at the new version.
    pub fn extend_theory(&mut self, parent: Slid, name: &str) -> Result<Slid, String> {
        let sort_id = self.sort_ids.theory.ok_or("Theory sort not found")?;
        let theory_slid = self.add_element(
            sort_id,
            &format!("{}@v{}", name, self.meta.carriers[sort_id].len()),
        );

        // Set parent
        let func_id = self.func_ids.theory_parent.ok_or("Theory/parent not found")?;
        self.define_func(func_id, theory_slid, parent)?;

        // Update uncommitted binding
        self.uncommitted.insert(
            name.to_string(),
            UncommittedBinding {
                target: theory_slid,
                kind: BindingKind::Theory,
            },
        );

        Ok(theory_slid)
    }

    /// Add a sort to a theory
    pub fn add_sort(&mut self, theory: Slid, name: &str) -> Result<Slid, String> {
        let sort_id = self.sort_ids.srt.ok_or("Srt sort not found")?;
        let srt_slid = self.add_element_qualified(
            sort_id,
            vec![self.get_element_name(theory), name.to_string()],
        );

        let func_id = self.func_ids.srt_theory.ok_or("Srt/theory not found")?;
        self.define_func(func_id, srt_slid, theory)?;

        Ok(srt_slid)
    }

    /// Add a function to a theory, linking its theory, domain, and codomain.
    pub fn add_function(
        &mut self,
        theory: Slid,
        name: &str,
        domain: Slid,
        codomain: Slid,
    ) -> Result<Slid, String> {
        let sort_id = self.sort_ids.func.ok_or("Func sort not found")?;
        let func_slid = self.add_element_qualified(
            sort_id,
            vec![self.get_element_name(theory), name.to_string()],
        );

        let theory_func = self.func_ids.func_theory.ok_or("Func/theory not found")?;
        let dom_func = self.func_ids.func_dom.ok_or("Func/dom not found")?;
        let cod_func = self.func_ids.func_cod.ok_or("Func/cod not found")?;

        self.define_func(theory_func, func_slid, theory)?;
        self.define_func(dom_func, func_slid, domain)?;
        self.define_func(cod_func, func_slid, codomain)?;

        Ok(func_slid)
    }

    /// Add a relation to a theory, linking its theory and domain.
    pub fn add_relation(&mut self, theory: Slid, name: &str, domain: Slid) -> Result<Slid, String> {
        let sort_id = self.sort_ids.rel.ok_or("Rel sort not found")?;
        let rel_slid = self.add_element_qualified(
            sort_id,
            vec![self.get_element_name(theory), name.to_string()],
        );

        let theory_func = self.func_ids.rel_theory.ok_or("Rel/theory not found")?;
        let dom_func = self.func_ids.rel_dom.ok_or("Rel/dom not found")?;

        self.define_func(theory_func, rel_slid, theory)?;
        self.define_func(dom_func, rel_slid, domain)?;

        Ok(rel_slid)
    }

    /// Create a base DSort from a Srt.
    ///
    /// Creates both a BaseDS element and its DSort element, wired together via
    /// BaseDS/dsort and BaseDS/srt. Returns the DSort Slid.
    pub fn make_base_dsort(&mut self, srt: Slid) -> Result<Slid, String> {
        let base_ds_sort = self.sort_ids.base_ds.ok_or("BaseDS sort not found")?;
        let dsort_sort = self.sort_ids.dsort.ok_or("DSort sort not found")?;

        let base_ds_slid =
            self.add_element(base_ds_sort, &format!("base_{}", self.get_element_name(srt)));
        let dsort_slid =
            self.add_element(dsort_sort, &format!("dsort_{}", self.get_element_name(srt)));

        let dsort_func = self.func_ids.base_ds_dsort.ok_or("BaseDS/dsort not found")?;
        let srt_func = self.func_ids.base_ds_srt.ok_or("BaseDS/srt not found")?;

        self.define_func(dsort_func, base_ds_slid, dsort_slid)?;
        self.define_func(srt_func, base_ds_slid, srt)?;

        Ok(dsort_slid)
    }

    /// Create a product DSort with fields
    pub fn make_product_dsort(
        &mut self,
        theory: Slid,
        fields: &[(String, Slid)], // (field_name, field_dsort)
    ) -> Result<Slid, String> {
        let (dsort, _) = self.make_product_dsort_with_fields(theory, fields)?;
        Ok(dsort)
    }

    /// Create a product DSort with fields, returning both the DSort and field Slids
    fn make_product_dsort_with_fields(
        &mut self,
        theory: Slid,
        fields: &[(String, Slid)], // (field_name, field_dsort)
    ) -> Result<(Slid, HashMap<String, Slid>), String> {
        let prod_ds_sort = self.sort_ids.prod_ds.ok_or("ProdDS sort not found")?;
        let dsort_sort = self.sort_ids.dsort.ok_or("DSort sort not found")?;
        let field_sort = self.sort_ids.field.ok_or("Field sort not found")?;

        // Create the DSort element
        let field_names: Vec<_> = fields.iter().map(|(n, _)| n.as_str()).collect();
        let dsort_name = format!("dsort_[{}]", field_names.join(","));
        let dsort_slid = self.add_element(dsort_sort, &dsort_name);

        // Create the ProdDS element
        let prod_ds_slid = self.add_element(prod_ds_sort, &format!("prod_{}", dsort_name));

        let dsort_func = self.func_ids.prod_ds_dsort.ok_or("ProdDS/dsort not found")?;
        self.define_func(dsort_func, prod_ds_slid, dsort_slid)?;

        // Create Field elements
        let prod_func = self.func_ids.field_prod.ok_or("Field/prod not found")?;
        let type_func = self.func_ids.field_type.ok_or("Field/type not found")?;

        let mut field_slids = HashMap::new();
        for (field_name, field_dsort) in fields {
            let field_slid = self.add_element_qualified(
                field_sort,
                vec![self.get_element_name(theory), field_name.clone()],
            );
            self.define_func(prod_func, field_slid, prod_ds_slid)?;
            self.define_func(type_func, field_slid, *field_dsort)?;

            field_slids.insert(field_name.clone(), field_slid);
        }

        Ok((dsort_slid, field_slids))
    }

    /// Persist a full signature to the Store.
    ///
    /// Creates all sorts, functions, and relations in GeologMeta.
    /// Returns a mapping from sort indices to Srt Slids.
    pub fn persist_signature(
        &mut self,
        theory: Slid,
        signature: &Signature,
    ) -> Result<SignaturePersistResult, String> {
        let mut sort_slids: HashMap<usize, Slid> = HashMap::new();
        let mut dsort_slids: HashMap<usize, Slid> = HashMap::new(); // Base DSort for each sort
        let mut func_slids: HashMap<usize, Slid> = HashMap::new();
        let mut rel_slids: HashMap<usize, Slid> = HashMap::new();
        let mut field_slids: HashMap<String, Slid> = HashMap::new();

        // 1. Create all Srt elements and their base DSorts
        for (sort_id, sort_name) in signature.sorts.iter().enumerate() {
            let srt_slid = self.add_sort(theory, sort_name)?;
            sort_slids.insert(sort_id, srt_slid);

            // Create base DSort for this sort
            let dsort_slid = self.make_base_dsort(srt_slid)?;
            dsort_slids.insert(sort_id, dsort_slid);
        }

        // 2. Create all Func elements
        for (func_id, func_sym) in signature.functions.iter().enumerate() {
            let (domain_dsort, dom_fields) =
                self.persist_derived_sort_with_fields(theory, &func_sym.domain, &dsort_slids)?;
            let (codomain_dsort, cod_fields) =
                self.persist_derived_sort_with_fields(theory, &func_sym.codomain, &dsort_slids)?;

            // Collect field slids
            field_slids.extend(dom_fields);
            field_slids.extend(cod_fields);

            let func_slid =
                self.add_function(theory, &func_sym.name, domain_dsort, codomain_dsort)?;
            func_slids.insert(func_id, func_slid);
        }

        // 3. Create all Rel elements
        for (rel_id, rel_sym) in signature.relations.iter().enumerate() {
            let (domain_dsort, dom_fields) =
                self.persist_derived_sort_with_fields(theory, &rel_sym.domain, &dsort_slids)?;
            field_slids.extend(dom_fields);

            let rel_slid = self.add_relation(theory, &rel_sym.name, domain_dsort)?;
            rel_slids.insert(rel_id, rel_slid);
        }

        Ok(SignaturePersistResult {
            sort_slids,
            dsort_slids,
            func_slids,
            rel_slids,
            field_slids,
        })
    }

    /// Convert a DerivedSort to a DSort Slid, creating necessary elements.
    fn persist_derived_sort(
        &mut self,
        theory: Slid,
        ds: &DerivedSort,
        dsort_slids: &HashMap<usize, Slid>,
    ) -> Result<Slid, String> {
        let (dsort, _) = self.persist_derived_sort_with_fields(theory, ds, dsort_slids)?;
        Ok(dsort)
    }

    /// Convert a DerivedSort to a DSort Slid, also returning field Slids.
    fn persist_derived_sort_with_fields(
        &mut self,
        theory: Slid,
        ds: &DerivedSort,
        dsort_slids: &HashMap<usize, Slid>,
    ) -> Result<(Slid, HashMap<String, Slid>), String> {
        match ds {
            DerivedSort::Base(sort_id) => {
                let dsort = dsort_slids
                    .get(sort_id)
                    .copied()
                    .ok_or_else(|| format!("Unknown sort id: {}", sort_id))?;
                Ok((dsort, HashMap::new()))
            }
            DerivedSort::Product(fields) => {
                if fields.is_empty() {
                    // Unit type - create empty product
                    let dsort = self.make_product_dsort(theory, &[])?;
                    Ok((dsort, HashMap::new()))
                } else {
                    // Recursively persist field types
                    let mut field_dsorts = Vec::new();
                    let mut all_field_slids = HashMap::new();

                    for (field_name, field_type) in fields {
                        let (field_dsort, nested_fields) =
                            self.persist_derived_sort_with_fields(theory, field_type, dsort_slids)?;
                        field_dsorts.push((field_name.clone(), field_dsort));
                        all_field_slids.extend(nested_fields);
                    }

                    let (dsort, new_field_slids) =
                        self.make_product_dsort_with_fields(theory, &field_dsorts)?;
                    all_field_slids.extend(new_field_slids);

                    Ok((dsort, all_field_slids))
                }
            }
        }
    }

    //
================================================================ + // AXIOM PERSISTENCE + // ================================================================ + + /// Create a Binder element with the given type. + fn persist_binder( + &mut self, + name: &str, + dsort: Slid, + ) -> Result { + let binder_sort = self.sort_ids.binder.ok_or("Binder sort not found")?; + let binder_slid = self.add_element(binder_sort, &format!("binder_{}", name)); + + let type_func = self.func_ids.binder_type.ok_or("Binder/type not found")?; + self.define_func(type_func, binder_slid, dsort)?; + + Ok(binder_slid) + } + + /// Persist a Term, returning its Term Slid. + /// + /// # Arguments + /// - `theory`: The theory this term belongs to + /// - `term`: The term to persist + /// - `sig_result`: Mapping from signature indices to Slids + /// - `binders`: Mapping from variable names to their Binder Slids + pub fn persist_term( + &mut self, + theory: Slid, + term: &Term, + sig_result: &SignaturePersistResult, + binders: &HashMap, + ) -> Result { + let term_sort = self.sort_ids.term.ok_or("Term sort not found")?; + let node_sort = self.sort_ids.node.ok_or("Node sort not found")?; + + match term { + Term::Var(name, _sort) => { + // Create VarT element + let var_t_sort = self.sort_ids.var_t.ok_or("VarT sort not found")?; + let term_slid = self.add_element(term_sort, &format!("term_var_{}", name)); + let var_t_slid = self.add_element(var_t_sort, &format!("var_t_{}", name)); + + // Link VarT to Term + let term_func = self.func_ids.var_t_term.ok_or("VarT/term not found")?; + self.define_func(term_func, var_t_slid, term_slid)?; + + // Link VarT to Binder + let binder_slid = binders + .get(name) + .copied() + .ok_or_else(|| format!("Unknown variable: {}", name))?; + let binder_func = self.func_ids.var_t_binder.ok_or("VarT/binder not found")?; + self.define_func(binder_func, var_t_slid, binder_slid)?; + + // Create Node for scoping + let node_slid = self.add_element(node_sort, 
&format!("node_term_var_{}", name)); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + + Term::App(func_id, arg) => { + // Recursively persist argument + let arg_slid = self.persist_term(theory, arg, sig_result, binders)?; + + // Create AppT element + let app_t_sort = self.sort_ids.app_t.ok_or("AppT sort not found")?; + let term_slid = self.add_element(term_sort, "term_app"); + let app_t_slid = self.add_element(app_t_sort, "app_t"); + + // Link AppT to Term + let term_func = self.func_ids.app_t_term.ok_or("AppT/term not found")?; + self.define_func(term_func, app_t_slid, term_slid)?; + + // Link AppT to Func + let func_slid = sig_result + .func_slids + .get(func_id) + .copied() + .ok_or_else(|| format!("Unknown function id: {}", func_id))?; + let func_func = self.func_ids.app_t_func.ok_or("AppT/func not found")?; + self.define_func(func_func, app_t_slid, func_slid)?; + + // Link AppT to argument Term + let arg_func = self.func_ids.app_t_arg.ok_or("AppT/arg not found")?; + self.define_func(arg_func, app_t_slid, arg_slid)?; + + // Create Node for scoping + let node_slid = self.add_element(node_sort, "node_term_app"); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + + Term::Record(fields) => { + // Create RecordT element + let record_t_sort = self.sort_ids.record_t.ok_or("RecordT sort not found")?; + let rec_entry_sort = self.sort_ids.rec_entry.ok_or("RecEntry sort not found")?; + + let term_slid = self.add_element(term_sort, "term_record"); + let record_t_slid = self.add_element(record_t_sort, "record_t"); + + // Link RecordT to Term + let term_func = self.func_ids.record_t_term.ok_or("RecordT/term not found")?; + self.define_func(term_func, record_t_slid, term_slid)?; + + // Create RecEntry for each field + for (field_name, field_term) in fields { 
+ let val_slid = self.persist_term(theory, field_term, sig_result, binders)?; + + let rec_entry_slid = + self.add_element(rec_entry_sort, &format!("rec_entry_{}", field_name)); + + // Link to record + let record_func = self.func_ids.rec_entry_record.ok_or("RecEntry/record not found")?; + self.define_func(record_func, rec_entry_slid, record_t_slid)?; + + // Link to value + let val_func = self.func_ids.rec_entry_val.ok_or("RecEntry/val not found")?; + self.define_func(val_func, rec_entry_slid, val_slid)?; + + // Link to field (need to look up Field Slid by name) + if let Some(&field_slid) = sig_result.field_slids.get(field_name) { + let field_func = self.func_ids.rec_entry_field.ok_or("RecEntry/field not found")?; + self.define_func(field_func, rec_entry_slid, field_slid)?; + } + // Note: field_slids may not contain all fields if they weren't persisted + // (e.g., for inline record types). This is a known limitation. + } + + // Create Node for scoping + let node_slid = self.add_element(node_sort, "node_term_record"); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + + Term::Project(base, field_name) => { + // Recursively persist base term + let base_slid = self.persist_term(theory, base, sig_result, binders)?; + + // Create ProjT element + let proj_t_sort = self.sort_ids.proj_t.ok_or("ProjT sort not found")?; + let term_slid = self.add_element(term_sort, &format!("term_proj_{}", field_name)); + let proj_t_slid = self.add_element(proj_t_sort, &format!("proj_t_{}", field_name)); + + // Link ProjT to Term + let term_func = self.func_ids.proj_t_term.ok_or("ProjT/term not found")?; + self.define_func(term_func, proj_t_slid, term_slid)?; + + // Link ProjT to base Term + let base_func = self.func_ids.proj_t_base.ok_or("ProjT/base not found")?; + self.define_func(base_func, proj_t_slid, base_slid)?; + + // Link ProjT to Field (if we can find it) + if let 
Some(&field_slid) = sig_result.field_slids.get(field_name) { + let field_func = self.func_ids.proj_t_field.ok_or("ProjT/field not found")?; + self.define_func(field_func, proj_t_slid, field_slid)?; + } + + // Create Node for scoping + let node_slid = self.add_element(node_sort, &format!("node_term_proj_{}", field_name)); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + } + } + + /// Persist a Formula, returning its Formula Slid. + /// + /// # Arguments + /// - `theory`: The theory this formula belongs to + /// - `formula`: The formula to persist + /// - `sig_result`: Mapping from signature indices to Slids + /// - `binders`: Mapping from variable names to their Binder Slids (mutable for Exists) + pub fn persist_formula( + &mut self, + theory: Slid, + formula: &Formula, + sig_result: &SignaturePersistResult, + binders: &mut HashMap, + ) -> Result { + let formula_sort = self.sort_ids.formula.ok_or("Formula sort not found")?; + let node_sort = self.sort_ids.node.ok_or("Node sort not found")?; + + match formula { + Formula::True => { + let true_f_sort = self.sort_ids.true_f.ok_or("TrueF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_true"); + let true_f_slid = self.add_element(true_f_sort, "true_f"); + + let formula_func = self.func_ids.true_f_formula.ok_or("TrueF/formula not found")?; + self.define_func(formula_func, true_f_slid, formula_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_true"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::False => { + let false_f_sort = self.sort_ids.false_f.ok_or("FalseF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_false"); + let false_f_slid = self.add_element(false_f_sort, "false_f"); + + let 
formula_func = self.func_ids.false_f_formula.ok_or("FalseF/formula not found")?; + self.define_func(formula_func, false_f_slid, formula_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_false"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Eq(lhs, rhs) => { + let lhs_slid = self.persist_term(theory, lhs, sig_result, binders)?; + let rhs_slid = self.persist_term(theory, rhs, sig_result, binders)?; + + let eq_f_sort = self.sort_ids.eq_f.ok_or("EqF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_eq"); + let eq_f_slid = self.add_element(eq_f_sort, "eq_f"); + + let formula_func = self.func_ids.eq_f_formula.ok_or("EqF/formula not found")?; + self.define_func(formula_func, eq_f_slid, formula_slid)?; + + let lhs_func = self.func_ids.eq_f_lhs.ok_or("EqF/lhs not found")?; + self.define_func(lhs_func, eq_f_slid, lhs_slid)?; + + let rhs_func = self.func_ids.eq_f_rhs.ok_or("EqF/rhs not found")?; + self.define_func(rhs_func, eq_f_slid, rhs_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_eq"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Rel(rel_id, arg) => { + let arg_slid = self.persist_term(theory, arg, sig_result, binders)?; + + let rel_f_sort = self.sort_ids.rel_f.ok_or("RelF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_rel"); + let rel_f_slid = self.add_element(rel_f_sort, "rel_f"); + + let formula_func = self.func_ids.rel_f_formula.ok_or("RelF/formula not found")?; + self.define_func(formula_func, rel_f_slid, formula_slid)?; + + let arg_func = self.func_ids.rel_f_arg.ok_or("RelF/arg not found")?; + self.define_func(arg_func, rel_f_slid, arg_slid)?; + + let rel_slid = sig_result + 
.rel_slids + .get(rel_id) + .copied() + .ok_or_else(|| format!("Unknown relation id: {}", rel_id))?; + let rel_func = self.func_ids.rel_f_rel.ok_or("RelF/rel not found")?; + self.define_func(rel_func, rel_f_slid, rel_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_rel"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Conj(conjuncts) => { + let conj_f_sort = self.sort_ids.conj_f.ok_or("ConjF sort not found")?; + let conj_arm_sort = self.sort_ids.conj_arm.ok_or("ConjArm sort not found")?; + + let formula_slid = self.add_element(formula_sort, "formula_conj"); + let conj_f_slid = self.add_element(conj_f_sort, "conj_f"); + + let formula_func = self.func_ids.conj_f_formula.ok_or("ConjF/formula not found")?; + self.define_func(formula_func, conj_f_slid, formula_slid)?; + + // Persist each conjunct as a ConjArm + for (i, child_formula) in conjuncts.iter().enumerate() { + let child_slid = self.persist_formula(theory, child_formula, sig_result, binders)?; + + let arm_slid = self.add_element(conj_arm_sort, &format!("conj_arm_{}", i)); + + let conj_func = self.func_ids.conj_arm_conj.ok_or("ConjArm/conj not found")?; + self.define_func(conj_func, arm_slid, conj_f_slid)?; + + let child_func = self.func_ids.conj_arm_child.ok_or("ConjArm/child not found")?; + self.define_func(child_func, arm_slid, child_slid)?; + } + + let node_slid = self.add_element(node_sort, "node_formula_conj"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Disj(disjuncts) => { + let disj_f_sort = self.sort_ids.disj_f.ok_or("DisjF sort not found")?; + let disj_arm_sort = self.sort_ids.disj_arm.ok_or("DisjArm sort not found")?; + + let formula_slid = self.add_element(formula_sort, "formula_disj"); + let 
disj_f_slid = self.add_element(disj_f_sort, "disj_f"); + + let formula_func = self.func_ids.disj_f_formula.ok_or("DisjF/formula not found")?; + self.define_func(formula_func, disj_f_slid, formula_slid)?; + + // Persist each disjunct as a DisjArm + for (i, child_formula) in disjuncts.iter().enumerate() { + let child_slid = self.persist_formula(theory, child_formula, sig_result, binders)?; + + let arm_slid = self.add_element(disj_arm_sort, &format!("disj_arm_{}", i)); + + let disj_func = self.func_ids.disj_arm_disj.ok_or("DisjArm/disj not found")?; + self.define_func(disj_func, arm_slid, disj_f_slid)?; + + let child_func = self.func_ids.disj_arm_child.ok_or("DisjArm/child not found")?; + self.define_func(child_func, arm_slid, child_slid)?; + } + + let node_slid = self.add_element(node_sort, "node_formula_disj"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Exists(var_name, var_sort, body) => { + let exists_f_sort = self.sort_ids.exists_f.ok_or("ExistsF sort not found")?; + + let formula_slid = self.add_element(formula_sort, &format!("formula_exists_{}", var_name)); + let exists_f_slid = self.add_element(exists_f_sort, &format!("exists_f_{}", var_name)); + + let formula_func = self.func_ids.exists_f_formula.ok_or("ExistsF/formula not found")?; + self.define_func(formula_func, exists_f_slid, formula_slid)?; + + // Create binder for this existential + let dsort = self.persist_derived_sort(theory, var_sort, &sig_result.dsort_slids)?; + let binder_slid = self.persist_binder(var_name, dsort)?; + + let binder_func = self.func_ids.exists_f_binder.ok_or("ExistsF/binder not found")?; + self.define_func(binder_func, exists_f_slid, binder_slid)?; + + // Extend binders for the body + let old_binder = binders.insert(var_name.clone(), binder_slid); + + // Persist body with extended binders + let body_slid = self.persist_formula(theory, 
body, sig_result, binders)?; + + // Restore old binder (if any) for proper scoping + if let Some(old) = old_binder { + binders.insert(var_name.clone(), old); + } else { + binders.remove(var_name); + } + + let body_func = self.func_ids.exists_f_body.ok_or("ExistsF/body not found")?; + self.define_func(body_func, exists_f_slid, body_slid)?; + + let node_slid = self.add_element(node_sort, &format!("node_formula_exists_{}", var_name)); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + } + } + + /// Persist an axiom (Sequent) to GeologMeta. + /// + /// Creates the Sequent element, context variables, premise, and conclusion. + pub fn persist_axiom( + &mut self, + theory: Slid, + axiom: &Sequent, + axiom_name: &str, + sig_result: &SignaturePersistResult, + ) -> Result { + let sequent_sort = self.sort_ids.sequent.ok_or("Sequent sort not found")?; + let ctx_var_sort = self.sort_ids.ctx_var.ok_or("CtxVar sort not found")?; + + // Create Sequent element + let sequent_slid = self.add_element_qualified( + sequent_sort, + vec![self.get_element_name(theory), axiom_name.to_string()], + ); + + // Link to theory + let theory_func = self.func_ids.sequent_theory.ok_or("Sequent/theory not found")?; + self.define_func(theory_func, sequent_slid, theory)?; + + // Create binders for context variables + let mut binders = HashMap::new(); + for (var_name, var_sort) in &axiom.context.vars { + let dsort = self.persist_derived_sort(theory, var_sort, &sig_result.dsort_slids)?; + let binder_slid = self.persist_binder(var_name, dsort)?; + binders.insert(var_name.clone(), binder_slid); + + // Create CtxVar linking sequent to binder + let ctx_var_slid = self.add_element(ctx_var_sort, &format!("ctx_var_{}", var_name)); + + let sequent_func = self.func_ids.ctx_var_sequent.ok_or("CtxVar/sequent not found")?; + self.define_func(sequent_func, ctx_var_slid, sequent_slid)?; + + 
let binder_func = self.func_ids.ctx_var_binder.ok_or("CtxVar/binder not found")?; + self.define_func(binder_func, ctx_var_slid, binder_slid)?; + } + + // Persist premise formula + let premise_slid = self.persist_formula(theory, &axiom.premise, sig_result, &mut binders)?; + let premise_func = self.func_ids.sequent_premise.ok_or("Sequent/premise not found")?; + self.define_func(premise_func, sequent_slid, premise_slid)?; + + // Persist conclusion formula + let conclusion_slid = self.persist_formula(theory, &axiom.conclusion, sig_result, &mut binders)?; + let conclusion_func = self.func_ids.sequent_conclusion.ok_or("Sequent/conclusion not found")?; + self.define_func(conclusion_func, sequent_slid, conclusion_slid)?; + + Ok(sequent_slid) + } + + /// Persist all axioms from a Theory to GeologMeta. + pub fn persist_axioms( + &mut self, + theory: Slid, + axioms: &[Sequent], + axiom_names: &[String], + sig_result: &SignaturePersistResult, + ) -> Result, String> { + let mut axiom_slids = Vec::new(); + for (axiom, name) in axioms.iter().zip(axiom_names.iter()) { + let slid = self.persist_axiom(theory, axiom, name, sig_result)?; + axiom_slids.push(slid); + } + Ok(axiom_slids) + } +} + +/// Result of persisting a signature to GeologMeta. +/// +/// Maps from local indices (as used in Signature) to Slids in GeologMeta. +#[derive(Debug)] +pub struct SignaturePersistResult { + /// Sort index -> Srt Slid + pub sort_slids: HashMap, + /// Sort index -> base DSort Slid for that sort + pub dsort_slids: HashMap, + /// Function index -> Func Slid + pub func_slids: HashMap, + /// Relation index -> Rel Slid + pub rel_slids: HashMap, + /// Field name -> Field Slid (for record types in domains) + pub field_slids: HashMap, +} diff --git a/src/tensor/builder.rs b/src/tensor/builder.rs new file mode 100644 index 0000000..27ef44a --- /dev/null +++ b/src/tensor/builder.rs @@ -0,0 +1,392 @@ +//! Builder helpers for tensor expressions. +//! +//! 
High-level operations like conjunction, existential quantification, and disjunction. + +use std::collections::{BTreeSet, HashMap}; + +use super::expr::TensorExpr; + +/// Conjunction of two tensor expressions with variable alignment. +/// +/// Given tensors T₁ and T₂ with named variables, compute their conjunction +/// by building Product + Contract to identify shared variables. +pub fn conjunction( + t1: TensorExpr, + vars1: &[String], + t2: TensorExpr, + vars2: &[String], +) -> (TensorExpr, Vec) { + // Compute combined variable list and mapping + let mut combined_vars: Vec = vars1.to_vec(); + let mut var_to_target: HashMap<&str, usize> = HashMap::new(); + + for (i, v) in vars1.iter().enumerate() { + var_to_target.insert(v, i); + } + + let mut index_map: Vec = (0..vars1.len()).collect(); + + for v in vars2 { + if let Some(&target) = var_to_target.get(v.as_str()) { + index_map.push(target); + } else { + let new_target = combined_vars.len(); + var_to_target.insert(v, new_target); + combined_vars.push(v.clone()); + index_map.push(new_target); + } + } + + let output: BTreeSet = (0..combined_vars.len()).collect(); + + let expr = TensorExpr::Contract { + inner: Box::new(TensorExpr::Product(vec![t1, t2])), + index_map, + output, + }; + + (expr, combined_vars) +} + +/// Existential quantification over a variable. +/// +/// Removes the variable by OR-ing over all its values (contraction). 
+pub fn exists(tensor: TensorExpr, vars: &[String], var: &str) -> (TensorExpr, Vec) { + let var_idx = vars.iter().position(|v| v == var); + + match var_idx { + None => (tensor, vars.to_vec()), + Some(idx) => { + let fresh_target = vars.len(); + let index_map: Vec = (0..vars.len()) + .map(|i| if i == idx { fresh_target } else { i }) + .collect(); + + let output: BTreeSet = (0..vars.len()).filter(|&i| i != idx).collect(); + + let result_vars: Vec = vars + .iter() + .enumerate() + .filter(|&(i, _)| i != idx) + .map(|(_, v)| v.clone()) + .collect(); + + let expr = TensorExpr::Contract { + inner: Box::new(tensor), + index_map, + output, + }; + + (expr, result_vars) + } + } +} + +/// Multi-way conjunction with variable alignment. +pub fn conjunction_all(tensors: Vec<(TensorExpr, Vec)>) -> (TensorExpr, Vec) { + if tensors.is_empty() { + return (TensorExpr::scalar(true), vec![]); + } + + let mut result = tensors.into_iter(); + let (mut acc_expr, mut acc_vars) = result.next().unwrap(); + + for (expr, vars) in result { + let (new_expr, new_vars) = conjunction(acc_expr, &acc_vars, expr, &vars); + acc_expr = new_expr; + acc_vars = new_vars; + } + + (acc_expr, acc_vars) +} + +/// Disjunction of two tensor expressions with variable alignment. +/// +/// Both tensors must have the same variables (possibly in different order). +/// The result is the pointwise OR. +pub fn disjunction( + t1: TensorExpr, + vars1: &[String], + t2: TensorExpr, + vars2: &[String], +) -> (TensorExpr, Vec) { + // Check that variables are the same set + let set1: std::collections::HashSet<_> = vars1.iter().collect(); + let set2: std::collections::HashSet<_> = vars2.iter().collect(); + + if set1 != set2 { + // Variables don't match - this should have been handled at compile_formula level + // by extending tensors with full-domain products for missing variables. + // If we get here, something went wrong. 
+ let only_in_1: Vec<_> = set1.difference(&set2).collect(); + let only_in_2: Vec<_> = set2.difference(&set1).collect(); + panic!( + "disjunction received mismatched variables (should have been aligned).\n\ + Left has: {:?}, Right has: {:?}\n\ + Only in left: {:?}, Only in right: {:?}", + vars1, vars2, only_in_1, only_in_2 + ); + } + + // If vars2 is in different order than vars1, reorder t2 via Contract + if vars1 == vars2 { + // Same order, just union + (TensorExpr::Sum(vec![t1, t2]), vars1.to_vec()) + } else { + // Need to reorder t2 to match vars1 ordering + // Build index_map from vars2 positions to vars1 positions + let index_map: Vec = vars2 + .iter() + .map(|v| vars1.iter().position(|v1| v1 == v).unwrap()) + .collect(); + let output: BTreeSet = (0..vars1.len()).collect(); + + let t2_reordered = TensorExpr::Contract { + inner: Box::new(t2), + index_map, + output, + }; + + (TensorExpr::Sum(vec![t1, t2_reordered]), vars1.to_vec()) + } +} + +/// Multi-way disjunction with variable alignment. +/// +/// All tensors must have the same variables. 
+pub fn disjunction_all(tensors: Vec<(TensorExpr, Vec)>) -> (TensorExpr, Vec) { + if tensors.is_empty() { + return (TensorExpr::scalar(false), vec![]); + } + + let mut result = tensors.into_iter(); + let (mut acc_expr, mut acc_vars) = result.next().unwrap(); + + for (expr, vars) in result { + let (new_expr, new_vars) = disjunction(acc_expr, &acc_vars, expr, &vars); + acc_expr = new_expr; + acc_vars = new_vars; + } + + (acc_expr, acc_vars) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tensor::sparse::SparseTensor; + + fn leaf(t: SparseTensor) -> TensorExpr { + TensorExpr::leaf(t) + } + + #[test] + fn test_conjunction() { + let mut r = SparseTensor::empty(vec![2, 2]); + r.insert(vec![0, 1]); + + let mut s = SparseTensor::empty(vec![2, 2]); + s.insert(vec![1, 0]); + s.insert(vec![1, 1]); + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string(), "z".to_string()]; + + let (expr, vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "y", "z"]); + assert_eq!(result.dims, vec![2, 2, 2]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0, 1, 0])); + assert!(result.contains(&[0, 1, 1])); + } + + #[test] + fn test_exists() { + let mut t = SparseTensor::empty(vec![2, 2]); + t.insert(vec![0, 0]); + t.insert(vec![0, 1]); + t.insert(vec![1, 1]); + + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = exists(leaf(t), &vars, "y"); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x"]); + assert_eq!(result.dims, vec![2]); + assert_eq!(result.len(), 2); + } + + #[test] + fn test_relational_join() { + // R(x,y) ⋈ S(y,z) then ∃y + let mut r = SparseTensor::empty(vec![3, 3]); + r.insert(vec![0, 1]); + r.insert(vec![1, 2]); + + let mut s = SparseTensor::empty(vec![3, 3]); + s.insert(vec![0, 1]); + s.insert(vec![1, 2]); + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string(), 
"z".to_string()]; + + let (conj, vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + assert_eq!(vars, vec!["x", "y", "z"]); + + let (result_expr, result_vars) = exists(conj, &vars, "y"); + let result = result_expr.materialize(); + + assert_eq!(result_vars, vec!["x", "z"]); + assert!(result.contains(&[0, 2])); // path 0→1→2 + } + + #[test] + fn test_fused_join_uses_hash() { + // Large-ish tensors to verify hash join path works + let mut r = SparseTensor::empty(vec![100, 100]); + let mut s = SparseTensor::empty(vec![100, 100]); + + // Sparse data + for i in 0..50 { + r.insert(vec![i, i + 1]); + s.insert(vec![i + 1, i + 2]); + } + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string(), "z".to_string()]; + + let (conj, vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + let (result_expr, _) = exists(conj, &vars, "y"); + let result = result_expr.materialize(); + + // Should have 50 paths: 0→2, 1→3, ..., 49→51 + assert_eq!(result.len(), 50); + assert!(result.contains(&[0, 2])); + assert!(result.contains(&[49, 51])); + } + + #[test] + fn test_disjunction_same_vars() { + // R(x,y) ∨ S(x,y) with same variable order + let mut r = SparseTensor::empty(vec![2, 2]); + r.insert(vec![0, 0]); + + let mut s = SparseTensor::empty(vec![2, 2]); + s.insert(vec![1, 1]); + + let vars = vec!["x".to_string(), "y".to_string()]; + + let (expr, result_vars) = disjunction(leaf(r), &vars, leaf(s), &vars); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x", "y"]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0, 0])); + assert!(result.contains(&[1, 1])); + } + + #[test] + fn test_disjunction_reordered_vars() { + // R(x,y) ∨ S(y,x) - different variable order requires reordering + let mut r = SparseTensor::empty(vec![2, 3]); + r.insert(vec![0, 1]); // x=0, y=1 + + let mut s = SparseTensor::empty(vec![3, 2]); + s.insert(vec![2, 1]); // y=2, x=1 + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = 
vec!["y".to_string(), "x".to_string()]; + + let (expr, result_vars) = disjunction(leaf(r), &vars_r, leaf(s), &vars_s); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x", "y"]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0, 1])); // from R + assert!(result.contains(&[1, 2])); // from S reordered + } + + #[test] + fn test_disjunction_all() { + // R(x) ∨ S(x) ∨ T(x) + let mut r = SparseTensor::empty(vec![5]); + r.insert(vec![0]); + + let mut s = SparseTensor::empty(vec![5]); + s.insert(vec![1]); + + let mut t = SparseTensor::empty(vec![5]); + t.insert(vec![2]); + + let vars = vec!["x".to_string()]; + + let (expr, result_vars) = disjunction_all(vec![ + (leaf(r), vars.clone()), + (leaf(s), vars.clone()), + (leaf(t), vars.clone()), + ]); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x"]); + assert_eq!(result.len(), 3); + assert!(result.contains(&[0])); + assert!(result.contains(&[1])); + assert!(result.contains(&[2])); + } + + #[test] + fn test_disjunction_all_empty() { + // Empty disjunction = false + let (expr, vars) = disjunction_all(vec![]); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert!(result.is_empty()); + } + + #[test] + fn test_geometric_formula_pattern() { + // Test pattern from geometric logic: ∃y. (R(x,y) ∧ S(y)) ∨ (T(x)) + // This exercises Sum inside a more complex expression + + // R(x,y): edges 0→1, 1→2 + let mut r = SparseTensor::empty(vec![3, 3]); + r.insert(vec![0, 1]); + r.insert(vec![1, 2]); + + // S(y): valid y values {1, 2} + let mut s = SparseTensor::empty(vec![3]); + s.insert(vec![1]); + s.insert(vec![2]); + + // T(x): alternative x values {2} + let mut t = SparseTensor::empty(vec![3]); + t.insert(vec![2]); + + // Build: R(x,y) ∧ S(y) + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string()]; + let (conj, conj_vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + // conj_vars = ["x", "y"] + + // ∃y. 
(R(x,y) ∧ S(y)) + let (exists_expr, exists_vars) = exists(conj, &conj_vars, "y"); + // exists_vars = ["x"] + + // (∃y. R(x,y) ∧ S(y)) ∨ T(x) + let vars_t = vec!["x".to_string()]; + let (result_expr, result_vars) = disjunction(exists_expr, &exists_vars, leaf(t), &vars_t); + + let result = result_expr.materialize(); + + assert_eq!(result_vars, vec!["x"]); + // From R ∧ S: x=0 (path 0→1, 1∈S) and x=1 (path 1→2, 2∈S) + // From T: x=2 + assert_eq!(result.len(), 3); + assert!(result.contains(&[0])); + assert!(result.contains(&[1])); + assert!(result.contains(&[2])); + } +} diff --git a/src/tensor/check.rs b/src/tensor/check.rs new file mode 100644 index 0000000..cbdd5c8 --- /dev/null +++ b/src/tensor/check.rs @@ -0,0 +1,580 @@ +//! Sequent checking using tensor expressions. + +use crate::core::{Sequent, Signature, Structure}; + +use super::compile::{compile_formula, derived_sort_cardinality, CompileContext, CompileError}; +use super::sparse::DomainIterator; + +/// A violation of a sequent: a variable assignment where the premise holds but conclusion doesn't. 
+#[derive(Clone, Debug)] +pub struct Violation { + /// The tuple indices representing the variable assignment + pub assignment: Vec, + /// Variable names (for debugging/reporting) + pub variable_names: Vec, +} + +impl Violation { + pub fn new(assignment: Vec, variable_names: Vec) -> Self { + Self { + assignment, + variable_names, + } + } +} + +/// Result of checking a sequent +#[derive(Clone, Debug)] +pub enum CheckResult { + /// The sequent is satisfied (all assignments that satisfy the premise also satisfy the conclusion) + Satisfied, + /// The sequent is violated (some assignments satisfy the premise but not the conclusion) + Violated(Vec), +} + +impl CheckResult { + pub fn is_satisfied(&self) -> bool { + matches!(self, CheckResult::Satisfied) + } + + pub fn violations(&self) -> &[Violation] { + match self { + CheckResult::Satisfied => &[], + CheckResult::Violated(vs) => vs, + } + } +} + +/// Check if a sequent is satisfied by a structure. +/// +/// For sequent `∀ctx. premise ⊢ conclusion`: +/// - Compiles both premise and conclusion to TensorExprs +/// - Materializes both (with fusion) +/// - Checks that every tuple in premise is also in conclusion +/// +/// Returns `CheckResult::Satisfied` if the sequent holds, or `CheckResult::Violated` +/// with a list of violating assignments. 
+pub fn check_sequent(sequent: &Sequent, structure: &Structure, sig: &Signature) -> Result { + let ctx = CompileContext::from_context(&sequent.context); + + // Compile premise and conclusion + let (premise_expr, premise_vars) = compile_formula(&sequent.premise, &ctx, structure, sig)?; + let (conclusion_expr, conclusion_vars) = + compile_formula(&sequent.conclusion, &ctx, structure, sig)?; + + // Materialize both + let premise_tensor = premise_expr.materialize(); + let conclusion_tensor = conclusion_expr.materialize(); + + // Handle edge cases + if premise_tensor.is_empty() { + // Vacuously true: no assignments satisfy the premise + return Ok(CheckResult::Satisfied); + } + + // Handle case where conclusion is scalar true (no variables) + if conclusion_vars.is_empty() && conclusion_tensor.contains(&[]) { + // Conclusion is just "true" - always satisfied + return Ok(CheckResult::Satisfied); + } + + // Handle case where premise has no variables (scalar) but conclusion has variables + // This means premise is "true" and we need to check conclusion holds universally + if premise_vars.is_empty() && !conclusion_vars.is_empty() { + // Premise is "true", need to check if conclusion is universally true + // This means: for all values of conclusion_vars, conclusion holds + // We need to enumerate the domain from the context + + // Get the domain sizes from conclusion variable sorts + // We need to look up sorts in the context + let domain_sizes: Vec = sequent + .context + .vars + .iter() + .filter(|(name, _)| conclusion_vars.contains(name)) + .map(|(_, sort)| derived_sort_cardinality(structure, sort)) + .collect(); + + // Check that conclusion covers all tuples in the domain + let expected_count: usize = domain_sizes.iter().product(); + + if conclusion_tensor.len() == expected_count { + // All tuples covered + return Ok(CheckResult::Satisfied); + } + + // Find violations: tuples in domain not in conclusion + let mut violations = Vec::new(); + for tuple in 
DomainIterator::new(&domain_sizes) { + if !conclusion_tensor.contains(&tuple) { + violations.push(Violation::new(tuple, conclusion_vars.clone())); + } + } + + return if violations.is_empty() { + Ok(CheckResult::Satisfied) + } else { + Ok(CheckResult::Violated(violations)) + }; + } + + // Build mapping from premise vars to conclusion vars + // Premise might have MORE variables than conclusion (e.g., ∃y quantified out in conclusion) + // We need to project premise tuples to conclusion variables + let _projection: Vec> = premise_vars + .iter() + .map(|pv| conclusion_vars.iter().position(|cv| cv == pv)) + .collect(); + + // All conclusion vars should be present in premise vars + // (premise provides the context for checking) + for cv in &conclusion_vars { + if !premise_vars.contains(cv) { + // This shouldn't happen in well-formed sequents + panic!( + "Conclusion variable '{}' not found in premise variables {:?}", + cv, premise_vars + ); + } + } + + // Check: for every tuple in premise, the projected tuple should be in conclusion + let mut violations = Vec::new(); + + for tuple in premise_tensor.iter() { + // Project premise tuple to conclusion vars + let conclusion_tuple: Vec = conclusion_vars + .iter() + .map(|cv| { + let premise_idx = premise_vars.iter().position(|pv| pv == cv).unwrap(); + tuple[premise_idx] + }) + .collect(); + + if !conclusion_tensor.contains(&conclusion_tuple) { + violations.push(Violation::new(tuple.clone(), premise_vars.clone())); + } + } + + if violations.is_empty() { + Ok(CheckResult::Satisfied) + } else { + Ok(CheckResult::Violated(violations)) + } +} + +/// Check if a sequent is satisfied, returning just a boolean. +/// Returns false if compilation fails. +pub fn check_sequent_bool(sequent: &Sequent, structure: &Structure, sig: &Signature) -> bool { + check_sequent(sequent, structure, sig) + .map(|r| r.is_satisfied()) + .unwrap_or(false) +} + +/// Check multiple sequents (axioms of a theory) against a structure. 
+/// Returns a list of (sequent_index, violations) for each violated sequent. +/// +/// If tensor compilation fails (e.g., for unsupported formula patterns like +/// record terms in equality), silently skips that axiom. Forward chaining +/// can handle these axioms differently via `eval_term_to_slid`. +pub fn check_theory_axioms( + axioms: &[Sequent], + structure: &Structure, + sig: &Signature, +) -> Vec<(usize, Vec)> { + axioms + .iter() + .enumerate() + .filter_map(|(i, seq)| { + match check_sequent(seq, structure, sig) { + Ok(CheckResult::Satisfied) => None, + Ok(CheckResult::Violated(vs)) => Some((i, vs)), + Err(_) => { + // Tensor compilation failed (e.g., unsupported term in equality) + // Treat as satisfied for now - forward chaining will handle these + // axioms via a different code path (eval_term_to_slid). + None + } + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{Context, DerivedSort, Formula, Signature, Structure, Term}; + use crate::id::{NumericId, Slid}; + use crate::universe::Universe; + + /// Helper to create Slid from integer + fn slid(n: usize) -> Slid { + Slid::from_usize(n) + } + + /// Helper to create a test structure with a single sort and some elements + fn make_test_structure_with_relation() -> (Structure, Signature) { + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add a binary relation: edge(from: Node, to: Node) + sig.add_relation( + "edge".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); // 1 sort + + // Add 3 nodes (Slids 0, 1, 2) + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Initialize relations + structure.init_relations(&[2]); // One binary relation + + // Add edges: 0→1, 1→2 + structure.assert_relation(0, vec![slid(0), slid(1)]); + 
structure.assert_relation(0, vec![slid(1), slid(2)]); + + (structure, sig) + } + + #[test] + fn test_check_sequent_reflexivity() { + // Axiom: ∀x:Node. true ⊢ edge(x,x) -- reflexivity + // This should FAIL because our graph doesn't have self-loops + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }; + + let premise = Formula::True; + let conclusion = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let sequent = Sequent { + context: ctx, + premise, + conclusion, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + // Should be violated for all 3 nodes (no self-loops) + assert!(!result.is_satisfied()); + assert_eq!(result.violations().len(), 3); + } + + #[test] + fn test_check_sequent_edge_implies_edge() { + // Axiom: ∀x,y:Node. edge(x,y) ⊢ edge(x,y) -- tautology + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let sequent = Sequent { + context: ctx, + premise: edge_xy.clone(), + conclusion: edge_xy, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + assert!(result.is_satisfied()); + } + + #[test] + fn test_check_sequent_transitivity() { + // Axiom: ∀x,y,z:Node. 
edge(x,y) ∧ edge(y,z) ⊢ edge(x,z) -- transitivity + // This should FAIL because we have 0→1→2 but not 0→2 + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_xz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let premise = Formula::Conj(vec![edge_xy, edge_yz]); + + let sequent = Sequent { + context: ctx, + premise, + conclusion: edge_xz, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + // Should be violated: (0,1,2) satisfies premise but 0→2 is not an edge + assert!(!result.is_satisfied()); + assert_eq!(result.violations().len(), 1); + assert_eq!(result.violations()[0].assignment, vec![0, 1, 2]); + } + + #[test] + fn test_check_sequent_vacuously_true() { + // Axiom: ∀x,y:Node. 
false ⊢ edge(x,y) -- vacuously true + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }; + + let sequent = Sequent { + context: ctx, + premise: Formula::False, + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ), + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + assert!(result.is_satisfied()); + } + + #[test] + fn test_check_sequent_with_closure() { + // Add transitive closure edges to make transitivity hold + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + sig.add_relation( + "edge".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + structure.init_relations(&[2]); + + // Add edges: 0→1, 1→2, AND 0→2 (transitive closure) + structure.assert_relation(0, vec![slid(0), slid(1)]); + structure.assert_relation(0, vec![slid(1), slid(2)]); + structure.assert_relation(0, vec![slid(0), slid(2)]); // Closure! 
+ + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_xz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let premise = Formula::Conj(vec![edge_xy, edge_yz]); + + let sequent = Sequent { + context: ctx, + premise, + conclusion: edge_xz, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + // Now should be satisfied because we have 0→2 + assert!(result.is_satisfied()); + } + + #[test] + fn test_check_theory_axioms() { + let (structure, sig) = make_test_structure_with_relation(); + + // Two axioms: one true, one false + let ctx1 = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + // Axiom 1: edge(x,y) ⊢ edge(x,y) -- tautology (satisfied) + let axiom1 = Sequent { + context: ctx1.clone(), + premise: edge_xy.clone(), + conclusion: edge_xy.clone(), + }; + + // Axiom 2: true ⊢ edge(x,x) -- reflexivity (violated) + let ctx2 = Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + 
}; + let edge_xx = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let axiom2 = Sequent { + context: ctx2, + premise: Formula::True, + conclusion: edge_xx, + }; + + let violations = check_theory_axioms(&[axiom1, axiom2], &structure, &sig); + + // Only axiom 2 (index 1) should be violated + assert_eq!(violations.len(), 1); + assert_eq!(violations[0].0, 1); // Second axiom + assert_eq!(violations[0].1.len(), 3); // All 3 nodes violate reflexivity + } +} diff --git a/src/tensor/compile.rs b/src/tensor/compile.rs new file mode 100644 index 0000000..44ed75c --- /dev/null +++ b/src/tensor/compile.rs @@ -0,0 +1,1229 @@ +//! Formula compilation to tensor expressions. + +use std::collections::{BTreeSet, HashMap}; + +use crate::core::{Context, DerivedSort, Formula, RelId, Signature, Structure, Term}; +use crate::id::{NumericId, Slid}; + +use super::builder::{conjunction, conjunction_all, disjunction_all, exists}; +use super::expr::TensorExpr; +use super::sparse::SparseTensor; + +/// Error type for formula/term compilation +#[derive(Debug, Clone)] +pub enum CompileError { + /// Product sort in variable term (not yet supported) + ProductSortInVariable, + /// Function with product domain (not yet supported) + ProductDomainFunction(String), + /// Function with product codomain (not yet supported) + ProductCodomainFunction(String), + /// Record term in equality (not yet supported) + RecordInEquality, + /// Projection term in equality (not yet supported) + ProjectionInEquality, + /// Variable not found in context + UnboundVariable(String), +} + +impl std::fmt::Display for CompileError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CompileError::ProductSortInVariable => { + write!(f, "product sort in variable term not yet supported") + } + 
CompileError::ProductDomainFunction(name) => { + write!(f, "function '{}' has product domain (not yet supported)", name) + } + CompileError::ProductCodomainFunction(name) => { + write!(f, "function '{}' has product codomain (not yet supported)", name) + } + CompileError::RecordInEquality => { + write!(f, "record terms in equality not yet supported") + } + CompileError::ProjectionInEquality => { + write!(f, "projection terms in equality not yet supported") + } + CompileError::UnboundVariable(name) => { + write!(f, "variable '{}' not found in context", name) + } + } + } +} + +impl std::error::Error for CompileError {} + +/// Context for formula compilation, tracking variable names and their dimensions. +#[derive(Clone, Debug)] +pub struct CompileContext { + /// Variable names in order (these become tensor dimensions) + pub vars: Vec, + /// Variable sorts (for looking up cardinalities) + pub sorts: Vec, +} + +impl CompileContext { + pub fn new() -> Self { + Self { + vars: vec![], + sorts: vec![], + } + } + + pub fn from_context(ctx: &Context) -> Self { + Self { + vars: ctx.vars.iter().map(|(n, _)| n.clone()).collect(), + sorts: ctx.vars.iter().map(|(_, s)| s.clone()).collect(), + } + } + + pub fn lookup(&self, name: &str) -> Option { + self.vars.iter().position(|n| n == name) + } + + pub fn add(&mut self, name: String, sort: DerivedSort) { + self.vars.push(name); + self.sorts.push(sort); + } +} + +impl Default for CompileContext { + fn default() -> Self { + Self::new() + } +} + +/// Get the cardinality of a base sort in the structure. +pub fn sort_cardinality(structure: &Structure, sort_id: usize) -> usize { + structure.carriers[sort_id].len() as usize +} + +/// Get the cardinality of a derived sort. 
+pub fn derived_sort_cardinality(structure: &Structure, sort: &DerivedSort) -> usize { + match sort { + DerivedSort::Base(sort_id) => sort_cardinality(structure, *sort_id), + DerivedSort::Product(fields) => { + // Product cardinality is the product of field cardinalities + fields + .iter() + .map(|(_, s)| derived_sort_cardinality(structure, s)) + .product() + } + } +} + +/// Build a Slid-to-index map for a sort's carrier. +/// Returns a map from Slid to its position within the carrier. +pub fn build_carrier_index(structure: &Structure, sort_id: usize) -> HashMap { + structure.carriers[sort_id] + .iter() + .enumerate() + .map(|(idx, slid_u64)| (Slid::from_usize(slid_u64 as usize), idx)) + .collect() +} + +/// Convert a function's graph (extent) to a SparseTensor. +/// +/// For function f : A → B, builds a 2D tensor where (i, j) is present +/// iff f(a_i) = b_j (where a_i is the i-th element of A, b_j is j-th of B). +pub fn function_to_tensor( + structure: &Structure, + func_id: usize, + domain_sort_id: usize, + codomain_sort_id: usize, +) -> SparseTensor { + use crate::id::{NumericId, Slid}; + use std::collections::BTreeSet; + + let domain_carrier = &structure.carriers[domain_sort_id]; + let codomain_carrier = &structure.carriers[codomain_sort_id]; + + let domain_size = domain_carrier.len() as usize; + let codomain_size = codomain_carrier.len() as usize; + + // Build reverse index for codomain (Slid -> position) + let codomain_index: HashMap = codomain_carrier + .iter() + .enumerate() + .map(|(idx, slid_u64)| (Slid::from_usize(slid_u64 as usize), idx)) + .collect(); + + // Iterate over function's extent + let mut extent = BTreeSet::new(); + for (domain_idx, domain_slid_u64) in domain_carrier.iter().enumerate() { + let domain_slid = Slid::from_usize(domain_slid_u64 as usize); + let sort_slid = structure.sort_local_id(domain_slid); + + if let Some(codomain_slid) = structure.get_function(func_id, sort_slid) + && let Some(&codomain_idx) = 
codomain_index.get(&codomain_slid) { + extent.insert(vec![domain_idx, codomain_idx]); + } + } + + SparseTensor { + dims: vec![domain_size, codomain_size], + extent, + } +} + +/// Convert a VecRelation to a SparseTensor. +/// +/// The relation has tuples of Slids; we convert to indices using carrier maps. +/// `column_sorts` specifies the sort of each column for looking up carriers. +pub fn relation_to_tensor( + structure: &Structure, + rel_id: RelId, + column_sorts: &[usize], // SortId for each column +) -> SparseTensor { + let relation = &structure.relations[rel_id]; + + // Build carrier index maps for each column + let carrier_indices: Vec> = column_sorts + .iter() + .map(|&sort_id| build_carrier_index(structure, sort_id)) + .collect(); + + // Build dimensions from carrier sizes + let dims: Vec = column_sorts + .iter() + .map(|&sort_id| structure.carriers[sort_id].len() as usize) + .collect(); + + // Convert tuples + let mut extent = std::collections::BTreeSet::new(); + for tuple in relation.iter() { + let indices: Option> = tuple + .iter() + .zip(&carrier_indices) + .map(|(&slid, index_map)| index_map.get(&slid).copied()) + .collect(); + + if let Some(idx_tuple) = indices { + extent.insert(idx_tuple); + } + // Skip tuples with elements not in carriers (shouldn't happen in valid data) + } + + SparseTensor { dims, extent } +} + +/// Extract variable names from a term pattern. +/// Returns pairs of (field_position, variable_name). 
+fn extract_term_vars(term: &Term) -> Vec<(usize, String, DerivedSort)> { + match term { + Term::Var(name, sort) => vec![(0, name.clone(), sort.clone())], + Term::Record(fields) => fields + .iter() + .enumerate() + .flat_map(|(i, (_, t))| { + extract_term_vars(t) + .into_iter() + .map(move |(_, name, sort)| (i, name, sort)) + }) + .collect(), + // For function applications and projections, we'd need more work + Term::App(_, _) | Term::Project(_, _) => { + // These are more complex - for now, treat as opaque + vec![] + } + } +} + +/// Check if a term contains any function applications +fn term_has_func_app(term: &Term) -> bool { + match term { + Term::Var(_, _) => false, + Term::App(_, _) => true, + Term::Project(base, _) => term_has_func_app(base), + Term::Record(fields) => fields.iter().any(|(_, t)| term_has_func_app(t)), + } +} + +/// Compile a simple relation formula (no function applications in term) +fn compile_rel_simple( + rel_id: RelId, + term: &Term, + structure: &Structure, + sig: &Signature, +) -> (TensorExpr, Vec) { + let vars_info = extract_term_vars(term); + let column_sorts = relation_column_sorts(sig, rel_id); + + // Build the tensor from the relation + let tensor = relation_to_tensor(structure, rel_id, &column_sorts); + + // Build variable list (ordered by column position) + let mut var_info_sorted = vars_info.clone(); + var_info_sorted.sort_by_key(|(pos, _, _)| *pos); + + // Check for repeated variables (same variable in multiple columns) + // e.g., edge(x, x) should produce a diagonal tensor + let mut seen_vars: HashMap = HashMap::new(); + let mut unique_vars: Vec = Vec::new(); + let mut index_map: Vec = Vec::new(); + + for (_, name, _) in &var_info_sorted { + if let Some(&existing_idx) = seen_vars.get(name) { + // Repeated variable: map to same target + index_map.push(existing_idx); + } else { + // New variable + let new_idx = unique_vars.len(); + seen_vars.insert(name.clone(), new_idx); + unique_vars.push(name.clone()); + 
index_map.push(new_idx); + } + } + + // If all variables are unique, no contraction needed + if unique_vars.len() == var_info_sorted.len() { + (TensorExpr::leaf(tensor), unique_vars) + } else { + // Need to contract to handle repeated variables (diagonal) + let output: BTreeSet = (0..unique_vars.len()).collect(); + let expr = TensorExpr::Contract { + inner: Box::new(TensorExpr::leaf(tensor)), + index_map, + output, + }; + (expr, unique_vars) + } +} + +/// Compile a relation formula with function applications in the term +/// For `[from: e src, to: e tgt] reachable`: +/// 1. Compile each field term (e src, e tgt) using compile_term +/// 2. Join the resulting tensors +/// 3. Join with the relation tensor +/// 4. Project out the intermediate value variables +fn compile_rel_with_func_apps( + rel_id: RelId, + term: &Term, + structure: &Structure, + sig: &Signature, +) -> Result<(TensorExpr, Vec), CompileError> { + let column_sorts = relation_column_sorts(sig, rel_id); + let rel_tensor = relation_to_tensor(structure, rel_id, &column_sorts); + + // Get the relation's field info (unused for now but documents the structure) + let _rel = &sig.relations[rel_id]; + + let mut fresh_counter = 0; + + // Compile each field term and collect their value variables + let field_terms: Vec<&Term> = match term { + Term::Record(fields) => fields.iter().map(|(_, t)| t).collect(), + _ => vec![term], // Single term for unary relation + }; + + // Compile all field terms + let mut all_compiled: Vec<(TensorExpr, Vec, String)> = Vec::new(); + for field_term in &field_terms { + let (expr, vars, value_var) = compile_term(field_term, structure, sig, &mut fresh_counter)?; + all_compiled.push((expr, vars, value_var)); + } + + // Join all field terms together + let mut joined_expr = all_compiled[0].0.clone(); + let mut joined_vars = all_compiled[0].1.clone(); + + for (expr, vars, _) in all_compiled.iter().skip(1) { + let (new_expr, new_vars) = conjunction(joined_expr, &joined_vars, expr.clone(), 
vars); + joined_expr = new_expr; + joined_vars = new_vars; + } + + // Build the relation tensor with value variables as dimensions + // The relation tensor has dimensions corresponding to the column sorts + // We need to rename the relation's dimensions to match the field value variables + let value_vars: Vec<&String> = all_compiled.iter().map(|(_, _, v)| v).collect(); + + // Build relation tensor variable names (one per column) + let rel_vars: Vec = value_vars.iter().map(|&v| v.clone()).collect(); + + // Join with relation tensor + let (result_expr, result_vars) = + conjunction(joined_expr, &joined_vars, TensorExpr::leaf(rel_tensor), &rel_vars); + + // Project out the value variables (they're internal) + let mut final_expr = result_expr; + let mut final_vars = result_vars; + for value_var in &value_vars { + let (new_expr, new_vars) = exists(final_expr, &final_vars, value_var); + final_expr = new_expr; + final_vars = new_vars; + } + + Ok((final_expr, final_vars)) +} + +/// Get the base sort IDs from a relation's domain. +fn relation_column_sorts(sig: &Signature, rel_id: RelId) -> Vec { + let rel_sym = &sig.relations[rel_id]; + match &rel_sym.domain { + DerivedSort::Base(sort_id) => vec![*sort_id], + DerivedSort::Product(fields) => fields + .iter() + .filter_map(|(_, sort)| { + if let DerivedSort::Base(sort_id) = sort { + Some(*sort_id) + } else { + None // Nested products not supported yet + } + }) + .collect(), + } +} + +/// Compile a term to a tensor expression. +/// +/// Returns (expr, vars, value_var) where: +/// - expr is a tensor over vars (including value_var) +/// - vars are all free variables in alphabetical order +/// - value_var is the internal name for the term's value dimension +/// +/// The tensor represents: for each assignment to free variables, +/// what is the value of the term? 
+fn compile_term( + term: &Term, + structure: &Structure, + sig: &Signature, + fresh_counter: &mut usize, +) -> Result<(TensorExpr, Vec, String), CompileError> { + match term { + Term::Var(name, sort) => { + // Variable x evaluates to itself + // Tensor is identity: (x, value) where value = x + // This is the diagonal tensor + let DerivedSort::Base(sort_id) = sort else { + return Err(CompileError::ProductSortInVariable); + }; + let size = structure.carriers[*sort_id].len() as usize; + + // Create diagonal tensor: extent = {(i, i) | i < size} + let extent: BTreeSet> = (0..size).map(|i| vec![i, i]).collect(); + let tensor = SparseTensor { + dims: vec![size, size], + extent, + }; + + // Value variable is the same as the input variable + // Actually we need a fresh name to track the "output" dimension + let value_var = format!("_val{}", *fresh_counter); + *fresh_counter += 1; + + // The tensor has dimensions [name, value_var] + // We need them in alphabetical order + let vars = if name < &value_var { + vec![name.clone(), value_var.clone()] + } else { + vec![value_var.clone(), name.clone()] + }; + + let expr = if name < &value_var { + TensorExpr::leaf(tensor) + } else { + // Need to transpose + TensorExpr::Contract { + inner: Box::new(TensorExpr::leaf(tensor)), + index_map: vec![1, 0], + output: (0..2).collect(), + } + }; + + Ok((expr, vars, value_var)) + } + + Term::App(func_id, arg) => { + // f(arg): first compile arg, then apply function + let (arg_expr, arg_vars, arg_value_var) = + compile_term(arg.as_ref(), structure, sig, fresh_counter)?; + + // Get function info + let func_sym = &sig.functions[*func_id]; + let DerivedSort::Base(domain_sort_id) = &func_sym.domain else { + return Err(CompileError::ProductDomainFunction(func_sym.name.clone())); + }; + let DerivedSort::Base(codomain_sort_id) = &func_sym.codomain else { + return Err(CompileError::ProductCodomainFunction(func_sym.name.clone())); + }; + + // Build function tensor: (domain, codomain) pairs + let 
func_tensor = function_to_tensor(structure, *func_id, *domain_sort_id, *codomain_sort_id); + + // Fresh variable for output + let result_var = format!("_val{}", *fresh_counter); + *fresh_counter += 1; + + // Function tensor has vars [arg_value_var, result_var] (we need to match arg's value) + let func_vars = if arg_value_var < result_var { + vec![arg_value_var.clone(), result_var.clone()] + } else { + vec![result_var.clone(), arg_value_var.clone()] + }; + + let func_expr = if arg_value_var < result_var { + TensorExpr::leaf(func_tensor) + } else { + TensorExpr::Contract { + inner: Box::new(TensorExpr::leaf(func_tensor)), + index_map: vec![1, 0], + output: (0..2).collect(), + } + }; + + // Join arg_expr and func_expr on arg_value_var + let (joined_expr, joined_vars) = conjunction(arg_expr, &arg_vars, func_expr, &func_vars); + + // Existentially quantify out arg_value_var (the intermediate value) + let (result_expr, result_vars) = exists(joined_expr, &joined_vars, &arg_value_var); + + Ok((result_expr, result_vars, result_var)) + } + + Term::Record(_) => { + Err(CompileError::RecordInEquality) + } + + Term::Project(_, _) => { + Err(CompileError::ProjectionInEquality) + } + } +} + +/// Compile a formula to a tensor expression. +/// +/// Returns the expression and the list of free variables in order. 
+pub fn compile_formula( + formula: &Formula, + _ctx: &CompileContext, + structure: &Structure, + sig: &Signature, +) -> Result<(TensorExpr, Vec), CompileError> { + match formula { + Formula::True => Ok((TensorExpr::scalar(true), vec![])), + + Formula::False => Ok((TensorExpr::scalar(false), vec![])), + + Formula::Rel(rel_id, term) => { + // Check if term contains function applications + if term_has_func_app(term) { + // Use compile_term for each field, then join with relation + compile_rel_with_func_apps(*rel_id, term, structure, sig) + } else { + // Simple case: direct variable binding + Ok(compile_rel_simple(*rel_id, term, structure, sig)) + } + } + + Formula::Conj(formulas) => { + if formulas.is_empty() { + return Ok((TensorExpr::scalar(true), vec![])); + } + + let compiled: Result)>, CompileError> = formulas + .iter() + .map(|f| compile_formula(f, _ctx, structure, sig)) + .collect(); + + Ok(conjunction_all(compiled?)) + } + + Formula::Disj(formulas) => { + if formulas.is_empty() { + return Ok((TensorExpr::scalar(false), vec![])); + } + + let mut compiled: Vec<(TensorExpr, Vec)> = formulas + .iter() + .map(|f| compile_formula(f, _ctx, structure, sig)) + .collect::, _>>()?; + + // Collect all variables across all disjuncts + let all_vars: std::collections::HashSet<&String> = compiled + .iter() + .flat_map(|(_, vars)| vars.iter()) + .collect(); + + // If all disjuncts have the same variables, we're good + let need_extension = compiled.iter().any(|(_, vars)| { + let var_set: std::collections::HashSet<_> = vars.iter().collect(); + var_set != all_vars + }); + + if need_extension { + // Build a canonical variable ordering + let all_vars_vec: Vec = { + let mut v: Vec<_> = all_vars.iter().cloned().cloned().collect(); + v.sort(); // Canonical ordering + v + }; + + // Extend each disjunct with missing variables + for (expr, vars) in &mut compiled { + let var_set: std::collections::HashSet<_> = vars.iter().collect(); + let missing: Vec<_> = all_vars_vec + .iter() + 
.filter(|v| !var_set.contains(*v)) + .collect(); + + if !missing.is_empty() { + // Create full-domain tensors for missing variables and take product + let mut full_domain_tensors = Vec::new(); + let mut new_vars = vars.clone(); + + for var in missing { + // Look up the variable's sort in the context + if let Some(idx) = _ctx.vars.iter().position(|v| v == var) { + let sort = &_ctx.sorts[idx]; + let card = derived_sort_cardinality(structure, sort); + + // Create a 1D tensor with all values [0..card) + let mut extent = BTreeSet::new(); + for i in 0..card { + extent.insert(vec![i]); + } + let full_tensor = SparseTensor { + dims: vec![card], + extent, + }; + full_domain_tensors.push(TensorExpr::leaf(full_tensor)); + new_vars.push(var.clone()); + } else { + // Variable not in context - return error + return Err(CompileError::UnboundVariable(var.clone())); + } + } + + // Take product: original × full_domain_1 × full_domain_2 × ... + if !full_domain_tensors.is_empty() { + let mut product_parts = vec![std::mem::replace( + expr, + TensorExpr::scalar(false), + )]; + product_parts.extend(full_domain_tensors); + *expr = TensorExpr::Product(product_parts); + *vars = new_vars; + } + } + } + } + + Ok(disjunction_all(compiled)) + } + + Formula::Exists(var_name, sort, inner) => { + // Compile inner formula + let (inner_expr, inner_vars) = compile_formula(inner, _ctx, structure, sig)?; + + // Check if the quantified variable appears in the inner formula + if !inner_vars.contains(var_name) { + // The variable doesn't appear free in the inner formula. + // For example: ∃x. True or ∃x. 
(y = y) + // + // In this case, the existential is: + // - FALSE if the domain is empty (no witness exists) + // - Equal to the inner formula otherwise (witness exists vacuously) + let domain_card = derived_sort_cardinality(structure, sort); + if domain_card == 0 { + // Empty domain: existential is false + return Ok((TensorExpr::scalar(false), inner_vars)); + } + // Non-empty domain: the existential is equivalent to the inner formula + return Ok((inner_expr, inner_vars)); + } + + // Apply existential (sum over the variable) + Ok(exists(inner_expr, &inner_vars, var_name)) + } + + Formula::Eq(t1, t2) => { + // Handle equality using recursive term compilation + // This supports arbitrary term expressions including nested function applications + // + // Strategy: compile both terms to tensors, join on value dimensions, + // then project out the internal value variables + + // Special case: x = x is trivially true + if let (Term::Var(name1, _), Term::Var(name2, _)) = (t1, t2) + && name1 == name2 { + return Ok((TensorExpr::scalar(true), vec![])); + } + + let mut fresh_counter = 0; + + // Compile both terms + let (expr1, vars1, val1) = compile_term(t1, structure, sig, &mut fresh_counter)?; + let (expr2, vars2, val2) = compile_term(t2, structure, sig, &mut fresh_counter)?; + + // t1 = t2 means their values are equal + // We need to: + // 1. Join expr1 and expr2 on their value dimensions (val1 = val2) + // 2. Project out the value dimensions + + // First, rename val2 to val1 in vars2 so they join on the same variable + let vars2_renamed: Vec = vars2 + .iter() + .map(|v| if v == &val2 { val1.clone() } else { v.clone() }) + .collect(); + + // Rename val2 to val1 in expr2 by reordering dimensions + // The vars are sorted alphabetically, so we need to figure out where val2 was + // and where val1 should go + let val2_pos = vars2.iter().position(|v| v == &val2).unwrap(); + + // Where should val1 go in the sorted vars2_renamed? 
+ let mut sorted_vars2: Vec = vars2_renamed.clone(); + sorted_vars2.sort(); + let val1_pos_in_sorted = sorted_vars2.iter().position(|v| v == &val1).unwrap(); + + // Build index map for reordering + let expr2_reordered = if val2_pos != val1_pos_in_sorted { + // Need to reorder dimensions + let mut index_map: Vec = (0..vars2.len()).collect(); + // The dimension at val2_pos needs to go to val1_pos_in_sorted + index_map.remove(val2_pos); + index_map.insert(val1_pos_in_sorted, val2_pos); + + // Actually, we need the inverse mapping for Contract + let mut inverse_map = vec![0; vars2.len()]; + for (new_pos, &old_pos) in index_map.iter().enumerate() { + inverse_map[old_pos] = new_pos; + } + + TensorExpr::Contract { + inner: Box::new(expr2), + index_map: inverse_map, + output: (0..vars2.len()).collect(), + } + } else { + expr2 + }; + + // Now join on val1 + let (joined_expr, joined_vars) = + conjunction(expr1, &vars1, expr2_reordered, &sorted_vars2); + + // Project out the internal value variable val1 + let (result_expr, result_vars) = exists(joined_expr, &joined_vars, &val1); + + Ok((result_expr, result_vars)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::DerivedSort; + use crate::id::Slid; + use crate::universe::Universe; + + /// Helper to create Slid from integer + fn slid(n: usize) -> Slid { + Slid::from_usize(n) + } + + /// Helper to create a test structure with a single sort and some elements + fn make_test_structure_with_relation() -> (Structure, Signature) { + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add a binary relation: edge(from: Node, to: Node) + sig.add_relation( + "edge".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); // 1 sort + + // Add 3 nodes (Slids 0, 1, 2) + for _ in 0..3 { + 
structure.add_element(&mut universe, node_id); + } + + // Initialize relations + structure.init_relations(&[2]); // One binary relation + + // Add edges: 0→1, 1→2 + structure.assert_relation(0, vec![slid(0), slid(1)]); + structure.assert_relation(0, vec![slid(1), slid(2)]); + + (structure, sig) + } + + #[test] + fn test_compile_formula_true() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + let (expr, vars) = compile_formula(&Formula::True, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert_eq!(result.len(), 1); // scalar true + assert!(result.contains(&[])); + } + + #[test] + fn test_compile_formula_false() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + let (expr, vars) = compile_formula(&Formula::False, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert!(result.is_empty()); + } + + #[test] + fn test_compile_formula_relation() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: edge(x, y) + let term = Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]); + let formula = Formula::Rel(0, term); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "y"]); + assert_eq!(result.dims, vec![3, 3]); // 3 nodes + assert_eq!(result.len(), 2); // 2 edges + assert!(result.contains(&[0, 1])); // 0→1 + assert!(result.contains(&[1, 2])); // 1→2 + } + + #[test] + fn test_compile_formula_conjunction() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: edge(x, y) ∧ edge(y, z) + let edge_xy = Formula::Rel( + 0, + 
Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let formula = Formula::Conj(vec![edge_xy, edge_yz]); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "y", "z"]); + assert_eq!(result.len(), 1); // Only one 2-hop path: 0→1→2 + assert!(result.contains(&[0, 1, 2])); + } + + #[test] + fn test_compile_formula_exists() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: ∃y. edge(x, y) ∧ edge(y, z) + // This is 2-hop reachability + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let inner = Formula::Conj(vec![edge_xy, edge_yz]); + let formula = Formula::Exists("y".to_string(), DerivedSort::Base(0), Box::new(inner)); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "z"]); + assert_eq!(result.len(), 1); // One 2-hop path: 0→2 (via 1) + assert!(result.contains(&[0, 2])); + } + + #[test] + fn test_compile_formula_equality() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: x = 
y (diagonal) + let formula = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars.len(), 2); + assert_eq!(result.dims, vec![3, 3]); + assert_eq!(result.len(), 3); // Diagonal: (0,0), (1,1), (2,2) + assert!(result.contains(&[0, 0])); + assert!(result.contains(&[1, 1])); + assert!(result.contains(&[2, 2])); + } + + #[test] + fn test_compile_formula_reflexive_identity() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: x = x (trivially true) + let formula = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert_eq!(result.len(), 1); // scalar true + assert!(result.contains(&[])); + } + + #[test] + fn test_compile_formula_func_app_equality() { + // Test: f(x) = y where f is a function + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add function f : Node -> Node + sig.add_function("f".to_string(), DerivedSort::Base(node_id), DerivedSort::Base(node_id)); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 nodes + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Define f: 0 -> 1, 1 -> 2, 2 -> 0 + structure.init_functions(&[Some(0)]); // f has domain sort 0 + structure.define_function(0, Slid::from_usize(0), Slid::from_usize(1)).unwrap(); + structure.define_function(0, Slid::from_usize(1), Slid::from_usize(2)).unwrap(); + structure.define_function(0, Slid::from_usize(2), Slid::from_usize(0)).unwrap(); + + let ctx = CompileContext::new(); + + // Build: f(x) = y + let formula = 
Formula::Eq( + Term::App(0, Box::new(Term::Var("x".to_string(), DerivedSort::Base(0)))), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Variables should be x and y (alphabetical order) + assert_eq!(vars.len(), 2); + assert!(vars.contains(&"x".to_string())); + assert!(vars.contains(&"y".to_string())); + + // Result should have exactly 3 tuples: (0,1), (1,2), (2,0) + // representing f(0)=1, f(1)=2, f(2)=0 + // But order depends on alphabetical sort of variable names + assert_eq!(result.len(), 3); + } + + #[test] + fn test_compile_formula_two_func_apps_equality() { + // Test: f(x) = g(y) where f, g are functions + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add functions f, g : Node -> Node + sig.add_function("f".to_string(), DerivedSort::Base(node_id), DerivedSort::Base(node_id)); + sig.add_function("g".to_string(), DerivedSort::Base(node_id), DerivedSort::Base(node_id)); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 nodes + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Define f: 0 -> 1, 1 -> 1, 2 -> 2 + // Define g: 0 -> 0, 1 -> 1, 2 -> 2 + structure.init_functions(&[Some(0), Some(0)]); // Both have domain sort 0 + structure.define_function(0, Slid::from_usize(0), Slid::from_usize(1)).unwrap(); + structure.define_function(0, Slid::from_usize(1), Slid::from_usize(1)).unwrap(); + structure.define_function(0, Slid::from_usize(2), Slid::from_usize(2)).unwrap(); + structure.define_function(1, Slid::from_usize(0), Slid::from_usize(0)).unwrap(); + structure.define_function(1, Slid::from_usize(1), Slid::from_usize(1)).unwrap(); + structure.define_function(1, Slid::from_usize(2), Slid::from_usize(2)).unwrap(); + + let ctx = CompileContext::new(); + + // Build: f(x) = g(y) + // f(x) = g(y) when ∃z. 
f(x) = z ∧ g(y) = z + // f(0)=1, f(1)=1, f(2)=2 + // g(0)=0, g(1)=1, g(2)=2 + // So f(x)=g(y) holds for: (0,1), (1,1), (2,2) since f(0)=g(1)=1, f(1)=g(1)=1, f(2)=g(2)=2 + let formula = Formula::Eq( + Term::App(0, Box::new(Term::Var("x".to_string(), DerivedSort::Base(0)))), + Term::App(1, Box::new(Term::Var("y".to_string(), DerivedSort::Base(0)))), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Variables should be x and y + assert_eq!(vars.len(), 2); + assert!(vars.contains(&"x".to_string())); + assert!(vars.contains(&"y".to_string())); + + // f(x) = g(y) holds for: (x=0,y=1), (x=1,y=1), (x=2,y=2) + assert_eq!(result.len(), 3); + } + + #[test] + fn test_compile_formula_exists_empty_domain() { + // When the domain is empty, ∃x. φ should be false even if φ is true + // This is the case for ∃x. x = x on an empty structure + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Empty structure (no elements) + let structure = Structure::new(1); + + let ctx = CompileContext::new(); + + // Build: ∃x. x = x + // Inner formula x = x compiles to scalar true (no variables) + // But since domain is empty, the existential should be false + let inner = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + ); + let formula = Formula::Exists("x".to_string(), DerivedSort::Base(node_id), Box::new(inner)); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Should be FALSE (empty) because there's no witness in empty domain + assert!(vars.is_empty()); + assert!(result.is_empty(), "∃x. x = x should be false on empty domain"); + } + + #[test] + fn test_compile_formula_exists_nonempty_domain() { + // When the domain is non-empty, ∃x. 
x = x should be true + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + structure.add_element(&mut universe, node_id); // Add one element + + let ctx = CompileContext::new(); + + // Build: ∃x. x = x + let inner = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + ); + let formula = Formula::Exists("x".to_string(), DerivedSort::Base(node_id), Box::new(inner)); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Should be TRUE because there's a witness + assert!(vars.is_empty()); + assert!(result.contains(&[]), "∃x. x = x should be true on non-empty domain"); + } + + #[test] + fn test_compile_formula_disjunction_different_vars() { + // Test disjunction where each disjunct has different variables + // R(x) \/ S(y) - this used to panic, now should work + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add two unary relations + sig.add_relation("R".to_string(), DerivedSort::Base(node_id)); + sig.add_relation("S".to_string(), DerivedSort::Base(node_id)); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 nodes + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Initialize relations + structure.init_relations(&[1, 1]); // Two unary relations + + // R = {0}, S = {1} + structure.assert_relation(0, vec![Slid::from_usize(0)]); + structure.assert_relation(1, vec![Slid::from_usize(1)]); + + // Need context with both x and y + let ctx = CompileContext { + vars: vec!["x".to_string(), "y".to_string()], + sorts: vec![DerivedSort::Base(node_id), DerivedSort::Base(node_id)], + }; + + // Build: R(x) \/ S(y) + let r_x = Formula::Rel( + 0, + Term::Var("x".to_string(), DerivedSort::Base(0)), + ); + let s_y = 
Formula::Rel( + 1, + Term::Var("y".to_string(), DerivedSort::Base(0)), + ); + + let formula = Formula::Disj(vec![r_x, s_y]); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Result should have both x and y + assert_eq!(vars.len(), 2); + assert!(vars.contains(&"x".to_string())); + assert!(vars.contains(&"y".to_string())); + + // The result is the union of: + // - R(x) extended with all y: {(0,0), (0,1), (0,2)} + // - S(y) extended with all x: {(0,1), (1,1), (2,1)} + // Note: the tuple order depends on variable order + assert!(!result.is_empty()); + } + + #[test] + fn test_compile_formula_relation_with_func_apps() { + // Test: [from: e src, to: e tgt] edge (function applications in relation term) + // This verifies that compile_rel_with_func_apps works correctly + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + let edge_id = sig.add_sort("Edge".to_string()); + + // Add functions src, tgt : Edge -> Node + sig.add_function("src".to_string(), DerivedSort::Base(edge_id), DerivedSort::Base(node_id)); + sig.add_function("tgt".to_string(), DerivedSort::Base(edge_id), DerivedSort::Base(node_id)); + + // Add binary relation: reachable(from: Node, to: Node) + sig.add_relation( + "reachable".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(2); // 2 sorts + + // Add 3 nodes (sort 0) + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + // Add 2 edges (sort 1) + for _ in 0..2 { + structure.add_element(&mut universe, edge_id); + } + + // Define edges: e0: 0->1, e1: 1->2 + structure.init_functions(&[Some(edge_id), Some(edge_id)]); // src, tgt have domain Edge + // e0: src=0, tgt=1 + structure.define_function(0, Slid::from_usize(3), Slid::from_usize(0)).unwrap(); // e0.src = 
node0 + structure.define_function(1, Slid::from_usize(3), Slid::from_usize(1)).unwrap(); // e0.tgt = node1 + // e1: src=1, tgt=2 + structure.define_function(0, Slid::from_usize(4), Slid::from_usize(1)).unwrap(); // e1.src = node1 + structure.define_function(1, Slid::from_usize(4), Slid::from_usize(2)).unwrap(); // e1.tgt = node2 + + // Reachable relation: initially {(0,1), (0,2), (1,2)} + structure.init_relations(&[2]); // One binary relation + structure.assert_relation(0, vec![Slid::from_usize(0), Slid::from_usize(1)]); // 0->1 + structure.assert_relation(0, vec![Slid::from_usize(0), Slid::from_usize(2)]); // 0->2 + structure.assert_relation(0, vec![Slid::from_usize(1), Slid::from_usize(2)]); // 1->2 + + let ctx = CompileContext::new(); + + // Build: [from: e src, to: e tgt] reachable + // This should match edges e where reachable(src(e), tgt(e)) holds + let formula = Formula::Rel( + 0, // reachable + Term::Record(vec![ + ( + "from".to_string(), + Term::App(0, Box::new(Term::Var("e".to_string(), DerivedSort::Base(edge_id)))), // e src + ), + ( + "to".to_string(), + Term::App(1, Box::new(Term::Var("e".to_string(), DerivedSort::Base(edge_id)))), // e tgt + ), + ]), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // The formula should match edges where reachable(src(e), tgt(e)) holds + // e0: src=0, tgt=1 -> reachable(0,1) holds ✓ + // e1: src=1, tgt=2 -> reachable(1,2) holds ✓ + // So both edges should match + assert_eq!(vars, vec!["e"]); + assert_eq!(result.len(), 2); // Both edges match + } +} diff --git a/src/tensor/expr.rs b/src/tensor/expr.rs new file mode 100644 index 0000000..c22f500 --- /dev/null +++ b/src/tensor/expr.rs @@ -0,0 +1,454 @@ +//! Lazy tensor expressions. + +use std::collections::{BTreeSet, HashMap}; +use std::rc::Rc; + +use super::sparse::{cartesian_product_of_extents, CartesianProductIter, SparseTensor}; + +/// A lazy tensor expression. 
+/// +/// Operations build up an expression tree rather than immediately computing. +/// Evaluation fuses operations to avoid materializing large intermediates. +#[derive(Clone, Debug)] +pub enum TensorExpr { + /// Materialized sparse tensor (leaf) + Leaf(Rc), + + /// Lazy tensor product (cross join) + /// Result dimensions = concatenation of input dimensions + Product(Vec), + + /// Lazy disjunction (union of extents) + /// All children must have the same dimensions. + /// Result is true wherever ANY child is true (pointwise OR). + Sum(Vec), + + /// Lazy contraction + /// Maps input indices to output indices; indices mapping to same target + /// are identified; targets not in output are summed (OR'd) over. + Contract { + inner: Box, + /// For each input index, which target index (in 0..M) + index_map: Vec, + /// Which target indices appear in output + output: BTreeSet, + }, +} + +impl TensorExpr { + /// Create a leaf from a sparse tensor + pub fn leaf(t: SparseTensor) -> Self { + TensorExpr::Leaf(Rc::new(t)) + } + + /// Create a scalar (0-dimensional) tensor expression + pub fn scalar(value: bool) -> Self { + TensorExpr::leaf(SparseTensor::scalar(value)) + } + + /// Get dimensions without materializing + pub fn dims(&self) -> Vec { + match self { + TensorExpr::Leaf(t) => t.dims.clone(), + TensorExpr::Product(exprs) => exprs.iter().flat_map(|e| e.dims()).collect(), + TensorExpr::Sum(exprs) => { + // All children should have same dims; return first or empty + exprs.first().map(|e| e.dims()).unwrap_or_default() + } + TensorExpr::Contract { + inner, + index_map, + output, + } => { + let inner_dims = inner.dims(); + // Build target -> dim mapping + let mut target_dims: HashMap = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_dims.entry(target).or_insert(inner_dims[i]); + } + // Output dims in order + let max_target = index_map.iter().copied().max().unwrap_or(0); + (0..=max_target) + .filter(|t| output.contains(t)) + .map(|t| 
*target_dims.get(&t).unwrap_or(&1))
                    .collect()
            }
        }
    }

    /// Arity (number of dimensions)
    pub fn arity(&self) -> usize {
        self.dims().len()
    }

    /// Materialize the tensor expression into a sparse tensor.
    ///
    /// This is where fusion happens: Contract(Product(...)) is evaluated
    /// without materializing the intermediate product, and Contract(Sum(...))
    /// is distributed over the summands before evaluation.
    pub fn materialize(&self) -> SparseTensor {
        match self {
            // Deep-clone out of the shared Rc (SparseTensor derives Clone).
            TensorExpr::Leaf(t) => (**t).clone(),

            TensorExpr::Product(exprs) => {
                // Empty product = multiplicative unit = scalar true.
                if exprs.is_empty() {
                    return SparseTensor::scalar(true);
                }
                // Materialize children and compute Cartesian product
                let materialized: Vec<SparseTensor> =
                    exprs.iter().map(|e| e.materialize()).collect();
                // Result dims = concatenation of child dims, in child order.
                let dims: Vec<usize> = materialized
                    .iter()
                    .flat_map(|t| t.dims.iter().copied())
                    .collect();
                let extent = cartesian_product_of_extents(&materialized);
                SparseTensor { dims, extent }
            }

            TensorExpr::Sum(exprs) => {
                if exprs.is_empty() {
                    // Empty disjunction = false = empty tensor with unknown dims
                    // NOTE(review): this is a 0-dimensional scalar; if a caller
                    // expects an n-ary false tensor here the dims will not match
                    // its siblings — confirm Sum(vec![]) only occurs in scalar
                    // positions.
                    return SparseTensor::scalar(false);
                }
                // Union of extents (pointwise OR)
                let first = exprs[0].materialize();
                let dims = first.dims.clone();
                let mut extent = first.extent;

                for expr in &exprs[1..] {
                    let child = expr.materialize();
                    // Dims agreement is only enforced in debug builds.
                    debug_assert_eq!(child.dims, dims, "Sum children must have same dimensions");
                    extent.extend(child.extent);
                }

                SparseTensor { dims, extent }
            }

            TensorExpr::Contract {
                inner,
                index_map,
                output,
            } => {
                // Check for fusion opportunity: Contract(Product(...))
                if let TensorExpr::Product(children) = inner.as_ref() {
                    return self.fused_join(children, index_map, output);
                }

                // Fusion: Contract(Sum(...)) distributes
                // Contract(Sum(a, b)) = Sum(Contract(a), Contract(b))
                if let TensorExpr::Sum(children) = inner.as_ref() {
                    let contracted_children: Vec<TensorExpr> = children
                        .iter()
                        .map(|child| TensorExpr::Contract {
                            inner: Box::new(child.clone()),
                            index_map: index_map.clone(),
                            output: output.clone(),
                        })
                        .collect();
                    return TensorExpr::Sum(contracted_children).materialize();
                }

                // Otherwise, materialize inner and contract
                let inner_tensor = inner.materialize();
                contract_sparse(&inner_tensor, index_map, output)
            }
        }
    }

    /// Fused evaluation of Contract(Product([...])).
    /// Avoids materializing the full Cartesian product.
+ fn fused_join( + &self, + children: &[TensorExpr], + index_map: &[usize], + output: &BTreeSet, + ) -> SparseTensor { + if children.is_empty() { + let inner_result = SparseTensor::scalar(true); + return contract_sparse(&inner_result, index_map, output); + } + + // Materialize children + let materialized: Vec = children.iter().map(|e| e.materialize()).collect(); + + // Compute dimension offsets for each child + let mut offsets = vec![0usize]; + for t in &materialized { + offsets.push(offsets.last().unwrap() + t.arity()); + } + + // Figure out which target indices come from which children + // and which input indices map to each target + let max_target = index_map.iter().copied().max().unwrap_or(0); + let mut target_to_inputs: HashMap> = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_to_inputs.entry(target).or_default().push(i); + } + + // Build output dimensions + let inner_dims: Vec = materialized + .iter() + .flat_map(|t| t.dims.iter().copied()) + .collect(); + let mut target_dims: HashMap = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_dims.entry(target).or_insert(inner_dims[i]); + } + let output_targets: Vec = (0..=max_target).filter(|t| output.contains(t)).collect(); + let output_dims: Vec = output_targets + .iter() + .map(|t| *target_dims.get(t).unwrap_or(&1)) + .collect(); + + // Use hash join for 2-way, nested loops otherwise + // (Future: Leapfrog Triejoin for multi-way) + let mut result_extent: BTreeSet> = BTreeSet::new(); + + if materialized.len() == 2 { + // Hash join + let (t1, t2) = (&materialized[0], &materialized[1]); + let offset2 = offsets[1]; + + // Find join keys: target indices that have inputs from both t1 and t2 + let t1_range = 0..t1.arity(); + let t2_range = offset2..(offset2 + t2.arity()); + + let mut join_targets: Vec = Vec::new(); + let mut t1_key_indices: Vec = Vec::new(); + let mut t2_key_indices: Vec = Vec::new(); + + for (&target, inputs) in &target_to_inputs { + let 
from_t1: Vec<_> = inputs.iter().filter(|&&i| t1_range.contains(&i)).collect(); + let from_t2: Vec<_> = inputs.iter().filter(|&&i| t2_range.contains(&i)).collect(); + if !from_t1.is_empty() && !from_t2.is_empty() { + join_targets.push(target); + t1_key_indices.push(*from_t1[0]); // First input from t1 + t2_key_indices.push(*from_t2[0] - offset2); // First input from t2 (local index) + } + } + + // Build hash table on t1 + let mut hash_table: HashMap, Vec<&Vec>> = HashMap::new(); + for tuple in t1.iter() { + let key: Vec = t1_key_indices.iter().map(|&i| tuple[i]).collect(); + hash_table.entry(key).or_default().push(tuple); + } + + // Probe with t2 + for tuple2 in t2.iter() { + let key: Vec = t2_key_indices.iter().map(|&i| tuple2[i]).collect(); + if let Some(matches) = hash_table.get(&key) { + for tuple1 in matches { + // Combine and check full consistency + let combined: Vec = + tuple1.iter().chain(tuple2.iter()).copied().collect(); + if let Some(out_tuple) = try_project(&combined, index_map, &output_targets) + { + result_extent.insert(out_tuple); + } + } + } + } + } else { + // Nested loops for other cases + for combo in CartesianProductIter::new(&materialized) { + if let Some(out_tuple) = try_project(&combo, index_map, &output_targets) { + result_extent.insert(out_tuple); + } + } + } + + SparseTensor { + dims: output_dims, + extent: result_extent, + } + } + + /// Iterate over result tuples without full materialization. 
+ /// (For now, just materializes; future: streaming evaluation) + pub fn iter(&self) -> impl Iterator> { + self.materialize().extent.into_iter() + } + + /// Check if result is empty (may short-circuit) + pub fn is_empty(&self) -> bool { + // Future: smarter emptiness checking + self.materialize().is_empty() + } + + /// Check if result contains a specific tuple + pub fn contains(&self, tuple: &[usize]) -> bool { + // Future: smarter containment checking + self.materialize().contains(tuple) + } +} + +// ============================================================================ +// INTERNAL HELPERS +// ============================================================================ + +/// Contract a materialized sparse tensor. +fn contract_sparse( + tensor: &SparseTensor, + index_map: &[usize], + output: &BTreeSet, +) -> SparseTensor { + let max_target = index_map.iter().copied().max().unwrap_or(0); + let mut target_dims: HashMap = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_dims.entry(target).or_insert(tensor.dims[i]); + } + + let output_targets: Vec = (0..=max_target).filter(|t| output.contains(t)).collect(); + let output_dims: Vec = output_targets + .iter() + .map(|t| *target_dims.get(t).unwrap_or(&1)) + .collect(); + + let mut extent: BTreeSet> = BTreeSet::new(); + + for input_tuple in tensor.iter() { + if let Some(out_tuple) = try_project(input_tuple, index_map, &output_targets) { + extent.insert(out_tuple); + } + } + + SparseTensor { + dims: output_dims, + extent, + } +} + +/// Try to project a combined tuple to output indices. +/// Returns None if identified indices don't match. 
fn try_project(
    combined: &[usize],
    index_map: &[usize],
    output_targets: &[usize],
) -> Option<Vec<usize>> {
    // Value pinned to each target so far. Two inputs mapping to the same
    // target must agree — this implements index identification (the diagonal).
    let mut target_values: HashMap<usize, usize> = HashMap::new();

    for (i, &val) in combined.iter().enumerate() {
        let target = index_map[i];
        if let Some(&existing) = target_values.get(&target) {
            if existing != val {
                return None; // Inconsistent
            }
        } else {
            target_values.insert(target, val);
        }
    }

    // An output target never assigned by index_map falls back to 0; the
    // contraction helpers give such targets dimension 1, so 0 is the only
    // valid index there.
    Some(
        output_targets
            .iter()
            .map(|t| *target_values.get(t).unwrap_or(&0))
            .collect(),
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    fn leaf(t: SparseTensor) -> TensorExpr {
        TensorExpr::leaf(t)
    }

    #[test]
    fn test_product_simple() {
        let mut r = SparseTensor::empty(vec![3]);
        r.insert(vec![0]);
        r.insert(vec![2]);

        let mut s = SparseTensor::empty(vec![2]);
        s.insert(vec![1]);

        let expr = TensorExpr::Product(vec![leaf(r), leaf(s)]);
        let result = expr.materialize();

        assert_eq!(result.dims, vec![3, 2]);
        assert_eq!(result.len(), 2);
        assert!(result.contains(&[0, 1]));
        assert!(result.contains(&[2, 1]));
    }

    #[test]
    fn test_contract_reduction() {
        let mut t = SparseTensor::empty(vec![2, 3]);
        t.insert(vec![0, 0]);
        t.insert(vec![0, 2]);
        t.insert(vec![1, 1]);

        let output: BTreeSet<usize> = [0].into_iter().collect();
        let expr = TensorExpr::Contract {
            inner: Box::new(leaf(t)),
            index_map: vec![0, 1],
            output,
        };
        let result = expr.materialize();

        assert_eq!(result.dims, vec![2]);
        assert_eq!(result.len(), 2);
        assert!(result.contains(&[0]));
        assert!(result.contains(&[1]));
    }

    #[test]
    fn test_sum_basic() {
        // R ∨ S where R = {(0,0), (1,1)} and S = {(1,1), (2,2)}
        let mut r = SparseTensor::empty(vec![3, 3]);
        r.insert(vec![0, 0]);
        r.insert(vec![1, 1]);

        let mut s = SparseTensor::empty(vec![3, 3]);
        s.insert(vec![1, 1]);
        s.insert(vec![2, 2]);

        let expr = TensorExpr::Sum(vec![leaf(r), leaf(s)]);
        let result = expr.materialize();

        assert_eq!(result.dims, vec![3, 3]);
assert_eq!(result.len(), 3); // Union removes duplicates + assert!(result.contains(&[0, 0])); + assert!(result.contains(&[1, 1])); + assert!(result.contains(&[2, 2])); + } + + #[test] + fn test_sum_empty() { + // Empty disjunction = false + let expr = TensorExpr::Sum(vec![]); + let result = expr.materialize(); + + assert!(result.is_empty()); + } + + #[test] + fn test_contract_sum_distributes() { + // Contract(Sum(R, S)) = Sum(Contract(R), Contract(S)) + // Using ∃y. (R(x,y) ∨ S(x,y)) + let mut r = SparseTensor::empty(vec![2, 2]); + r.insert(vec![0, 0]); + r.insert(vec![0, 1]); + + let mut s = SparseTensor::empty(vec![2, 2]); + s.insert(vec![1, 0]); + + let sum = TensorExpr::Sum(vec![leaf(r), leaf(s)]); + + // ∃y: map y to fresh target, output only x + let output: BTreeSet = [0].into_iter().collect(); + let expr = TensorExpr::Contract { + inner: Box::new(sum), + index_map: vec![0, 2], // x→0, y→2 (fresh) + output, + }; + + let result = expr.materialize(); + + assert_eq!(result.dims, vec![2]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0])); // from R + assert!(result.contains(&[1])); // from S + } +} diff --git a/src/tensor/mod.rs b/src/tensor/mod.rs new file mode 100644 index 0000000..2fd1a6b --- /dev/null +++ b/src/tensor/mod.rs @@ -0,0 +1,31 @@ +//! Lazy tensor expressions for axiom checking +//! +//! A tensor indexed by finite sets A₀, A₁, ..., Aₙ₋₁ is a function +//! [∏ᵢ Aᵢ] → Bool. We represent this sparsely as the set of tuples +//! mapping to true. +//! +//! Key insight: tensor product followed by contraction should NEVER +//! materialize the intermediate product. Instead, we build expression +//! trees and fuse operations during evaluation. +//! +//! Two primitives suffice for einsum-style operations: +//! - **tensor_product**: ⊗ₖ Sₖ — indexed by all indices, value = ∧ of contributions +//! - **contract**: along `a:[N]→[M]`, output `O⊆[M]` — identifies indices, sums over non-output +//! +//! 
Over the Boolean semiring: product = AND, sum = OR. + +mod builder; +mod check; +mod compile; +mod expr; +mod sparse; + +// Re-export main types +pub use builder::{conjunction, conjunction_all, disjunction, disjunction_all, exists}; +pub use check::{check_sequent, check_sequent_bool, check_theory_axioms, CheckResult, Violation}; +pub use compile::{ + build_carrier_index, compile_formula, derived_sort_cardinality, relation_to_tensor, + sort_cardinality, CompileContext, CompileError, +}; +pub use expr::TensorExpr; +pub use sparse::SparseTensor; diff --git a/src/tensor/sparse.rs b/src/tensor/sparse.rs new file mode 100644 index 0000000..e89d840 --- /dev/null +++ b/src/tensor/sparse.rs @@ -0,0 +1,223 @@ +//! Sparse Boolean tensor (materialized). + +use std::collections::BTreeSet; + +/// A sparse Boolean tensor (materialized). +/// +/// Indexed by a product of finite sets with given cardinalities. +/// Stores the set of index tuples that map to `true`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SparseTensor { + /// Cardinality of each index dimension + pub dims: Vec, + /// Set of tuples (as `Vec`) where the tensor is true + /// Each tuple has length == dims.len() + pub extent: BTreeSet>, +} + +impl SparseTensor { + /// Create an empty tensor (all false) with given dimensions + pub fn empty(dims: Vec) -> Self { + Self { + dims, + extent: BTreeSet::new(), + } + } + + /// Create a scalar tensor (0-dimensional) with given value + pub fn scalar(value: bool) -> Self { + let mut extent = BTreeSet::new(); + if value { + extent.insert(vec![]); + } + Self { + dims: vec![], + extent, + } + } + + /// Number of dimensions (arity) + pub fn arity(&self) -> usize { + self.dims.len() + } + + /// Number of true entries + pub fn len(&self) -> usize { + self.extent.len() + } + + /// Check if empty (all false) + pub fn is_empty(&self) -> bool { + self.extent.is_empty() + } + + /// Check if a specific tuple is true + pub fn contains(&self, tuple: &[usize]) -> bool { + 
        self.extent.contains(tuple)
    }

    /// Insert a tuple (set to true)
    pub fn insert(&mut self, tuple: Vec<usize>) -> bool {
        // Arity and per-dimension bounds are only checked in debug builds.
        debug_assert_eq!(tuple.len(), self.dims.len());
        debug_assert!(tuple.iter().zip(&self.dims).all(|(v, d)| *v < *d));
        self.extent.insert(tuple)
    }

    /// Remove a tuple (set to false)
    pub fn remove(&mut self, tuple: &[usize]) -> bool {
        self.extent.remove(tuple)
    }

    /// Iterate over all true tuples
    pub fn iter(&self) -> impl Iterator<Item = &Vec<usize>> {
        self.extent.iter()
    }
}

// ============================================================================
// ITERATORS
// ============================================================================

/// Iterator over all tuples in a domain (Cartesian product of ranges)
pub(crate) struct DomainIterator {
    // Cardinality of each index position.
    dims: Vec<usize>,
    // The next tuple to yield.
    current: Vec<usize>,
    // Set once the odometer has wrapped past the last tuple.
    done: bool,
}

impl DomainIterator {
    pub fn new(dims: &[usize]) -> Self {
        // Any zero dimension makes the whole product empty, so start done.
        let done = dims.contains(&0);
        Self {
            dims: dims.to_vec(),
            current: vec![0; dims.len()],
            done,
        }
    }
}

impl Iterator for DomainIterator {
    type Item = Vec<usize>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }

        // The empty product has exactly one element: the empty tuple.
        if self.dims.is_empty() {
            self.done = true;
            return Some(vec![]);
        }

        let result = self.current.clone();

        // Advance (odometer style): bump the last position, carrying left;
        // a carry out of position 0 means we have cycled through everything.
        for i in (0..self.dims.len()).rev() {
            self.current[i] += 1;
            if self.current[i] < self.dims[i] {
                break;
            }
            self.current[i] = 0;
            if i == 0 {
                self.done = true;
            }
        }

        Some(result)
    }
}

/// Iterator over Cartesian product of sparse tensor extents
pub(crate) struct CartesianProductIter<'a> {
    tensors: &'a [SparseTensor],
    // One cursor into each tensor's extent.
    iterators: Vec<std::collections::btree_set::Iter<'a, Vec<usize>>>,
    // Current tuple from each tensor (None only when that extent is empty).
    current: Vec<Option<&'a Vec<usize>>>,
    done: bool,
}

impl<'a> CartesianProductIter<'a> {
    pub fn new(tensors: &'a [SparseTensor]) -> Self {
        // Empty product: yield the single empty tuple (scalar true).
        if tensors.is_empty() {
            return Self {
                tensors,
                iterators: vec![],
                current: vec![],
                done: false,
            };
        }

        // Any empty factor makes the whole product empty, so mark done up front.
        let done = tensors.iter().any(|t| t.is_empty());
        let mut iterators: Vec<_> =
tensors.iter().map(|t| t.extent.iter()).collect(); + let current: Vec<_> = iterators.iter_mut().map(|it| it.next()).collect(); + + Self { + tensors, + iterators, + current, + done, + } + } +} + +impl<'a> Iterator for CartesianProductIter<'a> { + type Item = Vec; + + fn next(&mut self) -> Option { + if self.done { + return None; + } + + if self.tensors.is_empty() { + self.done = true; + return Some(vec![]); + } + + // Build result + let result: Vec = self + .current + .iter() + .filter_map(|opt| opt.as_ref()) + .flat_map(|tuple| tuple.iter().copied()) + .collect(); + + // Advance (odometer style) + for i in (0..self.tensors.len()).rev() { + if let Some(next) = self.iterators[i].next() { + self.current[i] = Some(next); + break; + } else { + self.iterators[i] = self.tensors[i].extent.iter(); + self.current[i] = self.iterators[i].next(); + if i == 0 { + self.done = true; + } + } + } + + Some(result) + } +} + +/// Cartesian product of extents of multiple sparse tensors +pub(crate) fn cartesian_product_of_extents(tensors: &[SparseTensor]) -> BTreeSet> { + CartesianProductIter::new(tensors).collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sparse_tensor_basic() { + let mut t = SparseTensor::empty(vec![3, 2]); + assert_eq!(t.arity(), 2); + assert!(t.is_empty()); + + t.insert(vec![0, 1]); + t.insert(vec![2, 0]); + assert_eq!(t.len(), 2); + assert!(t.contains(&[0, 1])); + assert!(t.contains(&[2, 0])); + assert!(!t.contains(&[0, 0])); + } +} diff --git a/src/universe.rs b/src/universe.rs new file mode 100644 index 0000000..076f8e9 --- /dev/null +++ b/src/universe.rs @@ -0,0 +1,207 @@ +//! Global UUID universe with Luid (Locally Universal ID) mapping +//! +//! This provides a single, persistent index of all UUIDs known to this +//! installation. UUIDs are mapped to compact integer Luids for efficient +//! in-memory operations. +//! +//! Following chit's multi-level ID design: +//! 
- Uuid: 128-bit globally unique identifier (for persistence, cross-system) +//! - Luid: Local index into this installation's universe (for computation) + +use crate::id::{Luid, NumericId, Uuid}; +use indexmap::IndexSet; +use memmap2::Mmap; +use rkyv::ser::Serializer; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::{Archive, Deserialize, Serialize, check_archived_root}; +use std::fs::{self, File}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +/// The global universe of all UUIDs known to this installation. +/// +/// Provides bidirectional mapping between UUIDs and Luids: +/// - `intern(uuid)` → Luid (get or create) +/// - `get(luid)` → Uuid +/// - `lookup(uuid)` → `Option` +/// +/// The universe is persisted to disk and can be memory-mapped for +/// efficient access without loading everything into memory. +#[derive(Debug)] +pub struct Universe { + /// The index mapping Luid → Uuid (and via IndexSet, Uuid → Luid) + index: IndexSet, + /// Path to the universe file (if persistent) + path: Option, + /// Whether there are unsaved changes + dirty: bool, +} + +/// Serializable form of the universe for persistence +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +struct UniverseData { + uuids: Vec, +} + +impl Universe { + /// Create a new empty universe (in-memory only) + pub fn new() -> Self { + Self { + index: IndexSet::new(), + path: None, + dirty: false, + } + } + + /// Create a new universe with a persistence path + pub fn with_path(path: impl Into) -> Self { + Self { + index: IndexSet::new(), + path: Some(path.into()), + dirty: false, + } + } + + /// Load a universe from disk, or create empty if file doesn't exist + pub fn load(path: impl Into) -> Result { + let path = path.into(); + + if !path.exists() { + return Ok(Self::with_path(path)); + } + + let file = File::open(&path).map_err(|e| format!("Failed to open universe file: {}", e))?; + + // Memory-map the file for zero-copy access + let mmap = unsafe { Mmap::map(&file) } + 
.map_err(|e| format!("Failed to mmap universe file: {}", e))?; + + if mmap.is_empty() { + return Ok(Self::with_path(path)); + } + + // Validate and access the archived data + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Failed to validate universe archive: {}", e))?; + + // Deserialize to build the IndexSet + let data: UniverseData = archived + .deserialize(&mut rkyv::Infallible) + .map_err(|_| "Failed to deserialize universe")?; + + let index: IndexSet = data.uuids.into_iter().collect(); + + Ok(Self { + index, + path: Some(path), + dirty: false, + }) + } + + /// Save the universe to disk + pub fn save(&mut self) -> Result<(), String> { + let path = self + .path + .as_ref() + .ok_or("Universe has no persistence path")?; + + // Create parent directories if needed + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create universe directory: {}", e))?; + } + + // Serialize the universe + let data = UniverseData { + uuids: self.index.iter().copied().collect(), + }; + + let mut serializer = AllocSerializer::<1024>::default(); + serializer + .serialize_value(&data) + .map_err(|e| format!("Failed to serialize universe: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + // Write atomically by writing to temp file then renaming + let temp_path = path.with_extension("universe.tmp"); + { + let mut file = File::create(&temp_path) + .map_err(|e| format!("Failed to create temp universe file: {}", e))?; + file.write_all(&bytes) + .map_err(|e| format!("Failed to write universe file: {}", e))?; + file.sync_all() + .map_err(|e| format!("Failed to sync universe file: {}", e))?; + } + + fs::rename(&temp_path, path) + .map_err(|e| format!("Failed to rename universe file: {}", e))?; + + self.dirty = false; + Ok(()) + } + + /// Intern a UUID, returning its Luid (creating if new) + pub fn intern(&mut self, uuid: Uuid) -> Luid { + let (idx, inserted) = self.index.insert_full(uuid); + if inserted 
{ + self.dirty = true; + } + Luid::from_usize(idx) + } + + /// Get the UUID for a Luid + pub fn get(&self, luid: Luid) -> Option { + self.index.get_index(luid.index()).copied() + } + + /// Look up the Luid for a UUID (if known) + pub fn lookup(&self, uuid: &Uuid) -> Option { + self.index.get_index_of(uuid).map(Luid::from_usize) + } + + /// Get the number of UUIDs in the universe + pub fn len(&self) -> usize { + self.index.len() + } + + /// Check if the universe is empty + pub fn is_empty(&self) -> bool { + self.index.is_empty() + } + + /// Check if there are unsaved changes + pub fn is_dirty(&self) -> bool { + self.dirty + } + + /// Iterate over all (Luid, Uuid) pairs + pub fn iter(&self) -> impl Iterator + '_ { + self.index + .iter() + .enumerate() + .map(|(idx, &uuid)| (Luid::from_usize(idx), uuid)) + } + + /// Get the persistence path (if any) + pub fn path(&self) -> Option<&Path> { + self.path.as_deref() + } +} + +impl Default for Universe { + fn default() -> Self { + Self::new() + } +} + +impl Drop for Universe { + fn drop(&mut self) { + // Auto-save on drop if dirty and has a path + if self.dirty && self.path.is_some() { + let _ = self.save(); // Ignore errors on drop + } + } +} + +// Unit tests moved to tests/proptest_universe.rs diff --git a/src/version.rs b/src/version.rs new file mode 100644 index 0000000..da4bb57 --- /dev/null +++ b/src/version.rs @@ -0,0 +1,272 @@ +//! Version control for geolog structures +//! +//! This module provides a simple linear version control system for structures. +//! Patches are serialized to disk and can be loaded to reconstruct any version. 
+ +use crate::core::Structure; +use crate::id::Uuid; +use crate::naming::NamingIndex; +use crate::patch::{Patch, apply_patch, diff, to_initial_patch}; +use crate::universe::Universe; + +use rkyv::ser::Serializer; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::{Deserialize, check_archived_root}; +use std::collections::BTreeMap; +use std::fs::{self, File}; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + +/// A version-controlled state for managing structure history. +/// +/// This provides a simple linear history (no branches/merges yet). +/// Patches are stored on disk and loaded on demand. +/// +/// Contains a Universe for mapping UUIDs to Luids. The Universe is +/// persisted alongside the patches. +#[derive(Debug)] +pub struct VersionedState { + /// All patches, indexed by target_commit UUID + pub patches: BTreeMap, + /// Map from target_commit to source_commit (for walking history) + pub commit_parents: BTreeMap>, + /// Current HEAD commit (None = empty) + pub head: Option, + /// Directory where patches are stored + pub patches_dir: PathBuf, + /// The universe for UUID↔Luid mapping + pub universe: Universe, + /// The naming index for element names + pub naming: NamingIndex, +} + +impl VersionedState { + /// Create a new versioned state with the given patches directory + pub fn new(patches_dir: impl Into) -> Self { + let patches_dir = patches_dir.into(); + let universe_path = patches_dir.join("universe.bin"); + let naming_path = patches_dir.join("names.bin"); + Self { + patches: BTreeMap::new(), + commit_parents: BTreeMap::new(), + head: None, + patches_dir, + universe: Universe::with_path(universe_path), + naming: NamingIndex::with_path(naming_path), + } + } + + /// Load all patches from the patches directory + pub fn load_patches(&mut self) -> Result<(), String> { + fs::create_dir_all(&self.patches_dir) + .map_err(|e| format!("Failed to create patches directory: {}", e))?; + + // Load the universe + let universe_path = 
            self.patches_dir.join("universe.bin");
        self.universe = Universe::load(&universe_path)?;

        // Load the naming index
        let naming_path = self.patches_dir.join("names.bin");
        self.naming = NamingIndex::load(&naming_path)?;

        let entries = fs::read_dir(&self.patches_dir)
            .map_err(|e| format!("Failed to read patches directory: {}", e))?;

        // Only files with a `.patch` extension are considered; anything else
        // in the directory (universe.bin, names.bin, temp files) is skipped.
        for entry in entries {
            let entry = entry.map_err(|e| format!("Failed to read directory entry: {}", e))?;
            let path = entry.path();

            if path.extension().is_some_and(|ext| ext == "patch") {
                self.load_patch(&path)?;
            }
        }

        // Find the head (the commit that is not a source of any other commit)
        self.find_head();

        Ok(())
    }

    /// Load a single patch file: validate the rkyv archive, deserialize it,
    /// and record both the patch and its parent link.
    fn load_patch(&mut self, path: &Path) -> Result<(), String> {
        let mut file = File::open(path).map_err(|e| format!("Failed to open patch file: {}", e))?;

        let mut bytes = Vec::new();
        file.read_to_end(&mut bytes)
            .map_err(|e| format!("Failed to read patch file: {}", e))?;

        // Use check_archived_root for validation
        let archived = check_archived_root::<Patch>(&bytes)
            .map_err(|e| format!("Failed to validate patch archive: {}", e))?;

        // Deserialize to owned Patch
        let patch: Patch = archived
            .deserialize(&mut rkyv::Infallible)
            .map_err(|_| "Failed to deserialize patch")?;

        let target = patch.target_commit;
        let source = patch.source_commit;

        // The parent link is recorded even when source is None (root commit).
        self.commit_parents.insert(target, source);
        self.patches.insert(target, patch);

        Ok(())
    }

    /// Find the head commit: a commit that is not the parent (source) of any
    /// other loaded commit.
    fn find_head(&mut self) {
        // Collect all source commits (commits that have children)
        let sources: std::collections::HashSet<Uuid> =
            self.commit_parents.values().filter_map(|s| *s).collect();

        // Head is a commit that is not a source of any other commit.
        // NOTE(review): with a strictly linear history exactly one such commit
        // exists; if history ever diverges, BTreeMap key order makes this pick
        // the smallest UUID among the tips — confirm that is intended.
        for &commit in self.commit_parents.keys() {
            if !sources.contains(&commit) {
                self.head = Some(commit);
                return;
            }
        }
    }

    /// Save a patch to disk (also saves the
universe and naming if dirty) + pub fn save_patch(&mut self, patch: &Patch) -> Result<(), String> { + fs::create_dir_all(&self.patches_dir) + .map_err(|e| format!("Failed to create patches directory: {}", e))?; + + let filename = format!("{}.patch", patch.target_commit); + let path = self.patches_dir.join(filename); + + // Serialize with rkyv + let mut serializer = AllocSerializer::<256>::default(); + serializer + .serialize_value(patch) + .map_err(|e| format!("Failed to serialize patch: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + let mut file = + File::create(&path).map_err(|e| format!("Failed to create patch file: {}", e))?; + + file.write_all(&bytes) + .map_err(|e| format!("Failed to write patch file: {}", e))?; + + // Save the universe if dirty + if self.universe.is_dirty() { + self.universe.save()?; + } + + // Save the naming index if dirty + if self.naming.is_dirty() { + self.naming.save()?; + } + + Ok(()) + } + + /// Checkout a specific commit, returning the reconstructed structure + /// + /// Also updates the naming index with names from applied patches. 
+ pub fn checkout(&mut self, commit: Uuid) -> Result { + // Build the chain of patches from root to target + let mut chain = Vec::new(); + let mut current = Some(commit); + + while let Some(c) = current { + let patch = self + .patches + .get(&c) + .ok_or_else(|| format!("Commit {} not found", c))?; + chain.push(patch.clone()); + current = patch.source_commit; + } + + // Reverse to apply from root to target + chain.reverse(); + + // Apply patches in order + let mut structure = if let Some(first_patch) = chain.first() { + Structure::new(first_patch.num_sorts) + } else { + return Err("No patches to apply".to_string()); + }; + + // Create a temporary naming index for checkout (don't modify the main one) + let mut checkout_naming = NamingIndex::new(); + + for patch in &chain { + structure = apply_patch(&structure, patch, &mut self.universe, &mut checkout_naming)?; + } + + Ok(structure) + } + + /// Commit a structure, creating a new patch from the current HEAD + /// + /// Returns the new commit's UUID. + /// The naming parameter provides names for elements in the structure. 
+ pub fn commit(&mut self, structure: &Structure, naming: &NamingIndex) -> Result { + let patch = if let Some(head) = self.head { + // Diff from current HEAD + let base = self.checkout(head)?; + // Use empty naming for base (names are reconstructed from patches) + let base_naming = NamingIndex::new(); + let mut patch = diff(&base, structure, &self.universe, &base_naming, naming); + patch.source_commit = Some(head); + patch + } else { + // Initial commit + to_initial_patch(structure, &self.universe, naming) + }; + + // Skip empty patches + if patch.is_empty() { + return Err("No changes to commit".to_string()); + } + + let commit_uuid = patch.target_commit; + + // Apply names from patch to our naming index + for (uuid, name) in &patch.names.additions { + self.naming.insert(*uuid, name.clone()); + } + + // Save to disk + self.save_patch(&patch)?; + + // Update in-memory state + self.commit_parents.insert(commit_uuid, patch.source_commit); + self.patches.insert(commit_uuid, patch); + self.head = Some(commit_uuid); + + Ok(commit_uuid) + } + + /// Get the current HEAD structure, or None if no commits + pub fn get_head_structure(&mut self) -> Result, String> { + match self.head { + Some(head) => Ok(Some(self.checkout(head)?)), + None => Ok(None), + } + } + + /// List all commits in order from oldest to newest + pub fn list_commits(&self) -> Vec { + // Build list by following parents + let mut commits = Vec::new(); + let mut current = self.head; + + while let Some(c) = current { + commits.push(c); + current = self.commit_parents.get(&c).and_then(|p| *p); + } + + commits.reverse(); + commits + } + + /// Get the number of commits + pub fn num_commits(&self) -> usize { + self.patches.len() + } +} + +// Unit tests moved to tests/unit_version.rs diff --git a/src/zerocopy.rs b/src/zerocopy.rs new file mode 100644 index 0000000..0f66b64 --- /dev/null +++ b/src/zerocopy.rs @@ -0,0 +1,422 @@ +//! Zero-copy access to serialized structures via memory mapping. +//! +//! 
This module provides `MappedStructure` which memory-maps a serialized structure +//! file and provides direct access to the archived data without deserialization. +//! +//! # Benefits +//! - **No deserialization cost**: Data is accessed directly from the mmap +//! - **Minimal memory overhead**: Only the mmap exists, no heap copies +//! - **Fast startup**: Opening a structure is O(1), not O(n) elements +//! +//! # Trade-offs +//! - Read-only access (archived types are immutable) +//! - Slightly different API (ArchivedVec vs Vec, etc.) +//! - Requires file to remain valid for lifetime of MappedStructure + +use std::fs::File; +use std::path::Path; +use std::sync::Arc; + +use memmap2::Mmap; +use rkyv::check_archived_root; +use rkyv::Archived; + +use crate::core::{SortId, TupleId}; +use crate::id::{Luid, Slid, NumericId}; +use crate::serialize::{ + StructureData, RelationData, FunctionColumnData, ArchivedFunctionColumnData, +}; + +/// A memory-mapped structure providing zero-copy access to archived data. +/// +/// The structure data is accessed directly from the memory map without +/// deserialization. This is ideal for read-heavy workloads on large structures. +pub struct MappedStructure { + /// The memory map - must outlive all references to archived data + _mmap: Arc, + /// Pointer to the archived structure data (valid for lifetime of mmap) + archived: &'static Archived, +} + +// Safety: The archived data is read-only and the mmap is reference-counted +unsafe impl Send for MappedStructure {} +unsafe impl Sync for MappedStructure {} + +impl MappedStructure { + /// Open a structure file with zero-copy access. + /// + /// The file is memory-mapped and validated. Returns an error if the file + /// cannot be opened or contains invalid data. 
+ pub fn open(path: &Path) -> Result { + let file = File::open(path) + .map_err(|e| format!("Failed to open {}: {}", path.display(), e))?; + + let mmap = unsafe { Mmap::map(&file) } + .map_err(|e| format!("Failed to mmap {}: {}", path.display(), e))?; + + // Validate and get reference to archived data + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Invalid archive in {}: {:?}", path.display(), e))?; + + // Extend lifetime to 'static - safe because mmap is Arc'd and outlives the reference + let archived: &'static Archived = unsafe { + std::mem::transmute(archived) + }; + + Ok(Self { + _mmap: Arc::new(mmap), + archived, + }) + } + + /// Number of sorts in the structure + #[inline] + pub fn num_sorts(&self) -> usize { + self.archived.num_sorts as usize + } + + /// Number of elements in the structure + #[inline] + pub fn len(&self) -> usize { + self.archived.luids.len() + } + + /// Check if empty + #[inline] + pub fn is_empty(&self) -> bool { + self.archived.luids.is_empty() + } + + /// Number of functions + #[inline] + pub fn num_functions(&self) -> usize { + self.archived.functions.len() + } + + /// Number of relations + #[inline] + pub fn num_relations(&self) -> usize { + self.archived.relations.len() + } + + /// Get the Luid for an element by Slid + #[inline] + pub fn get_luid(&self, slid: Slid) -> Option { + self.archived.luids.get(slid.index()).map(|l| Luid::from_usize(l.rep as usize)) + } + + /// Get the sort for an element by Slid + #[inline] + pub fn get_sort(&self, slid: Slid) -> Option { + self.archived.sorts.get(slid.index()).map(|&s| s as SortId) + } + + /// Iterate over all (slid, luid, sort) triples + pub fn elements(&self) -> impl Iterator + '_ { + self.archived.luids.iter().enumerate().map(|(i, luid)| { + let slid = Slid::from_usize(i); + let luid = Luid::from_usize(luid.rep as usize); + let sort = self.archived.sorts[i] as SortId; + (slid, luid, sort) + }) + } + + /// Get a zero-copy view of a relation + pub fn relation(&self, 
rel_id: usize) -> Option> { + self.archived.relations.get(rel_id).map(|r| MappedRelation { archived: r }) + } + + /// Iterate over all relations + pub fn relations(&self) -> impl Iterator> + '_ { + self.archived.relations.iter().map(|r| MappedRelation { archived: r }) + } + + /// Get a zero-copy view of a function column + pub fn function(&self, func_id: usize) -> Option> { + self.archived.functions.get(func_id).map(|f| MappedFunction { archived: f }) + } + + /// Get elements of a particular sort (zero-copy iteration) + pub fn elements_of_sort(&self, sort_id: SortId) -> impl Iterator + '_ { + self.archived.sorts.iter().enumerate() + .filter(move |&(_, s)| *s as SortId == sort_id) + .map(|(i, _)| Slid::from_usize(i)) + } +} + +/// A zero-copy view of an archived relation. +pub struct MappedRelation<'a> { + archived: &'a Archived, +} + +impl<'a> MappedRelation<'a> { + /// Relation arity + #[inline] + pub fn arity(&self) -> usize { + self.archived.arity as usize + } + + /// Number of tuples in the relation (including non-live ones) + #[inline] + pub fn tuple_count(&self) -> usize { + self.archived.tuples.len() + } + + /// Number of live tuples (in the extent) + #[inline] + pub fn live_count(&self) -> usize { + self.archived.extent.len() + } + + /// Get a tuple by ID (zero-copy - returns slice into mmap) + pub fn get_tuple(&self, id: TupleId) -> Option + '_> { + self.archived.tuples.get(id).map(|tuple| { + tuple.iter().map(|s| Slid::from_usize(s.rep as usize)) + }) + } + + /// Iterate over live tuple IDs + pub fn live_tuple_ids(&self) -> impl Iterator + '_ { + self.archived.extent.iter().map(|&id| id as TupleId) + } + + /// Iterate over live tuples (zero-copy) + pub fn live_tuples(&self) -> impl Iterator + '_> + '_ { + self.live_tuple_ids().filter_map(|id| self.get_tuple(id)) + } +} + +/// A zero-copy view of an archived function column. 
+pub struct MappedFunction<'a> { + archived: &'a Archived, +} + +impl<'a> MappedFunction<'a> { + /// Check if this is a local function + pub fn is_local(&self) -> bool { + matches!(self.archived, ArchivedFunctionColumnData::Local(_)) + } + + /// Get function value for a domain element (local functions only) + pub fn get_local(&self, domain_sort_local_id: usize) -> Option { + match self.archived { + ArchivedFunctionColumnData::Local(col) => { + col.get(domain_sort_local_id).and_then(|opt| { + // ArchivedOption - check if Some + match opt { + rkyv::option::ArchivedOption::Some(idx) => { + Some(Slid::from_usize(*idx as usize)) + } + rkyv::option::ArchivedOption::None => None, + } + }) + } + _ => None, + } + } + + /// Iterate over defined local function values: (domain_sort_local_id, codomain_slid) + pub fn iter_local(&self) -> impl Iterator + '_ { + match self.archived { + ArchivedFunctionColumnData::Local(col) => { + itertools::Either::Left(col.iter().enumerate().filter_map(|(i, opt)| { + match opt { + rkyv::option::ArchivedOption::Some(idx) => { + Some((i, Slid::from_usize(*idx as usize))) + } + rkyv::option::ArchivedOption::None => None, + } + })) + } + _ => itertools::Either::Right(std::iter::empty()), + } + } + + /// Iterate over product domain function values: (tuple, result_slid) + pub fn iter_product(&self) -> impl Iterator, Slid)> + '_ { + match self.archived { + ArchivedFunctionColumnData::ProductLocal { entries, .. 
} => { + itertools::Either::Left(entries.iter().map(|(tuple, result)| { + let tuple: Vec = tuple.iter().map(|&x| x as usize).collect(); + let result = Slid::from_usize(*result as usize); + (tuple, result) + })) + } + _ => itertools::Either::Right(std::iter::empty()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::Structure; + use crate::universe::Universe; + use crate::serialize::save_structure; + use tempfile::tempdir; + + #[test] + fn test_mapped_structure_basic() { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + + // Create and save a structure + let mut universe = Universe::new(); + let mut structure = Structure::new(2); // 2 sorts + structure.init_relations(&[1, 2]); // unary and binary relations + + // Add some elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 1); + + // Assert some relation tuples + structure.assert_relation(0, vec![a]); + structure.assert_relation(0, vec![b]); + structure.assert_relation(1, vec![a, c]); + + save_structure(&structure, &path).unwrap(); + + // Open with zero-copy + let mapped = MappedStructure::open(&path).unwrap(); + + assert_eq!(mapped.num_sorts(), 2); + assert_eq!(mapped.len(), 3); + assert_eq!(mapped.num_relations(), 2); + + // Check relation 0 (unary) + let rel0 = mapped.relation(0).unwrap(); + assert_eq!(rel0.arity(), 1); + assert_eq!(rel0.live_count(), 2); + + // Check relation 1 (binary) + let rel1 = mapped.relation(1).unwrap(); + assert_eq!(rel1.arity(), 2); + assert_eq!(rel1.live_count(), 1); + + // Iterate over live tuples + let tuples: Vec> = rel0.live_tuples() + .map(|t| t.collect()) + .collect(); + assert_eq!(tuples.len(), 2); + } + + #[test] + fn test_zero_copy_elements() { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + + let mut universe = Universe::new(); + let mut structure = 
Structure::new(3); + + // Add elements to different sorts + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 1); + structure.add_element(&mut universe, 2); + structure.add_element(&mut universe, 2); + structure.add_element(&mut universe, 2); + + save_structure(&structure, &path).unwrap(); + + let mapped = MappedStructure::open(&path).unwrap(); + + // Count elements per sort + assert_eq!(mapped.elements_of_sort(0).count(), 2); + assert_eq!(mapped.elements_of_sort(1).count(), 1); + assert_eq!(mapped.elements_of_sort(2).count(), 3); + } + + /// Benchmark test comparing zero-copy vs deserialize access patterns. + /// Run with: `cargo test --release benchmark_zerocopy -- --ignored --nocapture` + #[test] + #[ignore] + fn benchmark_zerocopy_vs_deserialize() { + use crate::serialize::load_structure; + use std::time::Instant; + + let dir = tempdir().unwrap(); + let path = dir.path().join("large.structure"); + + // Create a moderately large structure + let num_elements = 100_000; + let num_sorts = 10; + let num_relations = 5; + + eprintln!("Creating structure with {} elements, {} sorts, {} relations...", + num_elements, num_sorts, num_relations); + + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + // Initialize relations with varying arities + let arities: Vec = (0..num_relations).map(|i| (i % 3) + 1).collect(); + structure.init_relations(&arities); + + // Add elements distributed across sorts + let elements: Vec = (0..num_elements) + .map(|i| { + let sort = i % num_sorts; + let (slid, _) = structure.add_element(&mut universe, sort); + slid + }) + .collect(); + + // Add some relation tuples + for (rel_id, &arity) in arities.iter().enumerate().take(num_relations) { + for i in (0..1000).step_by(arity) { + let tuple: Vec = (0..arity) + .map(|j| elements[(i + j) % num_elements]) + .collect(); + structure.assert_relation(rel_id, tuple); + } + } + + 
save_structure(&structure, &path).unwrap(); + + let file_size = std::fs::metadata(&path).unwrap().len(); + eprintln!("Structure file size: {} bytes ({:.2} KB)", file_size, file_size as f64 / 1024.0); + + // Benchmark: deserialize approach (current) + const ITERATIONS: usize = 100; + + eprintln!("\n--- Deserialize approach ({} iterations) ---", ITERATIONS); + let start = Instant::now(); + for _ in 0..ITERATIONS { + let loaded = load_structure(&path).unwrap(); + // Access pattern: count elements per sort using carrier_size + let _counts: Vec = (0..num_sorts) + .map(|sort| loaded.carrier_size(sort)) + .collect(); + // Also access all elements to exercise deserialization + let _total: usize = loaded.luids.len(); + } + let deserialize_time = start.elapsed(); + eprintln!("Total: {:?}, Per iteration: {:?}", + deserialize_time, deserialize_time / ITERATIONS as u32); + + // Benchmark: zero-copy approach (new) + eprintln!("\n--- Zero-copy approach ({} iterations) ---", ITERATIONS); + let start = Instant::now(); + for _ in 0..ITERATIONS { + let mapped = MappedStructure::open(&path).unwrap(); + // Same access pattern: count elements of each sort + let _counts: Vec = (0..num_sorts) + .map(|sort| mapped.elements_of_sort(sort).count()) + .collect(); + // Also access len + let _total: usize = mapped.len(); + } + let zerocopy_time = start.elapsed(); + eprintln!("Total: {:?}, Per iteration: {:?}", + zerocopy_time, zerocopy_time / ITERATIONS as u32); + + // Compare + let speedup = deserialize_time.as_nanos() as f64 / zerocopy_time.as_nanos() as f64; + eprintln!("\n--- Results ---"); + eprintln!("Zero-copy is {:.2}x faster than deserialize", speedup); + + // The zero-copy approach should be faster for large structures + // (we don't assert this since performance varies by system) + } +} diff --git a/tests/examples_integration.rs b/tests/examples_integration.rs new file mode 100644 index 0000000..d7e40f6 --- /dev/null +++ b/tests/examples_integration.rs @@ -0,0 +1,960 @@ +//! 
Integration tests for example .geolog files +//! +//! These tests ensure that the example files in `examples/geolog/` remain +//! valid as the language evolves. They serve as living documentation. + +use geolog::repl::ReplState; +use std::fs; +use std::path::Path; + +/// Helper to load and execute a .geolog file, returning the REPL state +fn load_geolog_file(path: &Path) -> Result { + let content = fs::read_to_string(path) + .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?; + + let mut state = ReplState::new(); + + // Use execute_geolog which handles everything correctly + state + .execute_geolog(&content) + .map_err(|e| format!("Error in {}: {}", path.display(), e))?; + + Ok(state) +} + +// ============================================================================ +// Graph examples +// ============================================================================ + +#[test] +fn test_graph_example_parses() { + let path = Path::new("examples/geolog/graph.geolog"); + let state = load_geolog_file(path).expect("graph.geolog should parse and elaborate"); + + // Check theory + let graph = state.theories.get("Graph").expect("Graph theory should exist"); + assert_eq!(graph.theory.signature.sorts.len(), 2, "Graph should have 2 sorts (V, E)"); + assert_eq!(graph.theory.signature.functions.len(), 2, "Graph should have 2 functions (src, tgt)"); + + // Check instances + assert!(state.instances.contains_key("Triangle"), "Triangle instance should exist"); + assert!(state.instances.contains_key("Loop"), "Loop instance should exist"); + assert!(state.instances.contains_key("Arrow"), "Arrow instance should exist"); + assert!(state.instances.contains_key("Diamond"), "Diamond instance should exist"); +} + +#[test] +fn test_graph_triangle_structure() { + let path = Path::new("examples/geolog/graph.geolog"); + let state = load_geolog_file(path).unwrap(); + + let triangle = state.instances.get("Triangle").unwrap(); + + // Triangle has 3 vertices + 3 edges = 6 elements 
+ assert_eq!(triangle.structure.len(), 6, "Triangle should have 6 elements"); + + // Check carrier sizes: V has 3, E has 3 + assert_eq!(triangle.structure.carrier_size(0), 3, "Triangle should have 3 vertices"); + assert_eq!(triangle.structure.carrier_size(1), 3, "Triangle should have 3 edges"); +} + +#[test] +fn test_graph_diamond_structure() { + let path = Path::new("examples/geolog/graph.geolog"); + let state = load_geolog_file(path).unwrap(); + + let diamond = state.instances.get("Diamond").unwrap(); + + // Diamond has 4 vertices + 4 edges = 8 elements + assert_eq!(diamond.structure.len(), 8, "Diamond should have 8 elements"); + assert_eq!(diamond.structure.carrier_size(0), 4, "Diamond should have 4 vertices"); + assert_eq!(diamond.structure.carrier_size(1), 4, "Diamond should have 4 edges"); +} + +// ============================================================================ +// Petri net examples +// ============================================================================ + +#[test] +fn test_petri_net_example_parses() { + let path = Path::new("examples/geolog/petri_net.geolog"); + let state = load_geolog_file(path).expect("petri_net.geolog should parse and elaborate"); + + // Check theory + let petri = state.theories.get("PetriNet").expect("PetriNet theory should exist"); + assert_eq!(petri.theory.signature.sorts.len(), 4, "PetriNet should have 4 sorts (P, T, In, Out)"); + assert_eq!(petri.theory.signature.functions.len(), 4, "PetriNet should have 4 functions"); + + // Check instances + assert!(state.instances.contains_key("ProducerConsumer")); + assert!(state.instances.contains_key("MutualExclusion")); +} + +// ============================================================================ +// Petri Net Showcase - Full Type-Theoretic Encoding (from 2025-12-12 vision) +// ============================================================================ + +#[test] +fn test_petri_net_showcase_loads() { + let path = 
Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).expect("petri_net_showcase.geolog should parse and elaborate"); + + // Check theories + assert!(state.theories.contains_key("PetriNet"), "PetriNet theory should exist"); + assert!(state.theories.contains_key("Marking"), "Marking theory should exist"); + assert!(state.theories.contains_key("ReachabilityProblem"), "ReachabilityProblem theory should exist"); + assert!(state.theories.contains_key("Trace"), "Trace theory should exist"); + assert!(state.theories.contains_key("Iso"), "Iso theory should exist"); + assert!(state.theories.contains_key("Solution"), "Solution theory should exist"); + + // Check PetriNet theory structure + let petri = state.theories.get("PetriNet").unwrap(); + assert_eq!(petri.theory.signature.sorts.len(), 4, "PetriNet should have 4 sorts"); + assert_eq!(petri.theory.signature.functions.len(), 4, "PetriNet should have 4 functions"); + + // Check parameterized theories have correct parameter structure + let marking = state.theories.get("Marking").unwrap(); + assert_eq!(marking.params.len(), 1, "Marking should have 1 parameter (N : PetriNet instance)"); + assert_eq!(marking.params[0].name, "N"); + + let reach_prob = state.theories.get("ReachabilityProblem").unwrap(); + assert_eq!(reach_prob.params.len(), 1, "ReachabilityProblem should have 1 parameter"); + + let trace = state.theories.get("Trace").unwrap(); + assert_eq!(trace.params.len(), 1, "Trace should have 1 parameter"); + + let iso = state.theories.get("Iso").unwrap(); + assert_eq!(iso.params.len(), 2, "Iso should have 2 parameters (X : Sort, Y : Sort)"); + + let solution = state.theories.get("Solution").unwrap(); + assert_eq!(solution.params.len(), 2, "Solution should have 2 parameters (N, RP)"); + + // Check instances + assert!(state.instances.contains_key("ExampleNet"), "ExampleNet instance should exist"); + assert!(state.instances.contains_key("problem0"), "problem0 instance should exist"); + 
assert!(state.instances.contains_key("solution0"), "solution0 instance should exist"); + assert!(state.instances.contains_key("problem2"), "problem2 instance should exist"); + assert!(state.instances.contains_key("solution2"), "solution2 instance should exist"); +} + +#[test] +fn test_petri_net_showcase_example_net_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + let example_net = state.instances.get("ExampleNet").unwrap(); + + // ExampleNet has: + // - 3 places (A, B, C) + // - 3 transitions (ab, ba, abc) + // - 4 input arcs (ab_in, ba_in, abc_in1, abc_in2) + // - 3 output arcs (ab_out, ba_out, abc_out) + // Total: 3 + 3 + 4 + 3 = 13 elements + assert_eq!(example_net.structure.len(), 13, "ExampleNet should have 13 elements"); + + // Check carrier sizes by sort index (P=0, T=1, in=2, out=3) + assert_eq!(example_net.structure.carrier_size(0), 3, "ExampleNet should have 3 places"); + assert_eq!(example_net.structure.carrier_size(1), 3, "ExampleNet should have 3 transitions"); + assert_eq!(example_net.structure.carrier_size(2), 4, "ExampleNet should have 4 input arcs"); + assert_eq!(example_net.structure.carrier_size(3), 3, "ExampleNet should have 3 output arcs"); +} + +#[test] +fn test_petri_net_showcase_problem0_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + // problem0: 1 token in A -> 1 token in B + let problem0 = state.instances.get("problem0").unwrap(); + + // ReachabilityProblem structure includes the nested Marking instances + // This test verifies the cross-references work correctly + assert!(!problem0.structure.is_empty(), "problem0 should have elements"); +} + +#[test] +fn test_petri_net_showcase_solution0_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + // solution0 proves A -> B reachability by firing 
'ab' once + let solution0 = state.instances.get("solution0").unwrap(); + + // Solution structure includes nested Trace and Iso instances + // The trace has: 1 firing, 1 input terminal, 1 output terminal + // Plus the isomorphism mappings + assert!(!solution0.structure.is_empty(), "solution0 should have elements"); +} + +#[test] +fn test_petri_net_showcase_solution2_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + // solution2 proves 2A -> C reachability by firing 'ab' then 'abc' + // This is the complex case: requires firing ab to move one A-token to B, + // then abc consumes both an A-token and the new B-token to produce C + let solution2 = state.instances.get("solution2").unwrap(); + + assert!(!solution2.structure.is_empty(), "solution2 should have elements"); +} + +/// NEGATIVE TEST: Verify that an incomplete solution correctly fails axiom checking. +/// +/// This test ensures that the Trace theory's wire axioms properly catch +/// solutions that are missing required wires between firings. 
+#[test] +fn test_solution2_incomplete_fails_axiom_check() { + let path = Path::new("tests/negative/solution2_incomplete_negative_test.geolog"); + let result = load_geolog_file(path); + + // This file should FAIL to load because it's missing a wire + // connecting f1's output to f2's input + match result { + Ok(_) => panic!("Incomplete solution2 should fail axiom checking, but it succeeded"), + Err(err) => { + assert!( + err.contains("axiom") && err.contains("violated"), + "Error should mention axiom violation, got: {}", + err + ); + } + } +} + +#[test] +fn test_petri_net_producer_consumer() { + let path = Path::new("examples/geolog/petri_net.geolog"); + let state = load_geolog_file(path).unwrap(); + + let pc = state.instances.get("ProducerConsumer").unwrap(); + + // ProducerConsumer: 3 places + 2 transitions + 2 input arcs + 2 output arcs = 9 + assert_eq!(pc.structure.len(), 9, "ProducerConsumer should have 9 elements"); +} + +#[test] +fn test_petri_net_mutual_exclusion() { + let path = Path::new("examples/geolog/petri_net.geolog"); + let state = load_geolog_file(path).unwrap(); + + let mutex = state.instances.get("MutualExclusion").unwrap(); + + // MutualExclusion: 5 places + 4 transitions + 6 input arcs + 6 output arcs = 21 + assert_eq!(mutex.structure.len(), 21, "MutualExclusion should have 21 elements"); +} + +// ============================================================================ +// Monoid example (with product domain function support) +// ============================================================================ + +#[test] +fn test_monoid_example_parses() { + let path = Path::new("examples/geolog/monoid.geolog"); + let state = load_geolog_file(path).expect("monoid.geolog should parse and elaborate"); + + // Check theory + let monoid = state.theories.get("Monoid").expect("Monoid theory should exist"); + assert_eq!(monoid.theory.signature.sorts.len(), 1, "Monoid should have 1 sort (M)"); + assert_eq!(monoid.theory.signature.functions.len(), 2, 
"Monoid should have 2 functions (mul, id)"); + assert_eq!(monoid.theory.axioms.len(), 4, "Monoid should have 4 axioms"); + + // Check instances (product domain support via geolog-ulh) + assert!(state.instances.contains_key("Trivial"), "Trivial monoid should exist"); + assert!(state.instances.contains_key("BoolAnd"), "BoolAnd monoid should exist"); + assert!(state.instances.contains_key("BoolOr"), "BoolOr monoid should exist"); +} + +#[test] +fn test_monoid_trivial_structure() { + let path = Path::new("examples/geolog/monoid.geolog"); + let state = load_geolog_file(path).unwrap(); + + let trivial = state.instances.get("Trivial").unwrap(); + + // Trivial monoid has 1 element + assert_eq!(trivial.structure.carrier_size(0), 1, "Trivial monoid should have 1 element"); + + // Check id function (base domain: M -> M) + // id: e -> e + assert!(trivial.structure.functions[1].as_local().is_some(), "id should be a local function"); + let id_col = trivial.structure.functions[1].as_local().unwrap(); + assert_eq!(id_col.len(), 1, "id should have 1 entry"); + assert!(id_col[0].is_some(), "id(e) should be defined"); + + // Check mul function (product domain: M × M -> M) + // mul: (e,e) -> e + if let geolog::core::FunctionColumn::ProductLocal { storage, field_sorts } = &trivial.structure.functions[0] { + assert_eq!(field_sorts.len(), 2, "mul should have 2-element domain"); + assert_eq!(storage.defined_count(), 1, "mul should have 1 entry defined"); + } else { + panic!("mul should be a ProductLocal function"); + } +} + +#[test] +fn test_monoid_bool_and_structure() { + let path = Path::new("examples/geolog/monoid.geolog"); + let state = load_geolog_file(path).unwrap(); + + let bool_and = state.instances.get("BoolAnd").unwrap(); + + // BoolAnd has 2 elements (T, F) + assert_eq!(bool_and.structure.carrier_size(0), 2, "BoolAnd should have 2 elements"); + + // Check mul function (product domain): all 4 entries should be defined + if let geolog::core::FunctionColumn::ProductLocal { 
storage, .. } = &bool_and.structure.functions[0] {
        // 2×2 input domain: all four entries must be present.
        assert_eq!(storage.defined_count(), 4, "mul should have all 4 entries defined (2×2)");

        // Verify it's total
        assert!(storage.is_total(&[2, 2]), "mul should be total on 2×2 domain");
    } else {
        panic!("mul should be a ProductLocal function");
    }

    // Check id function (base domain): both entries defined
    let id_col = bool_and.structure.functions[1].as_local().unwrap();
    assert_eq!(id_col.len(), 2, "id should have 2 entries");
    assert!(id_col.iter().all(|opt| opt.is_some()), "id should be total");
}

/// BoolOr monoid instance: same shape checks as BoolAnd above.
#[test]
fn test_monoid_bool_or_structure() {
    let path = Path::new("examples/geolog/monoid.geolog");
    let state = load_geolog_file(path).unwrap();

    let bool_or = state.instances.get("BoolOr").unwrap();

    // BoolOr has 2 elements (T, F)
    assert_eq!(bool_or.structure.carrier_size(0), 2, "BoolOr should have 2 elements");

    // Check mul function is total
    if let geolog::core::FunctionColumn::ProductLocal { storage, .. } = &bool_or.structure.functions[0] {
        assert!(storage.is_total(&[2, 2]), "mul should be total on 2×2 domain");
    } else {
        panic!("mul should be a ProductLocal function");
    }
}

// ============================================================================
// Preorder example
// ============================================================================

#[test]
fn test_preorder_example_parses() {
    let path = Path::new("examples/geolog/preorder.geolog");
    let state = load_geolog_file(path).expect("preorder.geolog should parse and elaborate");

    // Check theory
    let preorder = state.theories.get("Preorder").expect("Preorder theory should exist");
    assert_eq!(preorder.theory.signature.sorts.len(), 1, "Preorder should have 1 sort (X)");
    assert_eq!(preorder.theory.signature.relations.len(), 1, "Preorder should have 1 relation (leq)");
    assert_eq!(preorder.theory.axioms.len(), 2, "Preorder should have 2 axioms (refl, trans)");

    // Check instances
    assert!(state.instances.contains_key("Discrete3"));
    assert!(state.instances.contains_key("Chain3"));
}

// ============================================================================
// Transitive closure example (demonstrates chase algorithm)
// ============================================================================

#[test]
fn test_transitive_closure_example_parses() {
    let path = Path::new("examples/geolog/transitive_closure.geolog");
    let state = load_geolog_file(path).expect("transitive_closure.geolog should parse and elaborate");

    // Check theory
    let graph = state.theories.get("Graph").expect("Graph theory should exist");
    assert_eq!(graph.theory.signature.sorts.len(), 1, "Graph should have 1 sort (V)");
    assert_eq!(graph.theory.signature.relations.len(), 2, "Graph should have 2 relations (Edge, Path)");
    assert_eq!(graph.theory.axioms.len(), 2, "Graph should have 2 axioms (base, trans)");

    // Check instances
    assert!(state.instances.contains_key("Chain"), "Chain instance should exist");
    assert!(state.instances.contains_key("Diamond"), "Diamond instance should exist");
    assert!(state.instances.contains_key("Cycle"), "Cycle instance should exist");
}

#[test]
fn test_transitive_closure_chain_structure() {
    let path = Path::new("examples/geolog/transitive_closure.geolog");
    let state = load_geolog_file(path).unwrap();

    let chain = state.instances.get("Chain").unwrap();

    // Chain has 4 vertices
    assert_eq!(chain.structure.carrier_size(0), 4, "Chain should have 4 vertices");

    // With `= chase { ... }`, axioms are applied during elaboration.
    // Path now has 6 tuples (transitive closure computed automatically).
    use geolog::core::RelationStorage;
    assert_eq!(chain.structure.relations[0].len(), 3, "Chain should have 3 Edge tuples");
    assert_eq!(chain.structure.relations[1].len(), 6,
        "Chain should have 6 Path tuples after chase: 3 base + 2 one-step + 1 two-step");
}

#[test]
fn test_transitive_closure_chase() {
    use geolog::core::RelationStorage;
    use geolog::query::chase::chase_fixpoint;
    use geolog::universe::Universe;

    let path = Path::new("examples/geolog/transitive_closure.geolog");
    let mut state = load_geolog_file(path).unwrap();

    let chain = state.instances.get_mut("Chain").unwrap();
    let theory = state.theories.get("Graph").unwrap();

    // Chase already ran during elaboration (instance uses `= chase { ... }`),
    // so Path already has 6 tuples.
    assert_eq!(chain.structure.relations[1].len(), 6,
        "Chain should have 6 Path tuples after elaboration with chase");

    // Running chase again should be idempotent (1 iteration, no changes)
    let mut universe = Universe::new();

    let iterations = chase_fixpoint(
        &theory.theory.axioms,
        &mut chain.structure,
        &mut universe,
        &theory.theory.signature,
        100,
    ).unwrap();

    // Should converge immediately (already at fixpoint)
    assert_eq!(iterations, 1, "Chase should converge in 1 iteration when already at fixpoint");

    // Still have 6 Path tuples
    assert_eq!(chain.structure.relations[1].len(), 6,
        "Chain should still have 6 Path tuples");
}

// ============================================================================
// Theories: GeologMeta and RelAlgIR
// ============================================================================

#[test]
fn test_geolog_meta_loads() {
    let path = Path::new("theories/GeologMeta.geolog");
    let state = load_geolog_file(path).expect("GeologMeta.geolog should parse and elaborate");

    let meta = state.theories.get("GeologMeta").expect("GeologMeta theory should exist");

    // GeologMeta is a large theory: 41 sorts, 78 functions, 3 relations, 16 axioms
    assert_eq!(meta.theory.signature.sorts.len(), 41, "GeologMeta should have 41 sorts");
    assert_eq!(meta.theory.signature.functions.len(), 78, "GeologMeta should have 78 functions");
    assert_eq!(meta.theory.signature.relations.len(), 3, "GeologMeta should have 3 relations");
    assert_eq!(meta.theory.axioms.len(), 16, "GeologMeta should have 16 axioms");

    // Check some key sorts exist
    assert!(meta.theory.signature.lookup_sort("Theory").is_some(), "Theory sort should exist");
    assert!(meta.theory.signature.lookup_sort("Srt").is_some(), "Srt sort should exist");
    assert!(meta.theory.signature.lookup_sort("Func").is_some(), "Func sort should exist");
    assert!(meta.theory.signature.lookup_sort("Elem").is_some(), "Elem sort should exist");
}

#[test]
fn test_relalg_ir_loads() {
    // First load GeologMeta (RelAlgIR extends it)
    let meta_content = fs::read_to_string("theories/GeologMeta.geolog")
        .expect("Failed to read GeologMeta.geolog");
    let ir_content = fs::read_to_string("theories/RelAlgIR.geolog")
        .expect("Failed to read RelAlgIR.geolog");

    let mut state = ReplState::new();

    state.execute_geolog(&meta_content)
        .expect("GeologMeta should load");
    state.execute_geolog(&ir_content)
        .expect("RelAlgIR should load");

    let ir = state.theories.get("RelAlgIR").expect("RelAlgIR theory should exist");

    // RelAlgIR has 80 sorts (41 from GeologMeta + 39 own)
    assert_eq!(ir.theory.signature.sorts.len(), 80, "RelAlgIR should have 80 sorts");

    // Check GeologMeta sorts are correctly qualified
    assert!(ir.theory.signature.lookup_sort("GeologMeta/Srt").is_some(),
        "GeologMeta/Srt should exist (inherited sort)");
    assert!(ir.theory.signature.lookup_sort("GeologMeta/Func").is_some(),
        "GeologMeta/Func should exist (inherited sort)");

    // Check RelAlgIR's own sorts exist (no prefix)
    assert!(ir.theory.signature.lookup_sort("Wire").is_some(),
        "Wire sort should exist");
    assert!(ir.theory.signature.lookup_sort("Op").is_some(),
        "Op sort should exist");
    assert!(ir.theory.signature.lookup_sort("ScanOp").is_some(),
        "ScanOp sort should exist");

    // Check functions are correctly qualified
    // GeologMeta's "Func/dom" should become "GeologMeta/Func/dom"
    assert!(ir.theory.signature.lookup_func("GeologMeta/Func/dom").is_some(),
        "GeologMeta/Func/dom should exist (inherited function)");
    assert!(ir.theory.signature.lookup_func("GeologMeta/Func/cod").is_some(),
        "GeologMeta/Func/cod should exist (inherited function)");

    // RelAlgIR's own functions
    assert!(ir.theory.signature.lookup_func("Wire/schema").is_some(),
        "Wire/schema should exist");
    assert!(ir.theory.signature.lookup_func("ScanOp/out").is_some(),
        "ScanOp/out should exist");

    // Check functions referencing inherited sorts have correct domain/codomain
    // ScanOp/srt : ScanOp -> GeologMeta/Srt
    let scan_srt = ir.theory.signature.lookup_func("ScanOp/srt")
        .expect("ScanOp/srt should exist");
    let func_info = &ir.theory.signature.functions[scan_srt];
    match &func_info.codomain {
        geolog::core::DerivedSort::Base(sort_id) => {
            let sort_name = &ir.theory.signature.sorts[*sort_id];
            assert_eq!(sort_name, "GeologMeta/Srt",
                "ScanOp/srt codomain should be GeologMeta/Srt");
        }
        _ => panic!("ScanOp/srt codomain should be a base sort"),
    }
}

// ============================================================================
// RelAlgIR query plan examples
// ============================================================================

/// Test that RelAlgIR instances can be created and represent query plans
///
/// These instances use `= chase { ... }` to derive relations from axioms.
/// The chase handles function applications in premises (e.g., `s ScanOp/out = w`)
/// and universal conclusions (e.g., `forall x. |- R(x,x)`).
+#[test] +fn test_relalg_simple_examples() { + // Load theories first + let meta_content = fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + let examples_content = fs::read_to_string("examples/geolog/relalg_simple.geolog") + .expect("Failed to read relalg_simple.geolog"); + + let mut state = ReplState::new(); + + state.execute_geolog(&meta_content) + .expect("GeologMeta should load"); + state.execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + state.execute_geolog(&examples_content) + .expect("relalg_simple.geolog should load"); + + // Check ScanV instance + let scan_v = state.instances.get("ScanV") + .expect("ScanV instance should exist"); + assert_eq!(scan_v.structure.len(), 7, "ScanV should have 7 elements"); + + // Check FilterScan instance + let filter_scan = state.instances.get("FilterScan") + .expect("FilterScan instance should exist"); + assert_eq!(filter_scan.structure.len(), 18, "FilterScan should have 18 elements"); + + // Verify FilterScan has the expected sorts populated + // Get RelAlgIR theory for sort lookups + let ir = state.theories.get("RelAlgIR").expect("RelAlgIR should exist"); + + // Check Wire sort has 2 elements (w1, w2) + let wire_sort = ir.theory.signature.lookup_sort("Wire").expect("Wire sort"); + assert_eq!( + filter_scan.structure.carrier_size(wire_sort), 2, + "FilterScan should have 2 Wire elements" + ); + + // Check FilterOp sort has 1 element + let filter_sort = ir.theory.signature.lookup_sort("FilterOp").expect("FilterOp sort"); + assert_eq!( + filter_scan.structure.carrier_size(filter_sort), 1, + "FilterScan should have 1 FilterOp element" + ); + + // Check ScanOp sort has 1 element + let scan_sort = ir.theory.signature.lookup_sort("ScanOp").expect("ScanOp sort"); + assert_eq!( + filter_scan.structure.carrier_size(scan_sort), 1, + "FilterScan should have 1 ScanOp 
element" + ); +} + +// ============================================================================ +// RelAlgIR compile → execute roundtrip +// ============================================================================ + +/// Helper to load RelAlgIR theory for tests +fn load_relalg_for_test() -> (ReplState, std::rc::Rc) { + let meta_content = fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + + let mut state = ReplState::new(); + state + .execute_geolog(&meta_content) + .expect("GeologMeta should load"); + state + .execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + + let relalg_theory = state + .theories + .get("RelAlgIR") + .expect("RelAlgIR should exist") + .clone(); + + (state, relalg_theory) +} + +/// Helper to verify roundtrip: direct execution == RelAlgIR execution +fn verify_roundtrip( + plan: &geolog::query::backend::QueryOp, + target: &geolog::core::Structure, + relalg_theory: &geolog::core::ElaboratedTheory, + description: &str, +) { + use geolog::query::backend::execute; + use geolog::query::from_relalg::execute_relalg; + use geolog::query::to_relalg::compile_to_relalg; + use geolog::universe::Universe; + + // Execute directly + let direct_result = execute(plan, target); + + // Compile to RelAlgIR + let mut universe = Universe::new(); + let relalg_instance = compile_to_relalg(plan, &std::rc::Rc::new(relalg_theory.clone()), &mut universe) + .unwrap_or_else(|e| panic!("{}: Compilation failed: {}", description, e)); + + // Execute via RelAlgIR interpreter + let relalg_result = execute_relalg(&relalg_instance, relalg_theory, target, None) + .unwrap_or_else(|e| panic!("{}: RelAlgIR execution failed: {}", description, e)); + + // Compare results + assert_eq!( + direct_result.len(), + relalg_result.len(), + "{}: Length mismatch ({} vs {})", + description, + direct_result.len(), + 
relalg_result.len() + ); + + for (tuple, mult) in direct_result.iter() { + assert_eq!( + relalg_result.tuples.get(tuple), + Some(mult), + "{}: Tuple {:?} has wrong multiplicity", + description, + tuple + ); + } +} + +/// Tests that we can compile a query to RelAlgIR and then execute it, +/// getting the same results as direct execution. +#[test] +fn test_relalg_compile_execute_roundtrip() { + use geolog::core::Structure; + use geolog::query::backend::QueryOp; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create a simple test structure with 3 elements in sort 0 + let mut target = Structure::new(1); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[0].insert(2); + + // Test Scan + let scan_plan = QueryOp::Scan { sort_idx: 0 }; + verify_roundtrip(&scan_plan, &target, &relalg_theory, "Scan"); +} + +#[test] +fn test_relalg_roundtrip_filter() { + use geolog::core::Structure; + use geolog::id::{NumericId, Slid}; + use geolog::query::backend::{Predicate, QueryOp}; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure with 5 elements + let mut target = Structure::new(1); + for i in 0..5 { + target.carriers[0].insert(i); + } + + // Filter with True predicate (should keep all) + let filter_true = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::True, + }; + verify_roundtrip(&filter_true, &target, &relalg_theory, "Filter(True)"); + + // Filter with False predicate (should keep none) + let filter_false = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::False, + }; + verify_roundtrip(&filter_false, &target, &relalg_theory, "Filter(False)"); + + // Filter with ColEqConst + let filter_const = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(2), + }, + }; + verify_roundtrip(&filter_const, &target, &relalg_theory, "Filter(ColEqConst)"); +} + +#[test] +fn 
test_relalg_roundtrip_join() { + use geolog::core::Structure; + use geolog::query::backend::{JoinCond, QueryOp}; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure with 2 sorts + let mut target = Structure::new(2); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[1].insert(10); + target.carriers[1].insert(11); + target.carriers[1].insert(12); + + // Cross join + let cross_join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + verify_roundtrip(&cross_join, &target, &relalg_theory, "Join(Cross)"); +} + +#[test] +fn test_relalg_roundtrip_union() { + use geolog::core::Structure; + use geolog::query::backend::QueryOp; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure + let mut target = Structure::new(2); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[1].insert(2); + target.carriers[1].insert(3); + + // Union of two scans + let union_plan = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + }; + verify_roundtrip(&union_plan, &target, &relalg_theory, "Union"); +} + +#[test] +fn test_relalg_roundtrip_distinct_negate() { + use geolog::core::Structure; + use geolog::query::backend::QueryOp; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure + let mut target = Structure::new(1); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[0].insert(2); + + // Distinct + let distinct_plan = QueryOp::Distinct { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + verify_roundtrip(&distinct_plan, &target, &relalg_theory, "Distinct"); + + // Negate + let negate_plan = QueryOp::Negate { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + verify_roundtrip(&negate_plan, &target, &relalg_theory, "Negate"); +} + +// 
============================================================================ +// Meta-test: all examples should parse +// ============================================================================ + +/// Tests that all standalone .geolog example files parse and elaborate. +/// +/// Note: Some examples require loading theories first (e.g., relalg_simple.geolog +/// requires GeologMeta and RelAlgIR). These are tested separately. +#[test] +fn test_all_examples_parse() { + let examples_dir = Path::new("examples/geolog"); + + if !examples_dir.exists() { + panic!("examples/geolog directory does not exist"); + } + + // Examples that require loading theories first (tested separately) + let requires_theories = [ + "relalg_simple.geolog", + ]; + + let mut failures = Vec::new(); + + for entry in fs::read_dir(examples_dir).unwrap() { + let entry = entry.unwrap(); + let path = entry.path(); + + // Skip files that require loading theories first + if path.file_name() + .and_then(|f| f.to_str()) + .is_some_and(|file_name| requires_theories.contains(&file_name)) + { + continue; + } + + if path.extension().is_some_and(|ext| ext == "geolog") + && let Err(e) = load_geolog_file(&path) { + failures.push(format!("{}: {}", path.display(), e)); + } + } + + if !failures.is_empty() { + panic!( + "The following example files failed to parse/elaborate:\n{}", + failures.join("\n") + ); + } +} + +// ============================================================================ +// Multi-session persistence tests +// ============================================================================ + +/// Tests that theories and instances survive REPL restarts +/// +/// This is a critical test for persistence: create data in one "session", +/// then verify it's still accessible after creating a new ReplState with +/// the same persistence path. 
+#[test] +fn test_persistence_survives_restart() { + use tempfile::tempdir; + + let dir = tempdir().expect("Failed to create temp directory"); + let db_path = dir.path().join("test.geolog"); + + // Session 1: Create a theory and instance + { + let mut state = ReplState::with_path(&db_path); + + let theory_def = r#" + theory Counter { + C : Sort; + next : C -> C; + } + "#; + state.execute_geolog(theory_def).expect("Theory should define"); + + let instance_def = r#" + instance Mod3 : Counter = { + zero : C; + one : C; + two : C; + zero next = one; + one next = two; + two next = zero; + } + "#; + state.execute_geolog(instance_def).expect("Instance should define"); + + // Verify it's in the current session + assert!(state.theories.contains_key("Counter"), "Counter theory should exist in session 1"); + assert!(state.instances.contains_key("Mod3"), "Mod3 instance should exist in session 1"); + + // Explicitly save before dropping + state.store.save().expect("Save should succeed"); + } + + // Session 2: Load from same path and verify data persists + { + let state = ReplState::with_path(&db_path); + + // Theory should be reconstructed + assert!( + state.theories.contains_key("Counter"), + "Counter theory should persist across sessions" + ); + + // Instance should be reconstructed + assert!( + state.instances.contains_key("Mod3"), + "Mod3 instance should persist across sessions" + ); + + // Verify instance structure has correct element count + let mod3 = state.instances.get("Mod3").expect("Mod3 should exist"); + assert_eq!( + mod3.structure.len(), + 3, + "Mod3 should have 3 elements after reload" + ); + } +} + +/// Tests that chase-derived data persists correctly +#[test] +fn test_persistence_with_chase() { + use tempfile::tempdir; + + let dir = tempdir().expect("Failed to create temp directory"); + let db_path = dir.path().join("chase_test.geolog"); + + // Session 1: Create a theory with chase + { + let mut state = ReplState::with_path(&db_path); + + let content = r#" + 
theory Preorder { + X : Sort; + leq : [lo: X, hi: X] -> Prop; + ax/refl : forall x : X. |- [lo: x, hi: x] leq; + } + + instance Three : Preorder = chase { + a : X; + b : X; + c : X; + } + "#; + state.execute_geolog(content).expect("Should define theory and chase instance"); + + // Verify chase added diagonal tuples + let three = state.instances.get("Three").expect("Three should exist"); + assert_eq!(three.structure.relations[0].tuples.len(), 3, "Should have 3 reflexive tuples"); + + // Explicitly save before dropping + state.store.save().expect("Save should succeed"); + } + + // Session 2: Verify chase results persist + { + let state = ReplState::with_path(&db_path); + + assert!(state.theories.contains_key("Preorder"), "Theory should persist"); + assert!(state.instances.contains_key("Three"), "Instance should persist"); + + let three = state.instances.get("Three").expect("Three should exist"); + assert_eq!( + three.structure.relations[0].tuples.len(), + 3, + "Chase-derived tuples should persist" + ); + } +} diff --git a/tests/generators.rs b/tests/generators.rs new file mode 100644 index 0000000..82b0c38 --- /dev/null +++ b/tests/generators.rs @@ -0,0 +1,375 @@ +//! Proptest generators for geolog data structures +//! +//! Provides `Strategy` implementations for generating valid instances +//! of core data types used in property tests. 
+ +#![allow(dead_code)] + +use geolog::core::{SortId, Structure}; +use geolog::id::{NumericId, Slid, Uuid}; +use geolog::naming::NamingIndex; +use geolog::universe::Universe; +use proptest::collection::vec; +use proptest::prelude::*; +use std::collections::HashSet; + +// ============================================================================ +// UUID Generation +// ============================================================================ + +/// Generate arbitrary UUIDs (using v7 format) +pub fn arb_uuid() -> impl Strategy { + // Generate random bytes for the UUID + prop::array::uniform16(any::()).prop_map(|bytes| { + // Create a valid v7-ish UUID from random bytes + Uuid::from_bytes(bytes) + }) +} + +/// Generate a vector of unique UUIDs +pub fn arb_unique_uuids(count: usize) -> impl Strategy> { + vec(arb_uuid(), count..=count).prop_filter_map("unique uuids", |uuids| { + let set: HashSet<_> = uuids.iter().collect(); + if set.len() == uuids.len() { + Some(uuids) + } else { + None + } + }) +} + +// ============================================================================ +// Name Generation +// ============================================================================ + +/// Generate a valid identifier (alphanumeric, starting with letter) +pub fn arb_identifier() -> impl Strategy { + "[a-zA-Z][a-zA-Z0-9_]{0,15}".prop_map(String::from) +} + +/// Generate a qualified name path (non-empty vector of identifiers) +pub fn arb_qualified_name() -> impl Strategy> { + vec(arb_identifier(), 1..=3) +} + +// ============================================================================ +// Structure Generation +// ============================================================================ + +/// Parameters for structure generation +#[derive(Debug, Clone)] +pub struct StructureParams { + pub num_sorts: usize, + pub max_elements_per_sort: usize, +} + +impl Default for StructureParams { + fn default() -> Self { + Self { + num_sorts: 3, + max_elements_per_sort: 5, + } + } 
+} + +/// Generate a valid Structure with elements distributed across sorts +pub fn arb_structure(params: StructureParams) -> impl Strategy { + // Generate element counts for each sort + vec(0..=params.max_elements_per_sort, params.num_sorts) + .prop_flat_map(move |element_counts| { + let num_sorts = params.num_sorts; + Just((element_counts, num_sorts)) + }) + .prop_map(|(element_counts, num_sorts)| { + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + for (sort_id, &count) in element_counts.iter().enumerate() { + for _ in 0..count { + structure.add_element(&mut universe, sort_id as SortId); + } + } + + (structure, universe) + }) +} + +/// Generate a structure with specific element count +pub fn arb_structure_with_elements( + num_sorts: usize, + total_elements: usize, +) -> impl Strategy { + // Distribute elements randomly across sorts + vec(0..num_sorts, total_elements).prop_map(move |sort_assignments| { + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + for sort_id in sort_assignments { + structure.add_element(&mut universe, sort_id as SortId); + } + + (structure, universe) + }) +} + +// ============================================================================ +// NamingIndex Generation +// ============================================================================ + +/// Generate a NamingIndex with random entries +pub fn arb_naming_index(max_entries: usize) -> impl Strategy { + vec((arb_uuid(), arb_qualified_name()), 0..=max_entries).prop_filter_map( + "unique uuids in naming", + |entries| { + // Ensure UUIDs are unique + let uuids: HashSet<_> = entries.iter().map(|(u, _)| u).collect(); + if uuids.len() == entries.len() { + let mut index = NamingIndex::new(); + for (uuid, name) in entries { + index.insert(uuid, name); + } + Some(index) + } else { + None + } + }, + ) +} + +/// Generate a NamingIndex that matches a Universe (same UUIDs) +pub fn arb_naming_for_universe(universe: 
&Universe) -> impl Strategy { + let uuids: Vec = universe.iter().map(|(_, uuid)| uuid).collect(); + let count = uuids.len(); + + vec(arb_qualified_name(), count).prop_map(move |names| { + let mut index = NamingIndex::new(); + for (uuid, name) in uuids.iter().zip(names.into_iter()) { + index.insert(*uuid, name); + } + index + }) +} + +// ============================================================================ +// Element Operations (for testing add/remove sequences) +// ============================================================================ + +/// An operation on a structure +#[derive(Debug, Clone)] +pub enum StructureOp { + AddElement { sort_id: SortId }, +} + +/// Generate a sequence of structure operations +pub fn arb_structure_ops( + num_sorts: usize, + max_ops: usize, +) -> impl Strategy> { + vec( + (0..num_sorts).prop_map(|sort_id| StructureOp::AddElement { sort_id }), + 0..=max_ops, + ) +} + +// ============================================================================ +// Test Helpers +// ============================================================================ + +/// Check that a Structure maintains its internal invariants +pub fn check_structure_invariants(structure: &Structure) -> Result<(), String> { + // Invariant 1: luids and sorts have same length + if structure.luids.len() != structure.sorts.len() { + return Err(format!( + "luids.len({}) != sorts.len({})", + structure.luids.len(), + structure.sorts.len() + )); + } + + // Invariant 2: luid_to_slid is inverse of luids + for (slid_idx, &luid) in structure.luids.iter().enumerate() { + let slid = Slid::from_usize(slid_idx); + match structure.luid_to_slid.get(&luid) { + Some(&mapped_slid) if mapped_slid == slid => {} + Some(&mapped_slid) => { + return Err(format!( + "luid_to_slid[{}] = {}, but luids[{}] = {}", + luid, mapped_slid, slid, luid + )); + } + None => { + return Err(format!( + "luid {} at slid {} not in luid_to_slid", + luid, slid + )); + } + } + } + + // Invariant 3: Each element 
appears in exactly one carrier, matching its sort + for (slid, &sort_id) in structure.sorts.iter().enumerate() { + if sort_id >= structure.carriers.len() { + return Err(format!( + "sort_id {} at slid {} >= carriers.len({})", + sort_id, + slid, + structure.carriers.len() + )); + } + + if !structure.carriers[sort_id].contains(slid as u64) { + return Err(format!( + "slid {} with sort {} not in carriers[{}]", + slid, sort_id, sort_id + )); + } + + // Check it's not in any other carrier + for (other_sort, carrier) in structure.carriers.iter().enumerate() { + if other_sort != sort_id && carrier.contains(slid as u64) { + return Err(format!( + "slid {} appears in carrier {} but has sort {}", + slid, other_sort, sort_id + )); + } + } + } + + // Invariant 4: Total carrier size equals number of elements + let total_carrier_size: usize = structure.carriers.iter().map(|c| c.len() as usize).sum(); + if total_carrier_size != structure.luids.len() { + return Err(format!( + "total carrier size {} != luids.len({})", + total_carrier_size, + structure.luids.len() + )); + } + + Ok(()) +} + +/// Check that two structures are equivalent (same elements and functions) +pub fn structures_equivalent(s1: &Structure, s2: &Structure, u1: &Universe, u2: &Universe) -> bool { + // Same number of sorts + if s1.num_sorts() != s2.num_sorts() { + return false; + } + + // Same number of elements + if s1.len() != s2.len() { + return false; + } + + // Same UUIDs (via Luid lookup) + let uuids1: HashSet<_> = s1.luids.iter().filter_map(|&luid| u1.get(luid)).collect(); + let uuids2: HashSet<_> = s2.luids.iter().filter_map(|&luid| u2.get(luid)).collect(); + + uuids1 == uuids2 +} + +// ============================================================================ +// Tensor Generation +// ============================================================================ + +use geolog::tensor::SparseTensor; +use std::collections::BTreeSet; + +/// Parameters for sparse tensor generation +#[derive(Debug, Clone)] +pub 
struct TensorParams { + pub max_dims: usize, + pub max_dim_size: usize, + pub max_tuples: usize, +} + +impl Default for TensorParams { + fn default() -> Self { + Self { + max_dims: 4, + max_dim_size: 10, + max_tuples: 20, + } + } +} + +/// Generate a random sparse tensor +pub fn arb_sparse_tensor(params: TensorParams) -> impl Strategy { + // First generate dimensions + vec(1..=params.max_dim_size, 0..=params.max_dims).prop_flat_map(move |dims| { + let dims_clone = dims.clone(); + let max_tuples = params.max_tuples; + + // Generate tuples within the dimension bounds + if dims.is_empty() { + // Scalar tensor - either true or false + any::() + .prop_map(|value| { + let mut extent = BTreeSet::new(); + if value { + extent.insert(vec![]); + } + SparseTensor { dims: vec![], extent } + }) + .boxed() + } else { + // Generate random tuples + let tuple_gen = dims + .iter() + .map(|&d| 0..d) + .collect::>(); + + vec(tuple_gen.prop_map(|indices| indices), 0..=max_tuples) + .prop_map(move |tuples| { + let extent: BTreeSet> = tuples.into_iter().collect(); + SparseTensor { + dims: dims_clone.clone(), + extent, + } + }) + .boxed() + } + }) +} + +/// Generate a sparse tensor with specific dimensions +pub fn arb_sparse_tensor_with_dims(dims: Vec, max_tuples: usize) -> impl Strategy { + if dims.is_empty() { + any::() + .prop_map(|value| { + let mut extent = BTreeSet::new(); + if value { + extent.insert(vec![]); + } + SparseTensor { dims: vec![], extent } + }) + .boxed() + } else { + let tuple_gen: Vec<_> = dims.iter().map(|&d| 0..d).collect(); + let dims_clone = dims.clone(); + + vec(tuple_gen.prop_map(|indices| indices), 0..=max_tuples) + .prop_map(move |tuples| { + let extent: BTreeSet> = tuples.into_iter().collect(); + SparseTensor { + dims: dims_clone.clone(), + extent, + } + }) + .boxed() + } +} + +/// Generate a pair of tensors with matching dimensions (for disjunction tests) +pub fn arb_tensor_pair_same_dims(params: TensorParams) -> impl Strategy { + 
vec(1..=params.max_dim_size, 0..=params.max_dims).prop_flat_map(move |dims| { + let max_tuples = params.max_tuples; + let t1 = arb_sparse_tensor_with_dims(dims.clone(), max_tuples); + let t2 = arb_sparse_tensor_with_dims(dims, max_tuples); + (t1, t2) + }) +} + +/// Generate variable names +pub fn arb_var_names(count: usize) -> impl Strategy> { + Just((0..count).map(|i| format!("v{}", i)).collect()) +} diff --git a/tests/manual_fuzz.rs b/tests/manual_fuzz.rs new file mode 100644 index 0000000..d810eda --- /dev/null +++ b/tests/manual_fuzz.rs @@ -0,0 +1,188 @@ +//! Quick manual fuzzer - run with: cargo test --release manual_fuzz -- --ignored --nocapture + +use geolog::repl::ReplState; +use rand::prelude::*; +use std::time::Instant; + +fn random_ascii_string(rng: &mut impl Rng, len: usize) -> String { + (0..len).map(|_| rng.random_range(0x20u8..0x7F) as char).collect() +} + +fn random_geolog_like(rng: &mut impl Rng) -> String { + let keywords = ["theory", "instance", "Sort", "Prop", "forall", "exists", "chase"]; + let ops = [":", "->", "=", "|-", "{", "}", "[", "]", "(", ")", ";", ",", "."]; + let idents = ["x", "y", "z", "A", "B", "foo", "bar", "src", "tgt"]; + + let mut s = String::new(); + let len = rng.random_range(1..200); + for _ in 0..len { + match rng.random_range(0..4) { + 0 => s.push_str(keywords.choose(rng).unwrap()), + 1 => s.push_str(ops.choose(rng).unwrap()), + 2 => s.push_str(idents.choose(rng).unwrap()), + _ => s.push(' '), + } + if rng.random_bool(0.3) { s.push(' '); } + } + s +} + +#[test] +#[ignore] +fn manual_fuzz_parser() { + let mut rng = rand::rng(); + let start = Instant::now(); + let mut count = 0; + let mut errors = 0; + + while start.elapsed().as_secs() < 10 { + let len = rng.random_range(1usize..500); + let input = if rng.random_bool(0.5) { + random_ascii_string(&mut rng, len) + } else { + random_geolog_like(&mut rng) + }; + + // This should never panic + let result = std::panic::catch_unwind(|| { + let _ = geolog::parse(&input); + }); + + 
if result.is_err() { + eprintln!("PANIC on input: {:?}", input); + errors += 1; + } + count += 1; + } + + eprintln!("Ran {} iterations, {} panics", count, errors); + assert_eq!(errors, 0, "Parser panicked on some inputs!"); +} + +#[test] +#[ignore] +fn manual_fuzz_repl() { + let mut rng = rand::rng(); + let start = Instant::now(); + let mut count = 0; + let mut errors = 0; + + while start.elapsed().as_secs() < 10 { + let len = rng.random_range(1usize..500); + let input = if rng.random_bool(0.5) { + random_ascii_string(&mut rng, len) + } else { + random_geolog_like(&mut rng) + }; + + let result = std::panic::catch_unwind(|| { + let mut state = ReplState::new(); + let _ = state.execute_geolog(&input); + }); + + if result.is_err() { + eprintln!("PANIC on input: {:?}", input); + errors += 1; + } + count += 1; + } + + eprintln!("Ran {} iterations, {} panics", count, errors); + assert_eq!(errors, 0, "REPL panicked on some inputs!"); +} + +/// More aggressive fuzzer with edge-case generators +#[test] +#[ignore] +fn manual_fuzz_edge_cases() { + let mut rng = rand::rng(); + let start = Instant::now(); + let mut count = 0; + let mut errors = 0; + + // Edge case generators + let edge_cases: Vec String> = vec![ + // Deep nesting + |rng: &mut rand::rngs::ThreadRng| { + let depth = rng.random_range(10..100); + let mut s = "theory T { ".repeat(depth); + s.push_str(&"}".repeat(depth)); + s + }, + // Very long identifiers + |rng: &mut rand::rngs::ThreadRng| { + let len = rng.random_range(1000..10000); + format!("theory {} {{ }}", "a".repeat(len)) + }, + // Many small tokens + |rng: &mut rand::rngs::ThreadRng| { + let count = rng.random_range(100..1000); + (0..count).map(|_| "x ").collect::() + }, + // Unicode stress + |_rng: &mut rand::rngs::ThreadRng| { + "theory 日本語 { ∀ : Sort; ∃ : Sort -> Prop; }".to_string() + }, + // Null bytes and control chars + |rng: &mut rand::rngs::ThreadRng| { + let mut s = String::from("theory T { "); + for _ in 0..rng.random_range(1..50) { + 
s.push(rng.random_range(0u8..32) as char); + } + s.push_str(" }"); + s + }, + // Deeply nested records + |rng: &mut rand::rngs::ThreadRng| { + let depth = rng.random_range(5..30); + let mut s = String::from("theory T { f : "); + for _ in 0..depth { + s.push_str("[x: "); + } + s.push_str("Sort"); + for _ in 0..depth { + s.push_str("]"); + } + s.push_str(" -> Prop; }"); + s + }, + // Many axioms + |rng: &mut rand::rngs::ThreadRng| { + let count = rng.random_range(50..200); + let mut s = String::from("theory T { X : Sort; "); + for i in 0..count { + s.push_str(&format!("ax{} : forall x : X. |- x = x; ", i)); + } + s.push('}'); + s + }, + // Pathological chase + |_rng: &mut rand::rngs::ThreadRng| { + r#" + theory Loop { X : Sort; r : [a: X, b: X] -> Prop; + ax : forall x : X. |- exists y : X. [a: x, b: y] r; + } + instance I : Loop = chase { start : X; } + "#.to_string() + }, + ]; + + while start.elapsed().as_secs() < 30 { + let gen_idx = rng.random_range(0..edge_cases.len()); + let input = edge_cases[gen_idx](&mut rng); + + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut state = ReplState::new(); + let _ = state.execute_geolog(&input); + })); + + if result.is_err() { + eprintln!("PANIC on input (gen {}): {:?}", gen_idx, &input[..input.len().min(200)]); + errors += 1; + } + count += 1; + } + + eprintln!("Ran {} edge-case iterations, {} panics", count, errors); + assert_eq!(errors, 0, "REPL panicked on edge cases!"); +} diff --git a/tests/negative/solution2_incomplete_negative_test.geolog b/tests/negative/solution2_incomplete_negative_test.geolog new file mode 100644 index 0000000..f3ee078 --- /dev/null +++ b/tests/negative/solution2_incomplete_negative_test.geolog @@ -0,0 +1,218 @@ +// NEGATIVE TEST: This file contains an INTENTIONALLY INCOMPLETE solution. +// +// The trace for problem2 is missing the wire connecting f1's output to f2's input. +// When Trace theory has proper wire axioms, this should FAIL axiom checking. 
+// +// This file serves as a regression test: if this ever starts passing, +// either the axioms are broken or the solver has a bug. + +// ============================================================ +// THEORY: PetriNet +// ============================================================ + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// ============================================================ +// THEORY: Marking +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace (with Wire axioms) +// +// A trace records transition firings and token flow via wires. +// The completeness axioms (ax5, ax6) ensure every arc is accounted for. +// ============================================================ + +theory (N : PetriNet instance) Trace { + // Firings + F : Sort; + F/of : F -> N/T; + + // Wires connect output arcs of firings to input arcs of other firings + W : Sort; + W/src_firing : W -> F; + W/src_arc : W -> N/out; + W/tgt_firing : W -> F; + W/tgt_arc : W -> N/in; + + // Wire coherence: source arc must belong to source firing's transition + ax/wire_src_coherent : forall w : W. + |- w W/src_arc N/out/src = w W/src_firing F/of; + + // Wire coherence: target arc must belong to target firing's transition + ax/wire_tgt_coherent : forall w : W. 
+ |- w W/tgt_arc N/in/tgt = w W/tgt_firing F/of; + + // Wire place coherence: wire connects matching places + ax/wire_place_coherent : forall w : W. + |- w W/src_arc N/out/tgt = w W/tgt_arc N/in/src; + + // Terminals + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + + // Terminals connect to specific firings and arcs + input_terminal/tgt_firing : input_terminal -> F; + input_terminal/tgt_arc : input_terminal -> N/in; + output_terminal/src_firing : output_terminal -> F; + output_terminal/src_arc : output_terminal -> N/out; + + // Terminal coherence axioms + ax/input_terminal_coherent : forall i : input_terminal. + |- i input_terminal/tgt_arc N/in/tgt = i input_terminal/tgt_firing F/of; + + ax/output_terminal_coherent : forall o : output_terminal. + |- o output_terminal/src_arc N/out/src = o output_terminal/src_firing F/of; + + // Terminal place coherence + ax/input_terminal_place : forall i : input_terminal. + |- i input_terminal/of = i input_terminal/tgt_arc N/in/src; + + ax/output_terminal_place : forall o : output_terminal. + |- o output_terminal/of = o output_terminal/src_arc N/out/tgt; + + // COMPLETENESS: Every arc of every firing must be accounted for. + + // Input completeness: catches the missing wire in solution2! + ax/input_complete : forall f : F, arc : N/in. + arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt_firing = f, w W/tgt_arc = arc) \/ + (exists i : input_terminal. i input_terminal/tgt_firing = f, i input_terminal/tgt_arc = arc); + + // Output completeness: every output arc must be captured + ax/output_complete : forall f : F, arc : N/out. + arc N/out/src = f F/of |- + (exists w : W. w W/src_firing = f, w W/src_arc = arc) \/ + (exists o : output_terminal. 
o output_terminal/src_firing = f, o output_terminal/src_arc = arc); +} + +// ============================================================ +// THEORY: Iso +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + + // Roundtrip axioms ensure this is a true bijection + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; +} + +// ============================================================ +// INSTANCE: ExampleNet +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; abc : T; + + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + abc_in1 : in; abc_in1 in/src = A; abc_in1 in/tgt = abc; + abc_in2 : in; abc_in2 in/src = B; abc_in2 in/tgt = abc; + abc_out : out; abc_out out/src = abc; abc_out out/tgt = C; +} + +// ============================================================ +// PROBLEM 2: Can we reach C from two A-tokens? 
+// ============================================================ + +instance problem2 : ExampleNet ReachabilityProblem = { + initial_marking = { + t1 : token; t1 token/of = ExampleNet/A; + t2 : token; t2 token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/C; + }; +} + +// ============================================================ +// INCOMPLETE SOLUTION 2: This should FAIL! +// +// The trace has two firings (f1: ab, f2: abc) but NO WIRE +// connecting f1's output to f2's B-input. The axiom +// ax/input_complete should catch this: f2's abc_in2 arc +// is neither wired nor terminal-fed. +// ============================================================ + +instance solution2_incomplete : ExampleNet problem2 Solution = { + trace = { + f1 : F; f1 F/of = ExampleNet/ab; + f2 : F; f2 F/of = ExampleNet/abc; + + // Input terminals for the two initial A-tokens + it1 : input_terminal; + it1 input_terminal/of = ExampleNet/A; + it1 input_terminal/tgt_firing = f1; + it1 input_terminal/tgt_arc = ExampleNet/ab_in; + + it2 : input_terminal; + it2 input_terminal/of = ExampleNet/A; + it2 input_terminal/tgt_firing = f2; + it2 input_terminal/tgt_arc = ExampleNet/abc_in1; + + // Output terminal for the final C-token + ot : output_terminal; + ot output_terminal/of = ExampleNet/C; + ot output_terminal/src_firing = f2; + ot output_terminal/src_arc = ExampleNet/abc_out; + + // INTENTIONALLY MISSING: The wire from f1's ab_out to f2's abc_in2! + // This means f2's abc_in2 (the B-input) is not fed by anything.
+ }; + + initial_iso = { + trace/it1 fwd = problem2/initial_marking/t1; + trace/it2 fwd = problem2/initial_marking/t2; + problem2/initial_marking/t1 bwd = trace/it1; + problem2/initial_marking/t2 bwd = trace/it2; + }; + + target_iso = { + trace/ot fwd = problem2/target_marking/t; + problem2/target_marking/t bwd = trace/ot; + }; +} diff --git a/tests/proptest_naming.proptest-regressions b/tests/proptest_naming.proptest-regressions new file mode 100644 index 0000000..e56ed90 --- /dev/null +++ b/tests/proptest_naming.proptest-regressions @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 06ecbb5d81f792dbc38ba7b644be7c170752d98e62f77f39f94b80cb12be32c1 # shrinks to entries = [(00000000-0000-0000-0000-000000000000, ["O"]), (00000000-0000-0000-0000-000000000001, ["O"])] diff --git a/tests/proptest_naming.rs b/tests/proptest_naming.rs new file mode 100644 index 0000000..8000dbb --- /dev/null +++ b/tests/proptest_naming.rs @@ -0,0 +1,218 @@ +//! Property tests for NamingIndex (UUID ↔ Name bidirectional consistency) + +mod generators; + +use geolog::id::Uuid; +use geolog::naming::NamingIndex; +use proptest::prelude::*; +use std::collections::HashSet; +use tempfile::tempdir; + +proptest! 
{ + /// Insert then lookup returns the same name + #[test] + fn insert_get_roundtrip( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let mut index = NamingIndex::new(); + + index.insert(uuid, name.clone()); + + let retrieved = index.get(&uuid); + prop_assert_eq!(retrieved, Some(&name)); + } + + /// Simple name (last component) is correctly extracted + #[test] + fn simple_name_is_last_component( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let mut index = NamingIndex::new(); + index.insert(uuid, name.clone()); + + let simple = index.get_simple(&uuid); + let expected = name.last().map(|s| s.as_str()); + + prop_assert_eq!(simple, expected); + } + + /// lookup(simple_name) contains the UUID + #[test] + fn lookup_contains_uuid( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let mut index = NamingIndex::new(); + index.insert(uuid, name.clone()); + + if let Some(simple) = name.last() { + let results = index.lookup(simple); + prop_assert!(!results.is_empty()); + prop_assert!(results.contains(&uuid)); + } + } + + /// lookup_unique returns Some iff exactly one UUID has that name + #[test] + fn lookup_unique_semantics( + entries in proptest::collection::vec( + (generators::arb_uuid(), generators::arb_qualified_name()), + 1..10 + ) + ) { + // Filter to unique UUIDs + let mut seen_uuids = HashSet::new(); + let unique_entries: Vec<_> = entries.into_iter() + .filter(|(uuid, _)| seen_uuids.insert(*uuid)) + .collect(); + + let mut index = NamingIndex::new(); + for (uuid, name) in &unique_entries { + index.insert(*uuid, name.clone()); + } + + // For each simple name, check lookup_unique semantics + let mut name_counts: std::collections::HashMap> = + std::collections::HashMap::new(); + for (uuid, name) in &unique_entries { + if let Some(simple) = name.last() { + name_counts.entry(simple.clone()).or_default().push(*uuid); + } + } + + for (simple_name, uuids) in 
name_counts { + let unique_result = index.lookup_unique(&simple_name); + if uuids.len() == 1 { + prop_assert_eq!(unique_result, Some(uuids[0])); + } else { + prop_assert_eq!(unique_result, None); + } + } + } + + /// Ambiguous names (multiple UUIDs) return None for lookup_unique + #[test] + fn ambiguous_names_return_none( + uuid1 in generators::arb_uuid(), + uuid2 in generators::arb_uuid(), + shared_name in generators::arb_identifier() + ) { + prop_assume!(uuid1 != uuid2); + + let mut index = NamingIndex::new(); + index.insert(uuid1, vec!["Theory1".to_string(), shared_name.clone()]); + index.insert(uuid2, vec!["Theory2".to_string(), shared_name.clone()]); + + // lookup returns both + let results = index.lookup(&shared_name); + prop_assert_eq!(results.len(), 2); + prop_assert!(results.contains(&uuid1)); + prop_assert!(results.contains(&uuid2)); + + // lookup_unique returns None + prop_assert_eq!(index.lookup_unique(&shared_name), None); + } + + /// display_name returns the simple name if set, otherwise UUID string + #[test] + fn display_name_fallback(uuid in generators::arb_uuid()) { + let mut index = NamingIndex::new(); + + // Without name: should contain UUID + let display_without = index.display_name(&uuid); + let uuid_str = format!("{}", uuid); + prop_assert!(display_without.contains(&uuid_str)); + + // With name: should be the simple name + let name = vec!["Test".to_string(), "Element".to_string()]; + index.insert(uuid, name); + let display_with = index.display_name(&uuid); + prop_assert_eq!(display_with, "Element"); + } + + /// Save and load preserves all mappings + #[test] + fn save_load_roundtrip( + entries in proptest::collection::vec( + (generators::arb_uuid(), generators::arb_qualified_name()), + 1..15 + ) + ) { + // Filter to unique UUIDs + let mut seen_uuids = HashSet::new(); + let unique_entries: Vec<_> = entries.into_iter() + .filter(|(uuid, _)| seen_uuids.insert(*uuid)) + .collect(); + + let dir = tempdir().unwrap(); + let path = 
dir.path().join("names.bin"); + + // Save + { + let mut index = NamingIndex::with_path(&path); + for (uuid, name) in &unique_entries { + index.insert(*uuid, name.clone()); + } + index.save().unwrap(); + } + + // Load + { + let loaded = NamingIndex::load(&path).unwrap(); + + for (uuid, name) in &unique_entries { + prop_assert_eq!(loaded.get(uuid), Some(name)); + } + + prop_assert_eq!(loaded.len(), unique_entries.len()); + } + } + + /// Dirty flag consistency + #[test] + fn dirty_flag_consistency( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let dir = tempdir().unwrap(); + let path = dir.path().join("names.bin"); + + let mut index = NamingIndex::with_path(&path); + + // Initially clean + prop_assert!(!index.is_dirty()); + + // Dirty after insert + index.insert(uuid, name); + prop_assert!(index.is_dirty()); + + // Clean after save + index.save().unwrap(); + prop_assert!(!index.is_dirty()); + } + + /// Len reflects number of unique UUIDs + #[test] + fn len_reflects_entries( + entries in proptest::collection::vec( + (generators::arb_uuid(), generators::arb_qualified_name()), + 0..20 + ) + ) { + // Filter to unique UUIDs + let mut seen_uuids = HashSet::new(); + let unique_entries: Vec<_> = entries.into_iter() + .filter(|(uuid, _)| seen_uuids.insert(*uuid)) + .collect(); + + let mut index = NamingIndex::new(); + for (uuid, name) in &unique_entries { + index.insert(*uuid, name.clone()); + } + + prop_assert_eq!(index.len(), unique_entries.len()); + prop_assert_eq!(index.is_empty(), unique_entries.is_empty()); + } +} diff --git a/tests/proptest_overlay.proptest-regressions b/tests/proptest_overlay.proptest-regressions new file mode 100644 index 0000000..1a28615 --- /dev/null +++ b/tests/proptest_overlay.proptest-regressions @@ -0,0 +1,10 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. 
+# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 64ebf3015f934d35012468a9c62bc4bed3ccdfef098a2781f1a2e18d876db64f # shrinks to num_sorts = 1, num_elements_per_sort = 1, ops = [AddElement(1)] +cc b7c34753dab453dc9aa1c33b4726646aaa7fc1facd6b4037e6bf9a89aa469ff8 # shrinks to num_sorts = 1, num_elements_per_sort = 0, ops = [AddElement(1)] +cc 1cabffe39859e7c40bed9cc70a038cd0c84b121ffda2b8462c38941490c3b207 # shrinks to num_sorts = 1, num_elements_per_sort = 1, ops = [AddElement(1)] +cc bff7adcc8a25d73683c8b1c0a8dad051539b2ed9bc538d469ffa5d61cd7f34e7 # shrinks to num_sorts = 1, num_elements_per_sort = 0, ops = [AddElement(1)] diff --git a/tests/proptest_overlay.rs b/tests/proptest_overlay.rs new file mode 100644 index 0000000..22cb79a --- /dev/null +++ b/tests/proptest_overlay.rs @@ -0,0 +1,614 @@ +//! Property tests for the overlay system. +//! +//! The key invariant: reads through an overlay should match reads against +//! the materialized (committed) structure. This ensures the overlay correctly +//! represents all accumulated changes. + +use std::collections::{BTreeSet, HashSet}; +use std::sync::Arc; + +use geolog::core::{SortId, Structure}; +use geolog::id::{Luid, NumericId, Slid, Uuid}; +use geolog::overlay::OverlayStructure; +use geolog::universe::Universe; +use geolog::serialize::save_structure; +use geolog::zerocopy::MappedStructure; + +use proptest::prelude::*; +use tempfile::tempdir; + +// ============================================================================ +// STRATEGIES +// ============================================================================ + +/// Operations that can be applied to an overlay. 
+#[derive(Clone, Debug)] +enum OverlayOp { + /// Add a new element with the given sort + AddElement(SortId), + /// Assert a relation tuple (rel_id, indices into current elements) + AssertRelation(usize, Vec), + /// Retract a relation tuple (rel_id, indices into current elements) + RetractRelation(usize, Vec), +} + +/// Strategy for generating overlay operations. +fn overlay_op( + num_sorts: usize, + num_relations: usize, + arities: Vec, + max_elements: usize, +) -> impl Strategy { + let arities_assert = arities.clone(); + let arities_retract = arities; + + prop_oneof![ + // Add element (weighted more heavily to build up elements) + 3 => (0..num_sorts).prop_map(OverlayOp::AddElement), + // Assert relation + 2 => ((0..num_relations), prop::collection::vec(0..max_elements.max(1), 0..5)) + .prop_flat_map(move |(rel, indices)| { + let arity = arities_assert.get(rel).copied().unwrap_or(1); + let indices = if indices.len() >= arity { + indices[..arity].to_vec() + } else { + // Pad with zeros if not enough indices + let mut v = indices; + while v.len() < arity { + v.push(0); + } + v + }; + Just(OverlayOp::AssertRelation(rel, indices)) + }), + // Retract relation + 1 => ((0..num_relations), prop::collection::vec(0..max_elements.max(1), 0..5)) + .prop_flat_map(move |(rel, indices)| { + let arity = arities_retract.get(rel).copied().unwrap_or(1); + let indices = if indices.len() >= arity { + indices[..arity].to_vec() + } else { + let mut v = indices; + while v.len() < arity { + v.push(0); + } + v + }; + Just(OverlayOp::RetractRelation(rel, indices)) + }), + ] +} + +/// Strategy for generating a sequence of overlay operations. 
+fn overlay_ops( + num_sorts: usize, + num_relations: usize, + arities: Vec, + num_ops: usize, +) -> impl Strategy> { + // We generate ops that reference element indices up to some max + // The actual indices get clamped to valid range during execution + prop::collection::vec( + overlay_op(num_sorts, num_relations, arities, 100), + 0..num_ops, + ) +} + +// ============================================================================ +// TEST HELPERS +// ============================================================================ + +/// Create a base structure with some initial elements and relations. +fn create_base_structure( + universe: &mut Universe, + num_sorts: usize, + num_elements_per_sort: usize, + arities: &[usize], +) -> Structure { + let mut structure = Structure::new(num_sorts); + + // Add initial elements + for sort in 0..num_sorts { + for _ in 0..num_elements_per_sort { + structure.add_element(universe, sort); + } + } + + // Initialize relations + structure.init_relations(arities); + + structure +} + +/// Apply an operation to an overlay, tracking current element count. 
+fn apply_op( + overlay: &mut OverlayStructure, + universe: &mut Universe, + op: &OverlayOp, + element_count: &mut usize, +) { + let num_sorts = overlay.num_sorts(); + let num_relations = overlay.num_relations(); + + match op { + OverlayOp::AddElement(sort) => { + // Clamp sort to valid range + let sort = *sort % num_sorts; + let luid = universe.intern(Uuid::now_v7()); + overlay.add_element(luid, sort); + *element_count += 1; + } + OverlayOp::AssertRelation(rel_id, indices) => { + if *element_count == 0 || num_relations == 0 { + return; // Can't assert tuples without elements or relations + } + // Clamp rel_id and indices to valid range + let rel_id = *rel_id % num_relations; + let tuple: Vec = indices + .iter() + .map(|&i| Slid::from_usize(i % *element_count)) + .collect(); + overlay.assert_relation(rel_id, tuple); + } + OverlayOp::RetractRelation(rel_id, indices) => { + if *element_count == 0 || num_relations == 0 { + return; + } + let rel_id = *rel_id % num_relations; + let tuple: Vec = indices + .iter() + .map(|&i| Slid::from_usize(i % *element_count)) + .collect(); + overlay.retract_relation(rel_id, tuple); + } + } +} + +/// Collect all elements from a MappedStructure into a set. +fn collect_elements_mapped(mapped: &MappedStructure) -> HashSet<(Slid, Luid, SortId)> { + mapped.elements().collect() +} + +/// Collect all elements from an overlay into a set. +fn collect_elements_overlay(overlay: &OverlayStructure) -> HashSet<(Slid, Luid, SortId)> { + overlay.elements().collect() +} + +/// Collect all live tuples from a relation in a MappedStructure. +fn collect_tuples_mapped(mapped: &MappedStructure, rel_id: usize) -> BTreeSet> { + mapped + .relation(rel_id) + .map(|r| r.live_tuples().map(|t| t.collect()).collect()) + .unwrap_or_default() +} + +/// Collect all live tuples from a relation in an overlay. 
/// Collect the live tuples of relation `rel_id` from an overlay as a sorted
/// set, so relation contents can be compared independently of iteration order.
///
/// Returns an empty set when `rel_id` is unknown to the overlay.
/// NOTE(review): assumes `live_tuples()` already excludes retracted tuples —
/// confirm against OverlayStructure's documentation.
fn collect_tuples_overlay(overlay: &OverlayStructure, rel_id: usize) -> BTreeSet<Vec<Slid>> {
    overlay
        .relation(rel_id)
        .map(|r| r.live_tuples().collect())
        .unwrap_or_default()
}

// ============================================================================
// PROPERTY TESTS
// ============================================================================

proptest! {
    #![proptest_config(ProptestConfig::with_cases(50))]

    /// The committed structure should have the same elements as the overlay.
    #[test]
    fn overlay_commit_preserves_elements(
        num_sorts in 1usize..5,
        num_elements_per_sort in 0usize..10,
        ops in overlay_ops(4, 3, vec![1, 2, 3], 50),
    ) {
        let dir = tempdir().unwrap();
        let base_path = dir.path().join("base.structure");
        let commit_path = dir.path().join("commit.structure");

        let mut universe = Universe::new();
        let arities = vec![1, 2, 3];

        // Create and save base
        let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities);
        save_structure(&base, &base_path).unwrap();

        // Load and create overlay
        let mapped = MappedStructure::open(&base_path).unwrap();
        let mut overlay = OverlayStructure::new(Arc::new(mapped));

        // Apply operations
        let mut element_count = overlay.len();
        for op in &ops {
            apply_op(&mut overlay, &mut universe, op, &mut element_count);
        }

        // Collect elements from overlay
        let overlay_elements = collect_elements_overlay(&overlay);

        // Commit and collect elements from committed structure
        let committed = overlay.commit(&commit_path).unwrap();
        let committed_elements = collect_elements_mapped(&committed);

        // They should match: equal count plus one-directional containment
        // implies set equality (element triples are unique per structure).
        prop_assert_eq!(
            overlay_elements.len(),
            committed_elements.len(),
            "Element count mismatch"
        );

        // Check each element
        for (slid, luid, sort) in &overlay_elements {
            prop_assert!(
                committed_elements.contains(&(*slid, *luid, *sort)),
                "Element {:?} in overlay but not in committed",
                (slid, luid, sort)
            );
        }
    }

    /// The committed structure should have the same relation tuples as the overlay.
    #[test]
    fn overlay_commit_preserves_relations(
        num_sorts in 1usize..4,
        num_elements_per_sort in 1usize..8,
        ops in overlay_ops(3, 3, vec![1, 2, 2], 30),
    ) {
        let dir = tempdir().unwrap();
        let base_path = dir.path().join("base.structure");
        let commit_path = dir.path().join("commit.structure");

        let mut universe = Universe::new();
        let arities = vec![1, 2, 2]; // unary, binary, binary

        // Create and save base
        let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities);
        save_structure(&base, &base_path).unwrap();

        // Load and create overlay
        let mapped = MappedStructure::open(&base_path).unwrap();
        let mut overlay = OverlayStructure::new(Arc::new(mapped));

        // Apply operations
        let mut element_count = overlay.len();
        for op in &ops {
            apply_op(&mut overlay, &mut universe, op, &mut element_count);
        }

        // Collect tuples from overlay for each relation
        let overlay_tuples: Vec<BTreeSet<Vec<Slid>>> = (0..arities.len())
            .map(|rel_id| collect_tuples_overlay(&overlay, rel_id))
            .collect();

        // Commit
        let committed = overlay.commit(&commit_path).unwrap();

        // Collect tuples from committed
        let committed_tuples: Vec<BTreeSet<Vec<Slid>>> = (0..arities.len())
            .map(|rel_id| collect_tuples_mapped(&committed, rel_id))
            .collect();

        // They should match for each relation
        for (rel_id, (overlay_set, committed_set)) in
            overlay_tuples.iter().zip(committed_tuples.iter()).enumerate()
        {
            prop_assert_eq!(
                overlay_set,
                committed_set,
                "Relation {} tuples mismatch.\nOverlay: {:?}\nCommitted: {:?}",
                rel_id,
                overlay_set,
                committed_set
            );
        }
    }

    /// Element lookups should be consistent between overlay and committed structure.
    #[test]
    fn overlay_element_lookups_match_committed(
        num_sorts in 1usize..5,
        num_elements_per_sort in 0usize..10,
        ops in overlay_ops(4, 2, vec![1, 2], 40),
    ) {
        let dir = tempdir().unwrap();
        let base_path = dir.path().join("base.structure");
        let commit_path = dir.path().join("commit.structure");

        let mut universe = Universe::new();
        let arities = vec![1, 2];

        // Create and save base
        let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities);
        save_structure(&base, &base_path).unwrap();

        // Load and create overlay
        let mapped = MappedStructure::open(&base_path).unwrap();
        let mut overlay = OverlayStructure::new(Arc::new(mapped));

        // Apply operations
        let mut element_count = overlay.len();
        for op in &ops {
            apply_op(&mut overlay, &mut universe, op, &mut element_count);
        }

        // Commit
        let committed = overlay.commit(&commit_path).unwrap();

        // Check that len matches
        prop_assert_eq!(overlay.len(), committed.len(), "len() mismatch");

        // Check each element lookup (slids are dense 0..len, so exhaustive)
        for i in 0..overlay.len() {
            let slid = Slid::from_usize(i);

            let overlay_luid = overlay.get_luid(slid);
            let committed_luid = committed.get_luid(slid);
            prop_assert_eq!(
                overlay_luid,
                committed_luid,
                "get_luid({:?}) mismatch",
                slid
            );

            let overlay_sort = overlay.get_sort(slid);
            let committed_sort = committed.get_sort(slid);
            prop_assert_eq!(
                overlay_sort,
                committed_sort,
                "get_sort({:?}) mismatch",
                slid
            );
        }
    }

    /// Rollback should restore the overlay to match the base.
    #[test]
    fn overlay_rollback_restores_base(
        num_sorts in 1usize..4,
        num_elements_per_sort in 1usize..8,
        ops in overlay_ops(3, 2, vec![1, 2], 20),
    ) {
        let dir = tempdir().unwrap();
        let base_path = dir.path().join("base.structure");

        let mut universe = Universe::new();
        let arities = vec![1, 2];

        // Create base with some initial relation tuples
        let mut base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities);
        // Add some initial tuples (only when enough elements exist for slids 0 and 1)
        if base.len() >= 2 {
            base.assert_relation(0, vec![Slid::from_usize(0)]);
            base.assert_relation(1, vec![Slid::from_usize(0), Slid::from_usize(1)]);
        }
        save_structure(&base, &base_path).unwrap();

        // Load and create overlay
        let mapped = Arc::new(MappedStructure::open(&base_path).unwrap());
        let mut overlay = OverlayStructure::new(mapped.clone());

        // Record base state
        let base_len = overlay.len();
        let base_tuples_0 = collect_tuples_overlay(&overlay, 0);
        let base_tuples_1 = collect_tuples_overlay(&overlay, 1);

        // Apply operations (mutate the overlay)
        let mut element_count = overlay.len();
        for op in &ops {
            apply_op(&mut overlay, &mut universe, op, &mut element_count);
        }

        // Rollback
        overlay.rollback();

        // Should match base again
        prop_assert_eq!(overlay.len(), base_len, "len() should match base after rollback");
        prop_assert!(overlay.is_clean(), "should be clean after rollback");

        let after_tuples_0 = collect_tuples_overlay(&overlay, 0);
        let after_tuples_1 = collect_tuples_overlay(&overlay, 1);

        prop_assert_eq!(base_tuples_0, after_tuples_0, "Relation 0 should match base after rollback");
        prop_assert_eq!(base_tuples_1, after_tuples_1, "Relation 1 should match base after rollback");
    }

    /// Assert then retract should result in no change (for overlay-only tuples).
    #[test]
    fn assert_then_retract_is_noop(
        num_elements in 2usize..10,
        rel_idx_a in 0usize..10,
        rel_idx_b in 0usize..10,
    ) {
        let dir = tempdir().unwrap();
        let base_path = dir.path().join("base.structure");

        let mut universe = Universe::new();

        // Create base with elements but no relation tuples
        let mut base = Structure::new(1);
        for _ in 0..num_elements {
            base.add_element(&mut universe, 0);
        }
        base.init_relations(&[2]); // binary relation
        save_structure(&base, &base_path).unwrap();

        // Load and create overlay
        let mapped = MappedStructure::open(&base_path).unwrap();
        let mut overlay = OverlayStructure::new(Arc::new(mapped));

        // Create a tuple (indices folded into range via modulo, so any
        // generated rel_idx_* maps to a valid element)
        let idx_a = rel_idx_a % num_elements;
        let idx_b = rel_idx_b % num_elements;
        let tuple = vec![Slid::from_usize(idx_a), Slid::from_usize(idx_b)];

        // Should start with no tuples
        let initial_tuples = collect_tuples_overlay(&overlay, 0);
        prop_assert!(initial_tuples.is_empty(), "Should start empty");

        // Assert
        overlay.assert_relation(0, tuple.clone());
        let after_assert = collect_tuples_overlay(&overlay, 0);
        prop_assert!(after_assert.contains(&tuple), "Should contain tuple after assert");

        // Retract
        overlay.retract_relation(0, tuple.clone());
        let after_retract = collect_tuples_overlay(&overlay, 0);
        prop_assert!(!after_retract.contains(&tuple), "Should not contain tuple after retract");

        // Should be clean (no net change)
        prop_assert!(overlay.is_clean(), "Should be clean after assert+retract of new tuple");
    }

    /// Retracting a base tuple should hide it from iteration.
    #[test]
    fn retract_hides_base_tuple(
        num_elements in 3usize..10,
    ) {
        let dir = tempdir().unwrap();
        let base_path = dir.path().join("base.structure");

        let mut universe = Universe::new();

        // Create base with elements and a relation tuple
        let mut base = Structure::new(1);
        for _ in 0..num_elements {
            base.add_element(&mut universe, 0);
        }
        base.init_relations(&[2]); // binary relation
        let base_tuple = vec![Slid::from_usize(0), Slid::from_usize(1)];
        base.assert_relation(0, base_tuple.clone());
        save_structure(&base, &base_path).unwrap();

        // Load and create overlay
        let mapped = MappedStructure::open(&base_path).unwrap();
        let mut overlay = OverlayStructure::new(Arc::new(mapped));

        // Should see the base tuple
        let initial = collect_tuples_overlay(&overlay, 0);
        prop_assert!(initial.contains(&base_tuple), "Should see base tuple initially");

        // Retract it
        overlay.retract_relation(0, base_tuple.clone());

        // Should no longer see it
        let after = collect_tuples_overlay(&overlay, 0);
        prop_assert!(!after.contains(&base_tuple), "Should not see base tuple after retract");

        // But overlay should not be clean (we have a retraction)
        prop_assert!(!overlay.is_clean(), "Should not be clean with a retraction");
    }

    /// Multiple commits should produce identical results.
    #[test]
    fn double_commit_is_idempotent(
        num_sorts in 1usize..3,
        num_elements_per_sort in 1usize..5,
        ops in overlay_ops(2, 2, vec![1, 2], 15),
    ) {
        let dir = tempdir().unwrap();
        let base_path = dir.path().join("base.structure");
        let commit1_path = dir.path().join("commit1.structure");
        let commit2_path = dir.path().join("commit2.structure");

        let mut universe = Universe::new();
        let arities = vec![1, 2];

        // Create and save base
        let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities);
        save_structure(&base, &base_path).unwrap();

        // Load and create overlay
        let mapped = MappedStructure::open(&base_path).unwrap();
        let mut overlay = OverlayStructure::new(Arc::new(mapped));

        // Apply operations
        let mut element_count = overlay.len();
        for op in &ops {
            apply_op(&mut overlay, &mut universe, op, &mut element_count);
        }

        // Commit twice (to distinct paths, so the files don't clobber each other)
        let committed1 = overlay.commit(&commit1_path).unwrap();
        let committed2 = overlay.commit(&commit2_path).unwrap();

        // Both should have the same content
        prop_assert_eq!(committed1.len(), committed2.len(), "len() should match");

        for rel_id in 0..arities.len() {
            let tuples1 = collect_tuples_mapped(&committed1, rel_id);
            let tuples2 = collect_tuples_mapped(&committed2, rel_id);
            prop_assert_eq!(tuples1, tuples2, "Relation {} should match", rel_id);
        }
    }
}

// ============================================================================
// ADDITIONAL TARGETED TESTS
// ============================================================================

/// Committing an untouched overlay must reproduce the base structure exactly.
#[test]
fn test_empty_overlay_commit() {
    let dir = tempdir().unwrap();
    let base_path = dir.path().join("base.structure");
    let commit_path = dir.path().join("commit.structure");

    let mut universe = Universe::new();

    // Create base with some content
    let mut base = Structure::new(2);
    base.add_element(&mut universe, 0);
    base.add_element(&mut universe, 1);
    base.init_relations(&[1]);
    base.assert_relation(0, vec![Slid::from_usize(0)]);
    save_structure(&base, &base_path).unwrap();

    // Create overlay but don't modify it
    let mapped = MappedStructure::open(&base_path).unwrap();
    let overlay = OverlayStructure::new(Arc::new(mapped));

    assert!(overlay.is_clean());

    // Commit should produce identical structure
    let committed = overlay.commit(&commit_path).unwrap();

    assert_eq!(committed.len(), 2);
    assert_eq!(collect_tuples_mapped(&committed, 0).len(), 1);
}

/// A tuple may mix slids from the mapped base and slids allocated by the
/// overlay; it must survive iteration and commit.
#[test]
fn test_overlay_with_mixed_element_tuples() {
    // Test tuples that reference both base and overlay elements
    let dir = tempdir().unwrap();
    let base_path = dir.path().join("base.structure");
    let commit_path = dir.path().join("commit.structure");

    let mut universe = Universe::new();

    // Create base with one element
    let mut base = Structure::new(1);
    let (base_elem, _) = base.add_element(&mut universe, 0);
    base.init_relations(&[2]); // binary relation
    save_structure(&base, &base_path).unwrap();

    // Create overlay and add an element
    let mapped = MappedStructure::open(&base_path).unwrap();
    let mut overlay = OverlayStructure::new(Arc::new(mapped));

    let new_luid = universe.intern(Uuid::now_v7());
    let new_elem = overlay.add_element(new_luid, 0);

    // Assert a tuple mixing base and overlay elements
    let mixed_tuple = vec![base_elem, new_elem];
    overlay.assert_relation(0, mixed_tuple.clone());

    // Verify we can see it
    let tuples = collect_tuples_overlay(&overlay, 0);
    assert!(tuples.contains(&mixed_tuple));

    // Commit and verify
    let committed = overlay.commit(&commit_path).unwrap();
    let committed_tuples = collect_tuples_mapped(&committed, 0);
    assert_eq!(committed_tuples.len(), 1);
}
diff --git a/tests/proptest_patch.proptest-regressions b/tests/proptest_patch.proptest-regressions
new file mode 100644
index 0000000..5fb747e
--- /dev/null
+++ b/tests/proptest_patch.proptest-regressions
@@ -0,0 +1,7 @@
# Seeds for failure cases
proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 3a0157c22c0fb9016c677f4a6e7765c1e0294d6e50ff98b0f608c276d432927a # shrinks to (old, mut universe) = (Structure { theory_luid: None, luids: [0], luid_to_slid: {0: 0}, sorts: [1], carriers: [RoaringTreemap<[]>, RoaringTreemap<[0]>], functions: [], nested: {} }, Universe { index: {019b1e9d-0b71-7471-94d9-cef7c2d2959d}, path: None, dirty: true }), (new, mut universe2) = (Structure { theory_luid: None, luids: [0], luid_to_slid: {0: 0}, sorts: [1], carriers: [RoaringTreemap<[]>, RoaringTreemap<[0]>], functions: [], nested: {} }, Universe { index: {019b1e9d-0b71-7471-94d9-cf28a3b61209}, path: None, dirty: true })
diff --git a/tests/proptest_patch.rs b/tests/proptest_patch.rs
new file mode 100644
index 0000000..f2b40e8
--- /dev/null
+++ b/tests/proptest_patch.rs
@@ -0,0 +1,334 @@
//! Property tests for Patch algebra (diff/apply roundtrips)

mod generators;

use generators::{StructureParams, check_structure_invariants, structures_equivalent};
use geolog::core::Structure;
use geolog::naming::NamingIndex;
use geolog::patch::{Patch, apply_patch, diff, to_initial_patch};
use geolog::universe::Universe;
use proptest::prelude::*;
use std::collections::HashSet;

proptest! {
    /// Empty patch is identity: apply_patch(s, empty) == s
    #[test]
    fn empty_patch_is_identity(
        (structure, mut universe) in generators::arb_structure(StructureParams {
            num_sorts: 3,
            max_elements_per_sort: 8,
        })
    ) {
        // An empty patch must be shaped like `structure` (same sort/function/
        // relation counts) for apply_patch to accept it.
        let empty_patch = Patch::new(None, structure.num_sorts(), structure.num_functions(), structure.relations.len());
        let mut naming = NamingIndex::new();

        let result = apply_patch(&structure, &empty_patch, &mut universe, &mut naming);
        prop_assert!(result.is_ok());

        let result = result.unwrap();
        prop_assert_eq!(result.len(), structure.len());
        prop_assert_eq!(result.num_sorts(), structure.num_sorts());

        // Check same UUIDs
        prop_assert!(structures_equivalent(&result, &structure, &universe, &universe));
    }

    /// diff(s, s) produces empty patch
    #[test]
    fn diff_same_is_empty(
        (structure, universe) in generators::arb_structure(StructureParams {
            num_sorts: 3,
            max_elements_per_sort: 8,
        })
    ) {
        let naming = NamingIndex::new();

        let patch = diff(&structure, &structure, &universe, &naming, &naming);

        prop_assert!(patch.is_empty());
    }

    /// to_initial_patch creates patch that builds structure from empty
    #[test]
    fn initial_patch_builds_from_empty(
        (structure, mut universe) in generators::arb_structure(StructureParams {
            num_sorts: 3,
            max_elements_per_sort: 8,
        })
    ) {
        // Build naming for structure elements
        let mut naming = NamingIndex::new();
        for &luid in &structure.luids {
            if let Some(uuid) = universe.get(luid) {
                naming.insert(uuid, vec![format!("elem_{}", luid)]);
            }
        }

        let patch = to_initial_patch(&structure, &universe, &naming);

        // Apply to empty structure
        let empty = Structure::new(structure.num_sorts());
        let mut result_naming = NamingIndex::new();
        let result = apply_patch(&empty, &patch, &mut universe, &mut result_naming);

        prop_assert!(result.is_ok());
        let result = result.unwrap();

        // Should have same number of elements
        prop_assert_eq!(result.len(), structure.len());

        // Should have same UUIDs
        prop_assert!(structures_equivalent(&result, &structure, &universe, &universe));
    }

    /// Element additions are tracked in patch
    #[test]
    fn additions_tracked(
        num_elements in 1usize..10,
    ) {
        let mut universe = Universe::new();
        let mut naming = NamingIndex::new();

        let old = Structure::new(2);
        let mut new = Structure::new(2);

        // Alternate elements between the two sorts
        for i in 0..num_elements {
            let (_, luid) = new.add_element(&mut universe, i % 2);
            if let Some(uuid) = universe.get(luid) {
                naming.insert(uuid, vec![format!("elem_{}", i)]);
            }
        }

        let old_naming = NamingIndex::new();
        let patch = diff(&old, &new, &universe, &old_naming, &naming);

        prop_assert_eq!(patch.elements.additions.len(), num_elements);
        prop_assert!(patch.elements.deletions.is_empty());
    }

    /// Element deletions are tracked in patch
    #[test]
    fn deletions_tracked(
        num_elements in 1usize..10,
    ) {
        let mut universe = Universe::new();
        let mut old_naming = NamingIndex::new();

        let mut old = Structure::new(2);
        for i in 0..num_elements {
            let (_, luid) = old.add_element(&mut universe, i % 2);
            if let Some(uuid) = universe.get(luid) {
                old_naming.insert(uuid, vec![format!("elem_{}", i)]);
            }
        }

        let new = Structure::new(2);
        let new_naming = NamingIndex::new();

        let patch = diff(&old, &new, &universe, &old_naming, &new_naming);

        prop_assert_eq!(patch.elements.deletions.len(), num_elements);
        prop_assert!(patch.elements.additions.is_empty());
    }

    /// Element patch has disjoint additions and deletions
    #[test]
    fn element_patch_disjoint(
        (old, universe) in generators::arb_structure(StructureParams {
            num_sorts: 2,
            max_elements_per_sort: 5,
        }),
        (new, _) in generators::arb_structure(StructureParams {
            num_sorts: 2,
            max_elements_per_sort: 5,
        })
    ) {
        let old_naming = NamingIndex::new();
        let new_naming = NamingIndex::new();

        let patch = diff(&old, &new, &universe, &old_naming, &new_naming);

        // Additions and deletions should be disjoint
        let additions: HashSet<_> = patch.elements.additions.keys().collect();
        let deletions: HashSet<_> = patch.elements.deletions.iter().collect();

        let intersection: Vec<_> = additions.intersection(&deletions).collect();
        prop_assert!(intersection.is_empty());
    }

    /// NamingPatch tracks name additions for new elements
    #[test]
    fn naming_patch_additions(
        num_elements in 1usize..8,
    ) {
        let mut universe = Universe::new();
        let mut naming = NamingIndex::new();

        let old = Structure::new(2);
        let mut new = Structure::new(2);

        for i in 0..num_elements {
            let (_, luid) = new.add_element(&mut universe, 0);
            if let Some(uuid) = universe.get(luid) {
                naming.insert(uuid, vec![format!("elem_{}", i)]);
            }
        }

        let old_naming = NamingIndex::new();
        let patch = diff(&old, &new, &universe, &old_naming, &naming);

        // Naming patch should have names for new elements
        prop_assert_eq!(patch.names.additions.len(), num_elements);
    }

    /// Patch inversion swaps additions/deletions
    #[test]
    fn inversion_swaps_elements(
        (old, mut universe) in generators::arb_structure(StructureParams {
            num_sorts: 2,
            max_elements_per_sort: 4,
        })
    ) {
        // Create a new structure with some different elements
        let mut new = Structure::new(2);
        new.add_element(&mut universe, 0);
        new.add_element(&mut universe, 1);

        let old_naming = NamingIndex::new();
        let new_naming = NamingIndex::new();

        let patch = diff(&old, &new, &universe, &old_naming, &new_naming);
        let inverted = patch.invert();

        // Inverted patch swaps source/target commits
        prop_assert_eq!(inverted.source_commit, Some(patch.target_commit));

        // Additions become deletions (by key count)
        prop_assert_eq!(
            inverted.elements.deletions.len(),
            patch.elements.additions.len()
        );
    }

    /// Double inversion preserves target_commit (but creates new source)
    #[test]
    fn double_inversion_target_preserved(
        (structure, universe) in generators::arb_structure(StructureParams {
            num_sorts: 2,
            max_elements_per_sort: 3,
        })
    ) {
        let naming = NamingIndex::new();
        let patch = to_initial_patch(&structure, &universe, &naming);

        let inverted = patch.invert();
        let double_inverted = inverted.invert();

        // Original target becomes source after double inversion
        // (because each inversion swaps source ↔ target)
        prop_assert_eq!(double_inverted.source_commit, Some(inverted.target_commit));
    }

    /// Result of apply_patch maintains structure invariants
    #[test]
    fn apply_patch_maintains_invariants(
        (old, mut universe) in generators::arb_structure(StructureParams {
            num_sorts: 3,
            max_elements_per_sort: 5,
        }),
        (new, _) in generators::arb_structure(StructureParams {
            num_sorts: 3,
            max_elements_per_sort: 5,
        })
    ) {
        let old_naming = NamingIndex::new();
        let new_naming = NamingIndex::new();

        let patch = diff(&old, &new, &universe, &old_naming, &new_naming);
        let mut result_naming = NamingIndex::new();

        let result = apply_patch(&old, &patch, &mut universe, &mut result_naming);
        prop_assert!(result.is_ok());

        let result = result.unwrap();
        prop_assert!(check_structure_invariants(&result).is_ok());
    }
}

// More focused roundtrip tests

proptest! {
    #![proptest_config(ProptestConfig::with_cases(256))]

    /// THE KEY PROPERTY: diff then apply is identity
    /// diff(old, new) |> apply_patch(old, _) ≈ new
    ///
    /// We test this by starting with a structure and modifying it (adding/removing elements)
    /// to create `new`, ensuring both share the same Universe.
    #[test]
    fn diff_apply_roundtrip(
        (base, mut universe) in generators::arb_structure(StructureParams {
            num_sorts: 2,
            max_elements_per_sort: 4,
        }),
        additions in proptest::collection::vec(0usize..2, 0..4),
        deletions_count in 0usize..3,
    ) {
        // Create `old` as a clone of base
        let old = base.clone();

        // Build naming for old structure
        let mut old_naming = NamingIndex::new();
        for &luid in &old.luids {
            if let Some(uuid) = universe.get(luid) {
                old_naming.insert(uuid, vec![format!("old_elem_{}", luid)]);
            }
        }

        // Create `new` by modifying base: add some elements, potentially skip some old ones
        let mut new = Structure::new(base.num_sorts());
        let mut new_naming = NamingIndex::new();

        // Keep some elements from old (skip the first `deletions_count`);
        // saturating_sub guards the case deletions_count >= base.len()
        let keep_count = base.len().saturating_sub(deletions_count);
        for slid in 0..keep_count {
            let luid = base.luids[slid];
            let sort_id = base.sorts[slid];
            new.add_element_with_luid(luid, sort_id);

            if let Some(uuid) = universe.get(luid) {
                new_naming.insert(uuid, vec![format!("kept_elem_{}", luid)]);
            }
        }

        // Add new elements
        for sort_id in additions {
            let (_, luid) = new.add_element(&mut universe, sort_id);
            if let Some(uuid) = universe.get(luid) {
                new_naming.insert(uuid, vec![format!("new_elem_{}", luid)]);
            }
        }

        // Now diff and apply
        let patch = diff(&old, &new, &universe, &old_naming, &new_naming);
        let mut result_naming = NamingIndex::new();

        let result = apply_patch(&old, &patch, &mut universe, &mut result_naming);
        prop_assert!(result.is_ok());

        let result = result.unwrap();

        // Result should have same number of elements as new
        prop_assert_eq!(result.len(), new.len());

        // Result should have same UUIDs as new (both use the same universe)
        let result_uuids: HashSet<_> = result.luids.iter()
            .filter_map(|&luid| universe.get(luid))
            .collect();
        let new_uuids: HashSet<_> = new.luids.iter()
            .filter_map(|&luid| universe.get(luid))
            .collect();

        prop_assert_eq!(result_uuids, new_uuids);
    }
}
diff --git a/tests/proptest_query.proptest-regressions b/tests/proptest_query.proptest-regressions
new file mode 100644
index 0000000..cd0e0be
--- /dev/null
+++ b/tests/proptest_query.proptest-regressions
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 58732b0034992e09c980d677405cd7448269d1b30d5deb09f018a60e30c56215 # shrinks to structure = Structure { theory_luid: None, luids: [], luid_to_slid: {}, sorts: [], carriers: [RoaringTreemap<[]>, RoaringTreemap<[]>, RoaringTreemap<[0]>, RoaringTreemap<[]>], functions: [], relations: [], parents: {}, nested: {} }, query = Filter { input: Scan { sort_idx: 2 }, pred: Or(FuncEqConst { func_idx: 0, arg_col: 0, expected: Slid(0) }, True) }
diff --git a/tests/proptest_query.rs b/tests/proptest_query.rs
new file mode 100644
index 0000000..7a9b924
--- /dev/null
+++ b/tests/proptest_query.rs
@@ -0,0 +1,946 @@
//! Property tests for query operations.
//!
//! Verifies that execute_optimized produces the same results as execute (naive).
use geolog::core::Structure;
use geolog::id::{NumericId, Slid};
use geolog::query::{JoinCond, Predicate, QueryOp, execute, execute_optimized};
use proptest::prelude::*;

// ============================================================================
// QueryOp Generators
// ============================================================================

/// Generate arbitrary Slid values (within reasonable range)
fn arb_slid() -> impl Strategy<Value = Slid> {
    (0..100usize).prop_map(Slid::from_usize)
}

/// Generate a simple structure with multiple sorts and elements
///
/// NOTE(review): carriers are populated with raw u64 element ids directly,
/// bypassing luids/sorts bookkeeping — fine for query-only tests, but such
/// structures won't satisfy full Structure invariants.
fn arb_query_structure(num_sorts: usize, max_per_sort: usize) -> impl Strategy<Value = Structure> {
    prop::collection::vec(
        prop::collection::vec(0..50u64, 0..=max_per_sort),
        num_sorts,
    )
    .prop_map(|sort_elements| {
        let mut structure = Structure::new(sort_elements.len());
        for (sort_idx, elements) in sort_elements.iter().enumerate() {
            for &elem in elements {
                structure.carriers[sort_idx].insert(elem);
            }
        }
        structure
    })
}

/// Generate a scan operation
fn arb_scan(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    (0..max_sort).prop_map(|sort_idx| QueryOp::Scan { sort_idx })
}

/// Generate a constant tuple
fn arb_constant() -> impl Strategy<Value = QueryOp> {
    prop::collection::vec(arb_slid(), 1..=3)
        .prop_map(|tuple| QueryOp::Constant { tuple })
}

/// Generate empty
fn arb_empty() -> impl Strategy<Value = QueryOp> {
    Just(QueryOp::Empty)
}

/// Generate a simple query (scan, constant, or empty)
fn arb_simple_query(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    prop_oneof![
        arb_scan(max_sort),
        arb_constant(),
        arb_empty(),
    ]
}

/// Generate a join condition for given arity
///
/// Zero-arity inputs can only be cross-joined; otherwise column indices are
/// drawn within each side's arity so the Equi condition is always valid.
fn arb_join_cond(left_arity: usize, right_arity: usize) -> impl Strategy<Value = JoinCond> {
    if left_arity == 0 || right_arity == 0 {
        Just(JoinCond::Cross).boxed()
    } else {
        prop_oneof![
            Just(JoinCond::Cross),
            (0..left_arity, 0..right_arity)
                .prop_map(|(left_col, right_col)| JoinCond::Equi { left_col, right_col }),
        ]
        .boxed()
    }
}

/// Generate a join of two scans
fn arb_scan_join(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    (0..max_sort, 0..max_sort)
        .prop_flat_map(move |(left_sort, right_sort)| {
            // Scans have arity 1, so join columns are always (0, 0)
            arb_join_cond(1, 1).prop_map(move |cond| QueryOp::Join {
                left: Box::new(QueryOp::Scan { sort_idx: left_sort }),
                right: Box::new(QueryOp::Scan { sort_idx: right_sort }),
                cond,
            })
        })
}

/// Generate a union of two simple queries
fn arb_union(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    (arb_simple_query(max_sort), arb_simple_query(max_sort))
        .prop_map(|(left, right)| QueryOp::Union {
            left: Box::new(left),
            right: Box::new(right),
        })
}

/// Generate a negate of a simple query
fn arb_negate(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    arb_simple_query(max_sort).prop_map(|input| QueryOp::Negate {
        input: Box::new(input),
    })
}

/// Generate a distinct of a simple query
fn arb_distinct(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    arb_simple_query(max_sort).prop_map(|input| QueryOp::Distinct {
        input: Box::new(input),
    })
}

/// Generate a simple predicate (no recursion, no function predicates)
/// Use this for tests with structures that don't have functions.
fn arb_simple_predicate_no_funcs() -> impl Strategy<Value = Predicate> {
    prop_oneof![
        Just(Predicate::True),
        Just(Predicate::False),
        (0..5usize, 0..5usize).prop_map(|(left, right)| Predicate::ColEqCol { left, right }),
        (0..5usize, arb_slid()).prop_map(|(col, val)| Predicate::ColEqConst { col, val }),
    ]
}

/// Generate a simple predicate (no recursion) - includes function predicates
/// Use this for to_relalg compilation tests where functions don't need to evaluate.
fn arb_simple_predicate() -> impl Strategy<Value = Predicate> {
    prop_oneof![
        Just(Predicate::True),
        Just(Predicate::False),
        (0..5usize, 0..5usize).prop_map(|(left, right)| Predicate::ColEqCol { left, right }),
        (0..5usize, arb_slid()).prop_map(|(col, val)| Predicate::ColEqConst { col, val }),
        (0..3usize, 0..5usize, 0..5usize)
            .prop_map(|(func_idx, arg_col, result_col)| Predicate::FuncEq { func_idx, arg_col, result_col }),
        (0..3usize, 0..5usize, arb_slid())
            .prop_map(|(func_idx, arg_col, expected)| Predicate::FuncEqConst { func_idx, arg_col, expected }),
    ]
}

/// Generate a predicate with possible And/Or nesting (no function predicates)
fn arb_predicate_no_funcs() -> impl Strategy<Value = Predicate> {
    // prop_recursive(depth, desired_size, expected_branch_size, ...)
    arb_simple_predicate_no_funcs().prop_recursive(2, 8, 2, |inner| {
        prop_oneof![
            inner.clone(),
            (inner.clone(), inner.clone()).prop_map(|(l, r)| Predicate::And(Box::new(l), Box::new(r))),
            (inner.clone(), inner).prop_map(|(l, r)| Predicate::Or(Box::new(l), Box::new(r))),
        ]
    })
}

/// Generate a predicate with possible And/Or nesting (includes function predicates)
fn arb_predicate() -> impl Strategy<Value = Predicate> {
    arb_simple_predicate().prop_recursive(2, 8, 2, |inner| {
        prop_oneof![
            inner.clone(),
            (inner.clone(), inner.clone()).prop_map(|(l, r)| Predicate::And(Box::new(l), Box::new(r))),
            (inner.clone(), inner).prop_map(|(l, r)| Predicate::Or(Box::new(l), Box::new(r))),
        ]
    })
}

/// Generate a filter with arbitrary predicate (no function predicates)
/// Safe for testing against structures without functions.
fn arb_filter_safe(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    (arb_scan(max_sort), arb_predicate_no_funcs())
        .prop_map(|(input, pred)| QueryOp::Filter {
            input: Box::new(input),
            pred,
        })
}

/// Generate a filter with column equality predicate (simple version)
fn arb_filter_col_eq_const(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    (arb_scan(max_sort), arb_slid())
        .prop_map(|(input, val)| QueryOp::Filter {
            input: Box::new(input),
            pred: Predicate::ColEqConst { col: 0, val },
        })
}

/// Generate a query without DBSP operators (for comparing naive vs optimized)
/// Uses arb_filter_safe to avoid function predicates that require functions in the structure.
/// The integer weights bias generation toward the more interesting operators.
fn arb_query_no_dbsp(max_sort: usize) -> impl Strategy<Value = QueryOp> {
    prop_oneof![
        4 => arb_scan(max_sort),
        2 => arb_constant(),
        1 => arb_empty(),
        3 => arb_scan_join(max_sort),
        2 => arb_union(max_sort),
        1 => arb_negate(max_sort),
        1 => arb_distinct(max_sort),
        2 => arb_filter_col_eq_const(max_sort),
        3 => arb_filter_safe(max_sort),
    ]
}

// ============================================================================
// Property Tests
// ============================================================================

// (macro invocation continues on the following lines)
proptest!
{ + #![proptest_config(ProptestConfig::with_cases(500))] + + /// execute_optimized should produce identical results to execute for any query + #[test] + fn optimized_matches_naive( + structure in arb_query_structure(4, 10), + query in arb_query_no_dbsp(4) + ) { + let naive_result = execute(&query, &structure); + let optimized_result = execute_optimized(&query, &structure); + + // Same number of unique tuples + prop_assert_eq!( + naive_result.len(), + optimized_result.len(), + "Length mismatch for query {:?}", + query + ); + + // Same multiplicities for each tuple + for (tuple, mult) in naive_result.iter() { + prop_assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "Multiplicity mismatch for tuple {:?}", + tuple + ); + } + } + + /// Equi-join should be symmetric in a sense: swapping left/right and columns + /// should produce equivalent results (after accounting for tuple order) + #[test] + fn equijoin_symmetric( + structure in arb_query_structure(2, 8), + left_sort in 0..2usize, + right_sort in 0..2usize, + ) { + let join1 = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let join2 = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: right_sort }), + right: Box::new(QueryOp::Scan { sort_idx: left_sort }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let result1 = execute_optimized(&join1, &structure); + let result2 = execute_optimized(&join2, &structure); + + // Should have same number of tuples (with columns swapped) + prop_assert_eq!(result1.len(), result2.len()); + } + + /// Nested equijoins: (A ⋈ B) ⋈ C should work correctly + #[test] + fn nested_equijoin( + structure in arb_query_structure(3, 6), + ) { + let join_ab = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, 
right_col: 0 }, + }; + + let join_abc = QueryOp::Join { + left: Box::new(join_ab.clone()), + right: Box::new(QueryOp::Scan { sort_idx: 2 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join_abc, &structure); + let optimized_result = execute_optimized(&join_abc, &structure); + + prop_assert_eq!(naive_result.len(), optimized_result.len()); + + for (tuple, mult) in naive_result.iter() { + prop_assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "Mismatch in nested join" + ); + } + } + + /// Cross join should produce |A| * |B| results + #[test] + fn cross_join_cardinality( + structure in arb_query_structure(2, 5), + ) { + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + + let result = execute_optimized(&join, &structure); + let expected_size = structure.carriers[0].len() as usize * structure.carriers[1].len() as usize; + + prop_assert_eq!(result.len(), expected_size); + } + + /// Union is commutative: A ∪ B = B ∪ A + #[test] + fn union_commutative( + structure in arb_query_structure(2, 5), + ) { + let union1 = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + }; + + let union2 = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 1 }), + right: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + + let result1 = execute_optimized(&union1, &structure); + let result2 = execute_optimized(&union2, &structure); + + prop_assert_eq!(result1.len(), result2.len()); + + for (tuple, mult) in result1.iter() { + prop_assert_eq!( + result2.tuples.get(tuple), + Some(mult), + "Union commutativity failed" + ); + } + } + + /// Distinct is idempotent: distinct(distinct(x)) = distinct(x) + #[test] + fn distinct_idempotent( + structure in arb_query_structure(1, 10), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + + let distinct1 = QueryOp::Distinct 
{ + input: Box::new(scan.clone()), + }; + + let distinct2 = QueryOp::Distinct { + input: Box::new(QueryOp::Distinct { + input: Box::new(scan), + }), + }; + + let result1 = execute_optimized(&distinct1, &structure); + let result2 = execute_optimized(&distinct2, &structure); + + prop_assert_eq!(result1.len(), result2.len()); + + for (tuple, mult) in result1.iter() { + prop_assert_eq!( + result2.tuples.get(tuple), + Some(mult), + "Distinct idempotency failed" + ); + } + } + + /// Negate twice is identity: negate(negate(x)) = x + #[test] + fn negate_involution( + structure in arb_query_structure(1, 10), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + + let double_negate = QueryOp::Negate { + input: Box::new(QueryOp::Negate { + input: Box::new(scan.clone()), + }), + }; + + let result_original = execute_optimized(&scan, &structure); + let result_double_neg = execute_optimized(&double_negate, &structure); + + prop_assert_eq!(result_original.len(), result_double_neg.len()); + + for (tuple, mult) in result_original.iter() { + prop_assert_eq!( + result_double_neg.tuples.get(tuple), + Some(mult), + "Negate involution failed" + ); + } + } +} + +// ============================================================================ +// RelAlgIR Compilation Property Tests +// ============================================================================ + +mod to_relalg_tests { + use geolog::core::ElaboratedTheory; + use geolog::query::{Predicate, QueryOp, to_relalg::compile_to_relalg}; + use geolog::universe::Universe; + use geolog::repl::ReplState; + use proptest::prelude::*; + use std::rc::Rc; + + /// Load the RelAlgIR theory for testing + fn load_relalg_theory() -> Rc { + let meta_content = std::fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = std::fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + + let mut state = ReplState::new(); + state + .execute_geolog(&meta_content) 
+ .expect("GeologMeta should load"); + state + .execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + + state + .theories + .get("RelAlgIR") + .expect("RelAlgIR should exist") + .clone() + } + + /// Generate a simple QueryOp without Constant/Apply (which need target context) + fn arb_simple_query_op() -> impl Strategy { + prop_oneof![ + // Scan + (0..10usize).prop_map(|sort_idx| QueryOp::Scan { sort_idx }), + // Empty + Just(QueryOp::Empty), + ] + } + + /// Generate a nested QueryOp (depth 2) + fn arb_nested_query_op() -> impl Strategy { + arb_simple_query_op().prop_flat_map(|base| { + prop_oneof![ + // Filter with various predicates + Just(QueryOp::Filter { + input: Box::new(base.clone()), + pred: Predicate::True, + }), + Just(QueryOp::Filter { + input: Box::new(base.clone()), + pred: Predicate::False, + }), + Just(QueryOp::Filter { + input: Box::new(base.clone()), + pred: Predicate::ColEqCol { left: 0, right: 0 }, + }), + // Negate + Just(QueryOp::Negate { + input: Box::new(base.clone()), + }), + // Distinct + Just(QueryOp::Distinct { + input: Box::new(base.clone()), + }), + // Project + prop::collection::vec(0..3usize, 1..=3).prop_map(move |columns| QueryOp::Project { + input: Box::new(base.clone()), + columns, + }), + ] + }) + } + + proptest! 
{ + /// Compiling simple QueryOps to RelAlgIR should not panic + #[test] + fn compile_simple_query_no_panic(plan in arb_simple_query_op()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Should not panic - may error for Constant/Apply but shouldn't crash + let _ = compile_to_relalg(&plan, &relalg_theory, &mut universe); + } + + /// Compiling nested QueryOps to RelAlgIR should not panic + #[test] + fn compile_nested_query_no_panic(plan in arb_nested_query_op()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Should not panic + let _ = compile_to_relalg(&plan, &relalg_theory, &mut universe); + } + + /// Compiled instances should have at least output wire + #[test] + fn compile_produces_valid_instance(plan in arb_simple_query_op()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + if let Ok(instance) = compile_to_relalg(&plan, &relalg_theory, &mut universe) { + // Instance should have elements + prop_assert!(!instance.structure.is_empty(), "Instance should have elements"); + // Should have named elements including output wire + prop_assert!(!instance.names.is_empty(), "Instance should have named elements"); + } + } + + /// Compiling binary operations should work + #[test] + fn compile_binary_ops_no_panic( + left_sort in 0..5usize, + right_sort in 0..5usize, + ) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Join (cross) + let join_plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + cond: geolog::query::JoinCond::Cross, + }; + let _ = compile_to_relalg(&join_plan, &relalg_theory, &mut universe); + + // Join (equi) + let equi_plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + cond: geolog::query::JoinCond::Equi { left_col: 0, 
right_col: 0 }, + }; + let _ = compile_to_relalg(&equi_plan, &relalg_theory, &mut universe); + + // Union + let union_plan = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + }; + let _ = compile_to_relalg(&union_plan, &relalg_theory, &mut universe); + } + + /// Compiling DBSP operators should work + #[test] + fn compile_dbsp_ops_no_panic(sort_idx in 0..5usize, state_id in 0..3usize) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let scan = QueryOp::Scan { sort_idx }; + + // Delay + let delay_plan = QueryOp::Delay { + input: Box::new(scan.clone()), + state_id, + }; + let _ = compile_to_relalg(&delay_plan, &relalg_theory, &mut universe); + + // Diff + let diff_plan = QueryOp::Diff { + input: Box::new(scan.clone()), + state_id, + }; + let _ = compile_to_relalg(&diff_plan, &relalg_theory, &mut universe); + + // Integrate + let integrate_plan = QueryOp::Integrate { + input: Box::new(scan), + state_id, + }; + let _ = compile_to_relalg(&integrate_plan, &relalg_theory, &mut universe); + } + + /// Compiling all predicate types should work + #[test] + fn compile_all_predicate_types_no_panic(pred in super::arb_predicate()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let filter_plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred, + }; + + // Should compile without panic + let _ = compile_to_relalg(&filter_plan, &relalg_theory, &mut universe); + } + } +} + +// ============================================================================ +// Chase Algorithm Proptests +// ============================================================================ + +mod chase_proptest { + use super::*; + use geolog::core::{Context, DerivedSort, Formula, RelationStorage, Sequent, Signature, Structure, Term, Theory, VecRelation}; + use geolog::cc::CongruenceClosure; + use 
geolog::query::chase::{chase_step, chase_fixpoint}; + use geolog::universe::Universe; + + /// Generate a simple theory with one sort and one unary relation + fn simple_relation_theory() -> Theory { + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + Theory { + name: "Simple".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + } + } + + proptest! { + #[test] + fn chase_step_no_panic_on_empty_axioms( + num_elements in 0..10usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(1)); + s + }; + let theory = simple_relation_theory(); + + // Empty axioms should not change anything + let mut cc = CongruenceClosure::new(); + let changed = chase_step(&[], &mut structure, &mut cc, &mut universe, &theory.signature).unwrap(); + prop_assert!(!changed); + } + + #[test] + fn chase_step_adds_to_relation( + num_elements in 1..10usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(1)); // Unary relation + s + }; + let theory = simple_relation_theory(); + + // Axiom: forall x : V. 
|- R(x) + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + // First chase step should add elements + let mut cc = CongruenceClosure::new(); + let changed = chase_step(std::slice::from_ref(&axiom), &mut structure, &mut cc, &mut universe, &theory.signature).unwrap(); + + if num_elements > 0 { + prop_assert!(changed); + prop_assert_eq!(structure.relations[0].len(), num_elements); + } + + // Second chase step should not change anything + let changed2 = chase_step(&[axiom], &mut structure, &mut cc, &mut universe, &theory.signature).unwrap(); + prop_assert!(!changed2); + } + + #[test] + fn chase_fixpoint_converges( + num_elements in 1..8usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(1)); // Unary relation + s + }; + let theory = simple_relation_theory(); + + // Axiom: forall x : V. |- R(x) + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + // Chase should converge in exactly 2 iterations: + // 1. Add all elements to relation + // 2. Verify no more changes + let iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &theory.signature, + 100, + ).unwrap(); + + prop_assert_eq!(iterations, 2); + prop_assert_eq!(structure.relations[0].len(), num_elements); + } + + /// Test reflexivity axiom: forall x. 
|- [lo: x, hi: x] leq + /// Should create diagonal tuples for all elements + #[test] + fn chase_reflexivity_creates_diagonal( + num_elements in 1..8usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + // Binary relation: leq : [lo: V, hi: V] -> Prop + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("leq".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Axiom: forall x : V. |- [lo: x, hi: x] leq + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ])), + }; + + let iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // Should have exactly num_elements diagonal tuples + prop_assert_eq!(structure.relations[0].len(), num_elements); + prop_assert!(iterations <= 3); // Should converge quickly + } + + /// Test transitivity axiom: [lo: x, hi: y] leq, [lo: y, hi: z] leq |- [lo: x, hi: z] leq + /// Classic transitive closure - should derive all reachable pairs + #[test] + fn chase_transitivity_computes_closure( + chain_length in 2..5usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + // Create a chain: 0 -> 1 -> 2 -> ... 
-> n-1 + for i in 0..chain_length { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("leq".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Seed the chain edges: 0->1, 1->2, ..., (n-2)->(n-1) + use geolog::id::Slid; + for i in 0..(chain_length - 1) { + structure.relations[0].insert(vec![ + Slid::from_usize(i), + Slid::from_usize(i + 1), + ]); + } + + // Transitivity axiom + let axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Conj(vec![ + Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ])), + Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ])), + ]), + conclusion: Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ])), + }; + + let _iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // For a chain of length n, transitive closure has n*(n-1)/2 pairs + // (all pairs (i,j) where i < j) + let expected_tuples = chain_length * (chain_length - 1) / 2; + prop_assert_eq!(structure.relations[0].len(), expected_tuples); + } + + /// Test existential conclusion creates fresh witnesses + /// ax/witness : forall x : V. |- exists y : V. 
[lo: x, hi: y] R + #[test] + fn chase_existential_creates_witnesses( + num_elements in 1..5usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Axiom: forall x : V. |- exists y : V. [lo: x, hi: y] R + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Exists( + "y".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]))), + ), + }; + + let _iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // Each original element should have at least one witness + // So we should have at least num_elements tuples + prop_assert!(structure.relations[0].len() >= num_elements); + } + + /// Test equality conclusion merges elements via CC + /// ax/collapse : forall x, y : V. 
[lo: x, hi: y] R |- x = y + #[test] + fn chase_equality_conclusion_reduces_carrier( + num_pairs in 1..4usize, + ) { + let mut universe = Universe::new(); + let num_elements = num_pairs * 2; // Each pair will merge + + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Seed pairs: (0,1), (2,3), (4,5), ... + // Each pair will be collapsed by the equality axiom + use geolog::id::Slid; + for i in 0..num_pairs { + structure.relations[0].insert(vec![ + Slid::from_usize(i * 2), + Slid::from_usize(i * 2 + 1), + ]); + } + + // Axiom: forall x, y : V. [lo: x, hi: y] R |- x = y + let axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ])), + conclusion: Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + }; + + let _iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // After canonicalization, carrier should have fewer elements + // Each pair merges into one, so we should have num_pairs elements + prop_assert_eq!(structure.carriers[0].len() as usize, num_pairs); + } + } +} diff --git a/tests/proptest_query_backend.rs b/tests/proptest_query_backend.rs new file mode 100644 index 0000000..290c072 --- /dev/null +++ b/tests/proptest_query_backend.rs @@ -0,0 +1,870 @@ +//! Property tests for query backend. +//! +//! 
Generates random structures and queries, then verifies the naive backend +//! produces correct results by comparing against a reference implementation. + +use proptest::prelude::*; +use std::collections::HashSet; + +use geolog::core::Structure; +use geolog::id::{NumericId, Slid}; +use geolog::query::backend::{execute, Bag, JoinCond, Predicate, QueryOp}; + +/// Generate a random structure with given number of sorts. +fn arb_structure(num_sorts: usize, max_elements_per_sort: usize) -> impl Strategy { + // For each sort, generate a set of element indices + let sort_elements = prop::collection::vec( + prop::collection::btree_set(0u64..1000, 0..=max_elements_per_sort), + num_sorts, + ); + + sort_elements.prop_map(move |elements_per_sort| { + let mut structure = Structure::new(num_sorts); + for (sort_idx, elements) in elements_per_sort.into_iter().enumerate() { + for elem in elements { + structure.carriers[sort_idx].insert(elem); + } + } + structure + }) +} + +/// Reference implementation for Scan: iterate all elements +fn reference_scan(structure: &Structure, sort_idx: usize) -> HashSet> { + let mut result = HashSet::new(); + if let Some(carrier) = structure.carriers.get(sort_idx) { + for elem in carrier.iter() { + result.insert(vec![Slid::from_usize(elem as usize)]); + } + } + result +} + +/// Reference implementation for Filter +fn reference_filter( + input: &HashSet>, + pred: &Predicate, + _structure: &Structure, +) -> HashSet> { + input + .iter() + .filter(|tuple| reference_eval_predicate(pred, tuple)) + .cloned() + .collect() +} + +fn reference_eval_predicate(pred: &Predicate, tuple: &[Slid]) -> bool { + match pred { + Predicate::True => true, + Predicate::False => false, + Predicate::ColEqConst { col, val } => tuple.get(*col) == Some(val), + Predicate::ColEqCol { left, right } => { + tuple.get(*left) == tuple.get(*right) && tuple.get(*left).is_some() + } + Predicate::And(a, b) => { + reference_eval_predicate(a, tuple) && reference_eval_predicate(b, tuple) + } + 
Predicate::Or(a, b) => { + reference_eval_predicate(a, tuple) || reference_eval_predicate(b, tuple) + } + Predicate::FuncEq { .. } => true, // Skip function predicates in reference (need structure access) + Predicate::FuncEqConst { .. } => true, // Skip function predicates in reference + } +} + +/// Reference implementation for Cross Join +fn reference_cross_join( + left: &HashSet>, + right: &HashSet>, +) -> HashSet> { + let mut result = HashSet::new(); + for l in left { + for r in right { + let mut combined = l.clone(); + combined.extend(r.iter().cloned()); + result.insert(combined); + } + } + result +} + +/// Reference implementation for Union +fn reference_union( + left: &HashSet>, + right: &HashSet>, +) -> HashSet> { + left.union(right).cloned().collect() +} + +/// Convert Bag to HashSet (ignoring multiplicities, for comparison) +fn bag_to_set(bag: &Bag) -> HashSet> { + bag.iter() + .filter(|(_, mult)| **mult > 0) + .map(|(tuple, _)| tuple.clone()) + .collect() +} + +/// Generate a random predicate +fn arb_predicate() -> impl Strategy { + prop_oneof![ + Just(Predicate::True), + Just(Predicate::False), + (0usize..3, 0usize..100).prop_map(|(col, val)| Predicate::ColEqConst { + col, + val: Slid::from_usize(val), + }), + (0usize..3, 0usize..3).prop_map(|(left, right)| Predicate::ColEqCol { left, right }), + ] +} + +/// Generate a base query (no recursion) +fn arb_base_query() -> impl Strategy { + prop_oneof![ + (0usize..3).prop_map(|sort_idx| QueryOp::Scan { sort_idx }), + Just(QueryOp::Empty), + prop::collection::vec(0usize..100, 1..=2) + .prop_map(|tuple| QueryOp::Constant { + tuple: tuple.into_iter().map(Slid::from_usize).collect() + }), + ] +} + +/// Generate a random query plan using prop_recursive +fn arb_query_op() -> impl Strategy { + arb_base_query().prop_recursive( + 3, // max depth + 64, // max nodes + 10, // items per collection + |inner| { + prop_oneof![ + // Keep some base cases at each level + arb_base_query(), + // Unary operations + (inner.clone(), 
arb_predicate()) + .prop_map(|(input, pred)| QueryOp::Filter { + input: Box::new(input), + pred, + }), + inner.clone().prop_map(|input| QueryOp::Distinct { + input: Box::new(input), + }), + inner.clone().prop_map(|input| QueryOp::Negate { + input: Box::new(input), + }), + // Binary operations + (inner.clone(), inner.clone()) + .prop_map(|(left, right)| QueryOp::Union { + left: Box::new(left), + right: Box::new(right), + }), + (inner.clone(), inner) + .prop_map(|(left, right)| QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }), + ] + } + ) +} + +proptest! { + /// Test that optimizer preserves semantics for randomly generated plans. + #[test] + fn test_optimize_preserves_semantics( + structure in arb_structure(3, 5), + plan in arb_query_op(), + ) { + use geolog::query::optimize; + + let unoptimized_result = execute(&plan, &structure); + let optimized = optimize(&plan); + let optimized_result = execute(&optimized, &structure); + + prop_assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); + } + + /// Test that Scan produces all elements of a sort. + #[test] + fn test_scan_correct( + structure in arb_structure(3, 10), + sort_idx in 0usize..3, + ) { + let plan = QueryOp::Scan { sort_idx }; + let result = execute(&plan, &structure); + + let reference = reference_scan(&structure, sort_idx); + let actual = bag_to_set(&result); + + prop_assert_eq!(actual, reference); + } + + /// Test that Filter with True predicate returns all input. 
+ #[test] + fn test_filter_true_is_identity( + structure in arb_structure(2, 8), + sort_idx in 0usize..2, + ) { + let scan = QueryOp::Scan { sort_idx }; + let filter = QueryOp::Filter { + input: Box::new(scan.clone()), + pred: Predicate::True, + }; + + let scan_result = execute(&scan, &structure); + let filter_result = execute(&filter, &structure); + + prop_assert_eq!(bag_to_set(&scan_result), bag_to_set(&filter_result)); + } + + /// Test that Filter with False predicate returns empty. + #[test] + fn test_filter_false_is_empty( + structure in arb_structure(2, 8), + sort_idx in 0usize..2, + ) { + let scan = QueryOp::Scan { sort_idx }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::False, + }; + + let result = execute(&filter, &structure); + prop_assert!(result.is_empty()); + } + + /// Test that cross join produces correct cardinality. + #[test] + fn test_cross_join_cardinality( + structure in arb_structure(2, 5), + ) { + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left.clone()), + right: Box::new(right.clone()), + cond: JoinCond::Cross, + }; + + let left_result = execute(&left, &structure); + let right_result = execute(&right, &structure); + let join_result = execute(&join, &structure); + + let expected_size = left_result.len() * right_result.len(); + prop_assert_eq!(join_result.len(), expected_size); + } + + /// Test that cross join matches reference. 
+ #[test] + fn test_cross_join_correct( + structure in arb_structure(2, 4), + ) { + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }; + + let result = execute(&join, &structure); + + let ref_left = reference_scan(&structure, 0); + let ref_right = reference_scan(&structure, 1); + let reference = reference_cross_join(&ref_left, &ref_right); + + prop_assert_eq!(bag_to_set(&result), reference); + } + + /// Test that Union is commutative. + #[test] + fn test_union_commutative( + structure in arb_structure(2, 5), + ) { + let a = QueryOp::Scan { sort_idx: 0 }; + let b = QueryOp::Scan { sort_idx: 1 }; + + let union_ab = QueryOp::Union { + left: Box::new(a.clone()), + right: Box::new(b.clone()), + }; + let union_ba = QueryOp::Union { + left: Box::new(b), + right: Box::new(a), + }; + + let result_ab = execute(&union_ab, &structure); + let result_ba = execute(&union_ba, &structure); + + // As sets, they should be equal (multiplicities may differ) + prop_assert_eq!(bag_to_set(&result_ab), bag_to_set(&result_ba)); + } + + /// Test that Distinct is idempotent. + #[test] + fn test_distinct_idempotent( + structure in arb_structure(1, 8), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + let distinct1 = QueryOp::Distinct { + input: Box::new(scan), + }; + let distinct2 = QueryOp::Distinct { + input: Box::new(distinct1.clone()), + }; + + let result1 = execute(&distinct1, &structure); + let result2 = execute(&distinct2, &structure); + + prop_assert_eq!(bag_to_set(&result1), bag_to_set(&result2)); + } + + /// Test that Empty produces no results. + #[test] + fn test_empty_is_empty( + structure in arb_structure(1, 5), + ) { + let empty = QueryOp::Empty; + let result = execute(&empty, &structure); + prop_assert!(result.is_empty()); + } + + /// Test that Constant produces exactly one tuple. 
+ #[test] + fn test_constant_singleton( + tuple in prop::collection::vec(0usize..100, 1..=3), + ) { + let structure = Structure::new(1); // Empty structure + let slid_tuple: Vec = tuple.iter().map(|&i| Slid::from_usize(i)).collect(); + let constant = QueryOp::Constant { tuple: slid_tuple.clone() }; + let result = execute(&constant, &structure); + + prop_assert_eq!(result.len(), 1); + prop_assert!(result.tuples.contains_key(&slid_tuple)); + } + + /// Test filter with constant equality. + #[test] + fn test_filter_col_eq_const( + structure in arb_structure(1, 10), + filter_val in 0usize..1000, + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan.clone()), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(filter_val), + }, + }; + + let scan_result = execute(&scan, &structure); + let filter_result = execute(&filter, &structure); + + // Reference: manually filter + let reference: HashSet> = bag_to_set(&scan_result) + .into_iter() + .filter(|tuple| tuple[0] == Slid::from_usize(filter_val)) + .collect(); + + prop_assert_eq!(bag_to_set(&filter_result), reference); + } + + /// Test filter matches reference implementation for compound predicates. + #[test] + fn test_filter_matches_reference( + structure in arb_structure(1, 10), + ) { + // Get all elements as single-column tuples + let input = reference_scan(&structure, 0); + + // Test with True predicate + let filtered_true = reference_filter(&input, &Predicate::True, &structure); + prop_assert_eq!(filtered_true, input.clone()); + + // Test with False predicate + let filtered_false = reference_filter(&input, &Predicate::False, &structure); + prop_assert!(filtered_false.is_empty()); + } + + /// Test union matches reference implementation. 
+ #[test] + fn test_union_matches_reference( + structure in arb_structure(2, 5), + ) { + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let union = QueryOp::Union { + left: Box::new(left), + right: Box::new(right), + }; + + let result = execute(&union, &structure); + + let ref_left = reference_scan(&structure, 0); + let ref_right = reference_scan(&structure, 1); + let reference = reference_union(&ref_left, &ref_right); + + prop_assert_eq!(bag_to_set(&result), reference); + } + + /// Test that Negate(Negate(x)) = x. + #[test] + fn test_negate_involutive( + structure in arb_structure(1, 8), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + let negate1 = QueryOp::Negate { + input: Box::new(scan.clone()), + }; + let negate2 = QueryOp::Negate { + input: Box::new(negate1), + }; + + let original = execute(&scan, &structure); + let double_negated = execute(&negate2, &structure); + + prop_assert_eq!(bag_to_set(&original), bag_to_set(&double_negated)); + } + + /// Test that Project preserves all tuples (just reduces columns). 
+ #[test] + fn test_project_same_size( + structure in arb_structure(2, 4), + ) { + // Cross join creates (a, b) tuples + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }; + + // Project to first column only + let project = QueryOp::Project { + input: Box::new(join.clone()), + columns: vec![0], + }; + + let join_result = execute(&join, &structure); + let project_result = execute(&project, &structure); + + // Projected result should have same or fewer distinct tuples + // (could be fewer due to duplicate first elements) + prop_assert!(bag_to_set(&project_result).len() <= join_result.len()); + } +} + +#[test] +fn test_basic_operations_smoke() { + // Simple smoke test to ensure the proptest infrastructure works + let mut structure = Structure::new(2); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + structure.carriers[1].insert(10); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let result = execute(&scan, &structure); + assert_eq!(result.len(), 2); +} + +#[test] +fn test_pattern_compile_scan() { + use geolog::query::Pattern; + + // Create a structure with one sort + let mut structure = Structure::new(1); + structure.carriers[0].insert(5); + structure.carriers[0].insert(10); + structure.carriers[0].insert(15); + + // Create a simple pattern: scan sort 0, no constraints, return element + let pattern = Pattern::new(0); + + // Compile and execute + let plan = pattern.compile(); + let result = execute(&plan, &structure); + + // Should get all 3 elements + assert_eq!(result.len(), 3); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(5)])); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(10)])); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(15)])); +} + +/// Test that optimize preserves semantics for filter with True predicate. 
+#[test] +fn test_optimize_filter_true_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + structure.carriers[0].insert(3); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::True, + }; + + let unoptimized_result = execute(&filter, &structure); + let optimized = optimize(&filter); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); +} + +/// Test that optimize preserves semantics for filter with False predicate. +#[test] +fn test_optimize_filter_false_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::False, + }; + + let unoptimized_result = execute(&filter, &structure); + let optimized = optimize(&filter); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); + assert!(unoptimized_result.is_empty()); + assert!(optimized_result.is_empty()); +} + +/// Test that double negation optimization preserves semantics. 
+#[test] +fn test_optimize_double_negate_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(10); + structure.carriers[0].insert(20); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let negate1 = QueryOp::Negate { + input: Box::new(scan), + }; + let negate2 = QueryOp::Negate { + input: Box::new(negate1), + }; + + let unoptimized_result = execute(&negate2, &structure); + let optimized = optimize(&negate2); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); +} + +/// Test that union with empty optimization preserves semantics. +#[test] +fn test_optimize_union_empty_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(5); + structure.carriers[0].insert(15); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let union = QueryOp::Union { + left: Box::new(scan), + right: Box::new(QueryOp::Empty), + }; + + let unoptimized_result = execute(&union, &structure); + let optimized = optimize(&union); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); +} + +/// Test that join with empty optimization preserves semantics. 
+#[test] +fn test_optimize_join_empty_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(2); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let join = QueryOp::Join { + left: Box::new(scan), + right: Box::new(QueryOp::Empty), + cond: JoinCond::Cross, + }; + + let unoptimized_result = execute(&join, &structure); + let optimized = optimize(&join); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); + assert!(unoptimized_result.is_empty()); + assert!(optimized_result.is_empty()); +} + +#[test] +fn test_pattern_compile_with_function_filter() { + use geolog::query::Pattern; + use geolog::universe::Universe; + + // Create a structure with one sort and properly add elements + let mut structure = Structure::new(1); + let mut universe = Universe::new(); + + // Add 3 elements to sort 0 + let (slid0, _) = structure.add_element(&mut universe, 0); + let (slid1, _) = structure.add_element(&mut universe, 0); + let (slid2, _) = structure.add_element(&mut universe, 0); + + // Initialize function storage for 1 function with domain sort 0 + structure.init_functions(&[Some(0)]); + + // Function 0: maps elem0→slid10, elem1→slid20, elem2→slid10 + // We need target elements to map to - add them to a different "virtual" sort + // For simplicity, we'll use constant Slid values that represent the results + let slid10 = Slid::from_usize(10); + let slid20 = Slid::from_usize(20); + + structure.define_function(0, slid0, slid10).unwrap(); + structure.define_function(0, slid1, slid20).unwrap(); + structure.define_function(0, slid2, slid10).unwrap(); + + // Pattern: find elements where func(elem) = 10 + let pattern = Pattern::new(0) + .filter(0, slid10); + + // Compile and execute + let plan = pattern.compile(); + let result = execute(&plan, &structure); + + // Should get elements 0 and 2 (both map to 
10) + assert_eq!(result.len(), 2); + assert!(result.tuples.contains_key(&vec![slid0])); + assert!(result.tuples.contains_key(&vec![slid2])); + // Element 1 (maps to 20) should not be included + assert!(!result.tuples.contains_key(&vec![slid1])); +} + +// ============================================================================ +// DBSP Temporal Operator Property Tests +// ============================================================================ + +use geolog::query::backend::StreamContext; +use geolog::query::backend::execute_stream; + +proptest! { + #![proptest_config(ProptestConfig::with_cases(50))] + + /// Delay at timestep 0 always produces empty output + #[test] + fn test_delay_initial_empty_proptest( + structure in arb_structure(2, 5), + sort_idx in 0usize..2, + ) { + let mut ctx = StreamContext::new(); + let plan = QueryOp::Delay { + input: Box::new(QueryOp::Scan { sort_idx }), + state_id: 0, + }; + + // At timestep 0, delay should output empty + let result = execute_stream(&plan, &structure, &mut ctx); + prop_assert!(result.is_empty(), "Delay at t=0 should be empty"); + } + + /// Delay outputs previous timestep's value + #[test] + fn test_delay_outputs_previous_proptest( + structure in arb_structure(1, 8), + ) { + let mut ctx = StreamContext::new(); + let scan = QueryOp::Scan { sort_idx: 0 }; + let delay = QueryOp::Delay { + input: Box::new(scan.clone()), + state_id: 0, + }; + + // Step 0: capture input, output empty + let _ = execute_stream(&delay, &structure, &mut ctx); + ctx.step(); + + // Step 1: should output what was input at step 0 + let result = execute_stream(&delay, &structure, &mut ctx); + let expected = reference_scan(&structure, 0); + + prop_assert_eq!(bag_to_set(&result), expected, "Delay should output previous input"); + } + + /// ∫(δ(x)) = x for stable input (fundamental DBSP identity) + #[test] + fn test_integrate_diff_identity( + structure in arb_structure(1, 10), + ) { + let mut ctx = StreamContext::new(); + + // ∫(δ(scan)) + let 
plan = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + state_id: 1, + }; + + // Step 0: should equal scan + let result = execute_stream(&plan, &structure, &mut ctx); + let expected = reference_scan(&structure, 0); + prop_assert_eq!(bag_to_set(&result), expected.clone(), "∫(δ(scan)) should equal scan at t=0"); + + ctx.step(); + + // Step 1: still should equal scan (no changes) + let result = execute_stream(&plan, &structure, &mut ctx); + prop_assert_eq!(bag_to_set(&result), expected, "∫(δ(scan)) should equal scan at t=1"); + } + + /// Diff of stable input becomes empty after first timestep + #[test] + fn test_diff_stable_becomes_empty( + structure in arb_structure(1, 8), + ) { + let mut ctx = StreamContext::new(); + let plan = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + + // Step 0: diff = scan - {} = scan (all elements are "new") + let result0 = execute_stream(&plan, &structure, &mut ctx); + let expected0 = reference_scan(&structure, 0); + prop_assert_eq!(bag_to_set(&result0), expected0); + ctx.step(); + + // Step 1: diff = scan - scan = {} (no changes) + let result1 = execute_stream(&plan, &structure, &mut ctx); + prop_assert!(result1.is_empty(), "Diff of stable input should be empty"); + } + + /// Integrate accumulates multiplicities across timesteps + #[test] + fn test_integrate_accumulates( + tuple in prop::collection::vec(0usize..100, 1..=2), + num_steps in 1usize..5, + ) { + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let slid_tuple: Vec<Slid> = tuple.iter().map(|&i| Slid::from_usize(i)).collect(); + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Constant { tuple: slid_tuple.clone() }), + state_id: 0, + }; + + for step in 0..num_steps { + let result = execute_stream(&plan, &structure, &mut ctx); + + // After step i, multiplicity should be i+1 + let expected_mult = (step + 1) as i64; + let 
actual_mult = result.tuples.get(&slid_tuple).copied().unwrap_or(0); + prop_assert_eq!(actual_mult, expected_mult, "Multiplicity at step {}", step); + + ctx.step(); + } + } + + /// Negate and Integrate compose correctly: ∫(negate(δ(x))) + ∫(δ(x)) = 0 + #[test] + fn test_negate_integrate_diff_cancellation( + structure in arb_structure(1, 5), + ) { + let mut ctx1 = StreamContext::new(); + let mut ctx2 = StreamContext::new(); + + let diff = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + + // ∫(δ(scan)) + let int_pos = QueryOp::Integrate { + input: Box::new(diff.clone()), + state_id: 1, + }; + + // ∫(negate(δ(scan))) + let int_neg = QueryOp::Integrate { + input: Box::new(QueryOp::Negate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 2, + }), + }), + state_id: 3, + }; + + // Execute both for a couple steps + let result_pos = execute_stream(&int_pos, &structure, &mut ctx1); + let result_neg = execute_stream(&int_neg, &structure, &mut ctx2); + + // Union should cancel to zero + let combined = result_pos.union(&result_neg); + prop_assert!(combined.is_empty() || combined.iter().all(|(_, m)| *m == 0), + "∫(δ) + ∫(¬δ) should cancel"); + } + + /// DBSP filter distributes: Filter(Diff(x)) = Diff(Filter(x)) for stable input + /// (This is a key DBSP optimization: incrementalize then filter = filter then incrementalize) + #[test] + fn test_dbsp_filter_distribution( + structure in arb_structure(1, 10), + filter_val in 0usize..100, + ) { + let filter_slid = Slid::from_usize(filter_val); + let mut ctx1 = StreamContext::new(); + let mut ctx2 = StreamContext::new(); + + // Filter(Diff(Scan)) + let plan1 = QueryOp::Filter { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + pred: Predicate::ColEqConst { col: 0, val: filter_slid }, + }; + + // Diff(Filter(Scan)) + let plan2 = QueryOp::Diff { + input: Box::new(QueryOp::Filter { + input: 
Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::ColEqConst { col: 0, val: filter_slid }, + }), + state_id: 1, + }; + + // Both should produce same results + let result1 = execute_stream(&plan1, &structure, &mut ctx1); + let result2 = execute_stream(&plan2, &structure, &mut ctx2); + + prop_assert_eq!(bag_to_set(&result1), bag_to_set(&result2), + "Filter(Diff(x)) = Diff(Filter(x))"); + + ctx1.step(); + ctx2.step(); + + // Should remain equal at next timestep + let result1 = execute_stream(&plan1, &structure, &mut ctx1); + let result2 = execute_stream(&plan2, &structure, &mut ctx2); + + prop_assert_eq!(bag_to_set(&result1), bag_to_set(&result2), + "Filter(Diff(x)) = Diff(Filter(x)) at t=1"); + } +} diff --git a/tests/proptest_solver.rs b/tests/proptest_solver.rs new file mode 100644 index 0000000..04016b6 --- /dev/null +++ b/tests/proptest_solver.rs @@ -0,0 +1,382 @@ +//! Property tests for the geometric logic solver +//! +//! Tests key properties: +//! - solve(trivial_theory) always finds a model (empty structure) +//! - solve(inconsistent_theory) is always UNSAT +//! 
- enumerate_models(empty, T) = solve(T) + +mod generators; + +use std::rc::Rc; + +use geolog::core::{ + Context, DerivedSort, ElaboratedTheory, Formula, Sequent, Signature, Term, Theory, +}; +use geolog::solver::{solve, enumerate_models, Budget, EnumerationResult}; +use geolog::universe::Universe; +use proptest::prelude::*; + +// ============================================================================ +// Theory Generators +// ============================================================================ + +/// Generate a theory with no axioms (trivially satisfiable by empty model) +fn arb_trivial_theory() -> impl Strategy<Value = Rc<ElaboratedTheory>> { + (1usize..=5).prop_map(|num_sorts| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Trivial".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }) + }) +} + +/// Generate an inconsistent theory (True ⊢ False) +fn arb_inconsistent_theory() -> impl Strategy<Value = Rc<ElaboratedTheory>> { + (1usize..=3).prop_map(|num_sorts| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::False, + }; + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Inconsistent".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/inconsistent".to_string()], + }, + }) + }) +} + +/// Generate a theory with an existential axiom +fn arb_existential_theory() -> impl Strategy<Value = Rc<ElaboratedTheory>> { + (1usize..=3, 0usize..=2).prop_map(|(num_sorts, rel_count)| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + // Add unary relations + for i in 0..rel_count { + sig.add_relation(format!("R{}", i), DerivedSort::Base(0)); + } + + let mut axioms = vec![]; + + // Add unconditional existential: |- ∃x:S0. 
x = x + // This just requires creating at least one element + if num_sorts > 0 { + axioms.push(Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }); + } + + // Generate axiom names + let axiom_names: Vec<String> = (0..axioms.len()) + .map(|i| format!("ax/exists_{}", i)) + .collect(); + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Existential".to_string(), + signature: sig, + axioms, + axiom_names, + }, + }) + }) +} + +// ============================================================================ +// Property Tests +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(100))] + + /// Trivial theories (no axioms) are always solved with empty model + #[test] + fn trivial_theory_always_solved(theory in arb_trivial_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // Empty model should have all carriers empty + for sort_idx in 0..model.num_sorts() { + prop_assert_eq!(model.carrier_size(sort_idx), 0); + } + } + _ => prop_assert!(false, "Trivial theory should always be solved"), + } + } + + /// Inconsistent theories (True ⊢ False) are always UNSAT + #[test] + fn inconsistent_theory_always_unsat(theory in arb_inconsistent_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Unsat { .. } => { + // Expected! 
+ } + _ => prop_assert!(false, "Inconsistent theory should always be UNSAT"), + } + } + + /// solve(T) equals enumerate_models(empty, T) + #[test] + fn solve_equals_enumerate_empty(theory in arb_trivial_theory()) { + let budget = Budget::quick(); + + // Method 1: solve + let result1 = solve(theory.clone(), budget.clone()); + + // Method 2: enumerate_models with empty base + let num_sorts = theory.theory.signature.sorts.len(); + let empty_base = geolog::core::Structure::new(num_sorts); + let result2 = enumerate_models(empty_base, Universe::new(), theory, budget); + + // Both should produce equivalent results (both find models or both fail) + match (&result1, &result2) { + (EnumerationResult::Found { .. }, EnumerationResult::Found { .. }) => { + // Both found - good! + } + (EnumerationResult::Unsat { .. }, EnumerationResult::Unsat { .. }) => { + // Both UNSAT - good! + } + (EnumerationResult::Incomplete { .. }, EnumerationResult::Incomplete { .. }) => { + // Both incomplete - acceptable + } + _ => prop_assert!(false, "solve and enumerate_models should produce equivalent results"), + } + } + + /// Existential theory creates at least one element + #[test] + fn existential_creates_elements(theory in arb_existential_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // If theory has existential axioms, should have at least one element + if !theory.theory.axioms.is_empty() { + let has_elements = (0..model.num_sorts()) + .any(|s| model.carrier_size(s) > 0); + prop_assert!(has_elements, "Existential theory should have at least one element"); + } + } + EnumerationResult::Incomplete { .. } => { + // Acceptable - budget might be too small + } + EnumerationResult::Unsat { .. 
} => { + prop_assert!(false, "Existential theory should not be UNSAT"); + } + } + } +} + +/// Generate a theory with relations and implication axioms (Horn clauses) +fn arb_relation_theory() -> impl Strategy<Value = Rc<ElaboratedTheory>> { + (1usize..=2, 1usize..=3).prop_map(|(num_sorts, num_rels)| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + // Add unary relations on first sort + for i in 0..num_rels { + sig.add_relation(format!("R{}", i), DerivedSort::Base(0)); + } + + let mut axioms = vec![]; + + // Add existential axiom to ensure at least one element + axioms.push(Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel( + 0, // R0(x) + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }); + + // If we have R1, add Horn clause: R0(x) |- R1(x) + if num_rels > 1 { + let ctx = Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }; + axioms.push(Sequent { + context: ctx, + premise: Formula::Rel( + 0, + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + conclusion: Formula::Rel( + 1, + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + }); + } + + let axiom_names: Vec<String> = (0..axioms.len()) + .map(|i| format!("ax/rel_{}", i)) + .collect(); + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Relations".to_string(), + signature: sig, + axioms, + axiom_names, + }, + }) + }) +} + +/// Generate a theory with a function and equality axiom +fn arb_function_theory() -> impl Strategy<Value = Rc<ElaboratedTheory>> { + (1usize..=2).prop_map(|num_sorts| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + + // Add function f : S0 -> S0 + sig.add_function("f".to_string(), DerivedSort::Base(0), DerivedSort::Base(0)); + + // Add unconditional existential: |- ∃x:S0. 
f(x) = x + // This requires creating at least one fixed point + // BUT we need the tensor compiler to handle f(x) = x correctly + let axioms = vec![ + Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Eq( + Term::App(0, Box::new(Term::Var("x".to_string(), DerivedSort::Base(0)))), + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }, + ]; + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "FunctionTheory".to_string(), + signature: sig, + axioms, + axiom_names: vec!["ax/fixpoint".to_string()], + }, + }) + }) +} + +// ============================================================================ +// Focused Tests +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(50))] + + /// Function theories with fixed-point existentials work + #[test] + fn function_fixed_point_theory(theory in arb_function_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // Should have created at least one element that is its own fixed point + if !theory.theory.axioms.is_empty() { + let has_elements = (0..model.num_sorts()) + .any(|s| model.carrier_size(s) > 0); + prop_assert!(has_elements, "Function theory should have at least one element"); + } + } + EnumerationResult::Incomplete { .. } => { + // Acceptable - budget might be too small + } + EnumerationResult::Unsat { .. } => { + // This is acceptable! The axiom ∃x. f(x)=x might be UNSAT + // if we can't construct such an x with the solver's strategy. + // Actually this shouldn't happen for a fresh function. 
+ } + } + } + + /// Relation theories with Horn clauses propagate correctly + #[test] + fn relation_horn_clause_propagation(theory in arb_relation_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // Should have at least one element in R0 + prop_assert!(model.carrier_size(0) > 0, "Should have elements"); + + // If theory has 2+ relations and a Horn clause R0(x) |- R1(x), + // then any element in R0 should also be in R1 + if theory.theory.signature.relations.len() > 1 { + // Check that R1 is populated + // (We can't easily verify the full Horn clause semantics here + // without access to relation contents, but we can check it runs) + } + } + EnumerationResult::Incomplete { .. } => { + // Acceptable + } + EnumerationResult::Unsat { .. } => { + prop_assert!(false, "Relation theory should not be UNSAT"); + } + } + } + + /// Budget limits are respected + #[test] + fn budget_limits_respected(theory in arb_existential_theory()) { + // Very small budget + let tiny_budget = Budget::new(1, 1); + let result = solve(theory.clone(), tiny_budget); + + // Should either solve quickly or timeout/incomplete + match result { + EnumerationResult::Found { time_ms, .. } => { + // If solved, should be fast + prop_assert!(time_ms < 100.0, "Solved within reasonable time"); + } + EnumerationResult::Incomplete { time_ms, .. } => { + // Should respect budget + prop_assert!(time_ms < 100.0, "Incomplete within reasonable time"); + } + EnumerationResult::Unsat { time_ms } => { + // Should respect budget + prop_assert!(time_ms < 100.0, "UNSAT within reasonable time"); + } + } + } +} diff --git a/tests/proptest_structure.rs b/tests/proptest_structure.rs new file mode 100644 index 0000000..0453fff --- /dev/null +++ b/tests/proptest_structure.rs @@ -0,0 +1,239 @@ +//! 
Property tests for Structure invariants + +mod generators; + +use generators::{StructureOp, StructureParams, check_structure_invariants}; +use geolog::core::Structure; +use geolog::id::{NumericId, Slid}; +use geolog::universe::Universe; +use proptest::prelude::*; + +proptest! { + #![proptest_config(ProptestConfig::with_cases(2048))] + /// Empty structure maintains invariants + #[test] + fn empty_structure_invariants(num_sorts in 1usize..10) { + let structure = Structure::new(num_sorts); + prop_assert!(check_structure_invariants(&structure).is_ok()); + prop_assert_eq!(structure.len(), 0); + prop_assert_eq!(structure.num_sorts(), num_sorts); + } + + /// Structure maintains invariants after adding elements + #[test] + fn structure_invariants_after_adds( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 4, + max_elements_per_sort: 10, + }) + ) { + prop_assert!(check_structure_invariants(&structure).is_ok()); + } + + /// add_element correctly sets up bijection + #[test] + fn add_element_bijection( + num_sorts in 1usize..5, + sort_id in any::<prop::sample::Index>(), + ) { + let sort_id = sort_id.index(num_sorts); + + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + let (slid, luid) = structure.add_element(&mut universe, sort_id); + + // Forward: slid → luid + prop_assert_eq!(structure.luids[slid.index()], luid); + + // Reverse: luid → slid + prop_assert_eq!(structure.luid_to_slid.get(&luid), Some(&slid)); + prop_assert_eq!(structure.lookup_luid(luid), Some(slid)); + + // Sort is correct + prop_assert_eq!(structure.sorts[slid.index()], sort_id); + + // Carrier contains the element + prop_assert!(structure.carriers[sort_id].contains(slid.index() as u64)); + } + + /// Carrier membership is exclusive (element in exactly one carrier) + #[test] + fn carrier_membership_exclusive( + ops in generators::arb_structure_ops(5, 20) + ) { + let mut universe = Universe::new(); + let mut structure = Structure::new(5); + + for op in 
ops { + match op { + StructureOp::AddElement { sort_id } => { + structure.add_element(&mut universe, sort_id); + } + } + } + + // Check each element appears in exactly one carrier + for slid in 0..structure.len() { + let sort_id = structure.sorts[slid]; + let mut found_in = Vec::new(); + + for (carrier_id, carrier) in structure.carriers.iter().enumerate() { + if carrier.contains(slid as u64) { + found_in.push(carrier_id); + } + } + + prop_assert_eq!( + found_in.len(), 1, + "slid {} should be in exactly one carrier, found in {:?}", + slid, found_in + ); + prop_assert_eq!(found_in[0], sort_id); + } + } + + /// sort_local_id is consistent with carrier rank + #[test] + fn sort_local_id_consistency( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 8, + }) + ) { + for slid_idx in 0..structure.len() { + let slid = Slid::from_usize(slid_idx); + let sort_id = structure.sorts[slid_idx]; + let sort_slid = structure.sort_local_id(slid); + + // sort_slid should be in range [0, carrier_size) + let carrier_size = structure.carrier_size(sort_id); + prop_assert!( + sort_slid.index() < carrier_size, + "sort_slid {} should be < carrier_size {}", + sort_slid, carrier_size + ); + } + } + + /// carrier_size matches number of elements with that sort + #[test] + fn carrier_size_matches_count( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 4, + max_elements_per_sort: 12, + }) + ) { + for sort_id in 0..structure.num_sorts() { + let carrier_size = structure.carrier_size(sort_id); + let count = structure.sorts.iter().filter(|&&s| s == sort_id).count(); + prop_assert_eq!(carrier_size, count); + } + } + + /// add_element_with_luid preserves existing element identity + #[test] + fn add_with_existing_luid_identity(num_sorts in 1usize..5) { + let mut universe = Universe::new(); + let mut structure1 = Structure::new(num_sorts); + + // Create element in first structure + let (slid1, luid1) = 
structure1.add_element(&mut universe, 0); + + // Create second structure and add element with same luid + let mut structure2 = Structure::new(num_sorts); + let slid2 = structure2.add_element_with_luid(luid1, 0); + + // Should have same luid + prop_assert_eq!(structure2.luids[slid2.index()], luid1); + prop_assert_eq!(structure2.lookup_luid(luid1), Some(slid2)); + + // Both structures should maintain invariants + prop_assert!(check_structure_invariants(&structure1).is_ok()); + prop_assert!(check_structure_invariants(&structure2).is_ok()); + + let _ = slid1; // silence warning + } + + /// get_luid returns correct luid for slid + #[test] + fn get_luid_correctness( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 10, + }) + ) { + for slid_idx in 0..structure.len() { + let slid = Slid::from_usize(slid_idx); + let luid = structure.get_luid(slid); + prop_assert_eq!(structure.luids[slid_idx], luid); + prop_assert_eq!(structure.lookup_luid(luid), Some(slid)); + } + } + + /// Total elements equals sum of carrier sizes + #[test] + fn total_equals_carrier_sum( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 5, + max_elements_per_sort: 8, + }) + ) { + let carrier_total: usize = (0..structure.num_sorts()) + .map(|s| structure.carrier_size(s)) + .sum(); + + prop_assert_eq!(structure.len(), carrier_total); + } + + /// Sequential add_elements produce sequential slids + #[test] + fn sequential_slids(ops in generators::arb_structure_ops(3, 15)) { + let mut universe = Universe::new(); + let mut structure = Structure::new(3); + let mut expected_slid_idx: usize = 0; + + for op in ops { + match op { + StructureOp::AddElement { sort_id } => { + let (slid, _) = structure.add_element(&mut universe, sort_id); + prop_assert_eq!(slid, Slid::from_usize(expected_slid_idx)); + expected_slid_idx += 1; + } + } + } + + prop_assert_eq!(structure.len(), expected_slid_idx); + } +} + +// Additional 
focused tests + +proptest! { + /// Function initialization creates correct storage + #[test] + fn function_init_correct_size( + (mut structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 5, + }) + ) { + // Initialize functions with domain sort IDs + let domain_sort_ids: Vec<Option<usize>> = vec![Some(0), Some(1), None]; + structure.init_functions(&domain_sort_ids); + + prop_assert_eq!(structure.num_functions(), 3); + + // Check sizes match carrier sizes + prop_assert_eq!( + structure.functions[0].len(), + structure.carrier_size(0) + ); + prop_assert_eq!( + structure.functions[1].len(), + structure.carrier_size(1) + ); + // Function 2 has None domain, so size should be 0 + prop_assert_eq!(structure.functions[2].len(), 0); + } +} diff --git a/tests/proptest_tensor.rs b/tests/proptest_tensor.rs new file mode 100644 index 0000000..10961c5 --- /dev/null +++ b/tests/proptest_tensor.rs @@ -0,0 +1,476 @@ +//! Property tests for tensor operations +//! +//! Tests algebraic properties of tensor operations using proptest. + +mod generators; + +use generators::{TensorParams, arb_sparse_tensor, arb_tensor_pair_same_dims, arb_sparse_tensor_with_dims}; +use geolog::tensor::{SparseTensor, TensorExpr, conjunction, exists, conjunction_all, disjunction_all}; +use proptest::prelude::*; + +// ============================================================================ +// SparseTensor Basic Properties +// ============================================================================ + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(1024))] + + /// Empty tensor has no tuples + #[test] + fn empty_tensor_is_empty(dims in proptest::collection::vec(1usize..10, 0..4)) { + let tensor = SparseTensor::empty(dims.clone()); + prop_assert!(tensor.is_empty()); + prop_assert_eq!(tensor.len(), 0); + prop_assert_eq!(tensor.dims, dims); + } + + /// Scalar true contains the empty tuple + #[test] + fn scalar_true_contains_empty(_seed in any::<u8>()) { + let tensor = SparseTensor::scalar(true); + prop_assert!(tensor.contains(&[])); + prop_assert_eq!(tensor.len(), 1); + prop_assert!(tensor.dims.is_empty()); + } + + /// Scalar false is empty + #[test] + fn scalar_false_is_empty(_seed in any::<u8>()) { + let tensor = SparseTensor::scalar(false); + prop_assert!(!tensor.contains(&[])); + prop_assert!(tensor.is_empty()); + } + + /// Insert/remove roundtrip + #[test] + fn insert_remove_roundtrip( + dims in proptest::collection::vec(1usize..5, 1..3), + tuple_idx in any::<prop::sample::Index>(), + ) { + let mut tensor = SparseTensor::empty(dims.clone()); + + // Generate a valid tuple + let tuple: Vec<usize> = dims.iter() + .map(|&d| tuple_idx.index(d.max(1))) + .collect(); + + prop_assert!(!tensor.contains(&tuple)); + tensor.insert(tuple.clone()); + prop_assert!(tensor.contains(&tuple)); + tensor.remove(&tuple); + prop_assert!(!tensor.contains(&tuple)); + } + + /// Generated tensor has valid tuples (within dimension bounds) + #[test] + fn generated_tensor_valid_tuples( + tensor in arb_sparse_tensor(TensorParams::default()) + ) { + for tuple in tensor.iter() { + prop_assert_eq!(tuple.len(), tensor.dims.len()); + for (i, &val) in tuple.iter().enumerate() { + prop_assert!(val < tensor.dims[i], "tuple value {} >= dim {}", val, tensor.dims[i]); + } + } + } +} + +// ============================================================================ +// TensorExpr Product Properties +// ============================================================================ + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(512))] + + /// Product of empty tensors is empty + #[test] + fn product_with_empty_is_empty( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let empty = SparseTensor::empty(vec![3]); + let expr = TensorExpr::Product(vec![ + TensorExpr::leaf(tensor), + TensorExpr::leaf(empty), + ]); + let result = expr.materialize(); + prop_assert!(result.is_empty()); + } + + /// Product with scalar true is identity (dims extended but tuples preserved) + #[test] + fn product_with_scalar_true( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let scalar_true = SparseTensor::scalar(true); + let orig_len = tensor.len(); + let orig_dims = tensor.dims.clone(); + + let expr = TensorExpr::Product(vec![ + TensorExpr::leaf(tensor), + TensorExpr::leaf(scalar_true), + ]); + let result = expr.materialize(); + + prop_assert_eq!(result.len(), orig_len); + prop_assert_eq!(result.dims, orig_dims); + } + + /// Empty product is scalar true + #[test] + fn empty_product_is_scalar_true(_seed in any::<u8>()) { + let expr = TensorExpr::Product(vec![]); + let result = expr.materialize(); + prop_assert!(result.contains(&[])); + prop_assert_eq!(result.len(), 1); + } + + /// Product dimensions are concatenation + #[test] + fn product_dims_concatenate( + t1 in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 4, max_tuples: 5 }), + t2 in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 4, max_tuples: 5 }), + ) { + let expected_dims: Vec<usize> = t1.dims.iter().chain(t2.dims.iter()).copied().collect(); + + let expr = TensorExpr::Product(vec![ + TensorExpr::leaf(t1), + TensorExpr::leaf(t2), + ]); + let result = expr.materialize(); + + prop_assert_eq!(result.dims, expected_dims); + } +} + +// ============================================================================ +// Sum (Disjunction) Properties +// 
============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(512))] + + /// Sum is commutative + #[test] + fn sum_commutative( + (t1, t2) in arb_tensor_pair_same_dims(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let sum1 = TensorExpr::Sum(vec![ + TensorExpr::leaf(t1.clone()), + TensorExpr::leaf(t2.clone()), + ]).materialize(); + + let sum2 = TensorExpr::Sum(vec![ + TensorExpr::leaf(t2), + TensorExpr::leaf(t1), + ]).materialize(); + + prop_assert_eq!(sum1, sum2); + } + + /// Sum is idempotent (T ∨ T = T) + #[test] + fn sum_idempotent( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let sum = TensorExpr::Sum(vec![ + TensorExpr::leaf(tensor.clone()), + TensorExpr::leaf(tensor.clone()), + ]).materialize(); + + prop_assert_eq!(sum, tensor); + } + + /// Sum with empty is identity + #[test] + fn sum_with_empty_is_identity( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let empty = SparseTensor::empty(tensor.dims.clone()); + let sum = TensorExpr::Sum(vec![ + TensorExpr::leaf(tensor.clone()), + TensorExpr::leaf(empty), + ]).materialize(); + + prop_assert_eq!(sum, tensor); + } + + /// Empty sum is scalar false + #[test] + fn empty_sum_is_scalar_false(_seed in any::()) { + let sum = TensorExpr::Sum(vec![]).materialize(); + prop_assert!(sum.is_empty()); + } + + /// Sum extent is union of extents + #[test] + fn sum_is_union( + (t1, t2) in arb_tensor_pair_same_dims(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let sum = TensorExpr::Sum(vec![ + TensorExpr::leaf(t1.clone()), + TensorExpr::leaf(t2.clone()), + ]).materialize(); + + // Every tuple in t1 should be in sum + for tuple in t1.iter() { + prop_assert!(sum.contains(tuple)); + } + + // Every tuple in t2 should be in sum + for tuple in t2.iter() { + prop_assert!(sum.contains(tuple)); + } + + // 
Every tuple in sum should be in t1 or t2 + for tuple in sum.iter() { + prop_assert!(t1.contains(tuple) || t2.contains(tuple)); + } + } +} + +// ============================================================================ +// Conjunction Properties +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Conjunction with scalar true is identity (modulo variable naming) + #[test] + fn conjunction_with_true( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let scalar_true = SparseTensor::scalar(true); + + let (expr, result_vars) = conjunction( + TensorExpr::leaf(tensor.clone()), + &vars, + TensorExpr::leaf(scalar_true), + &[], + ); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vars); + prop_assert_eq!(result, tensor); + } + + /// Conjunction with scalar false is empty + #[test] + fn conjunction_with_false( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let scalar_false = SparseTensor::scalar(false); + + let (expr, _result_vars) = conjunction( + TensorExpr::leaf(tensor), + &vars, + TensorExpr::leaf(scalar_false), + &[], + ); + let result = expr.materialize(); + + prop_assert!(result.is_empty()); + } + + /// Conjunction is commutative (on shared variables) + #[test] + fn conjunction_commutative( + t1 in arb_sparse_tensor_with_dims(vec![3, 4], 5), + t2 in arb_sparse_tensor_with_dims(vec![4, 5], 5), + ) { + let vars1 = vec!["x".to_string(), "y".to_string()]; + let vars2 = vec!["y".to_string(), "z".to_string()]; + + let (expr1, _vars_result1) = conjunction( + TensorExpr::leaf(t1.clone()), + &vars1, + TensorExpr::leaf(t2.clone()), + &vars2, + ); + + let (expr2, _vars_result2) = conjunction( + TensorExpr::leaf(t2), + &vars2, + TensorExpr::leaf(t1), + &vars1, + ); + + let result1 = expr1.materialize(); + let result2 = 
expr2.materialize(); + + // Same number of tuples (though variable order may differ) + prop_assert_eq!(result1.len(), result2.len()); + } +} + +// ============================================================================ +// Exists (Contraction) Properties +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Exists on non-existent variable is identity + #[test] + fn exists_nonexistent_var( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = exists(TensorExpr::leaf(tensor.clone()), &vars, "z"); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vars); + prop_assert_eq!(result, tensor); + } + + /// Exists reduces arity by 1 + #[test] + fn exists_reduces_arity( + tensor in arb_sparse_tensor_with_dims(vec![3, 4], 8) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = exists(TensorExpr::leaf(tensor), &vars, "y"); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vec!["x"]); + prop_assert_eq!(result.arity(), 1); + prop_assert_eq!(result.dims, vec![3]); + } + + /// Exists on scalar is identity + #[test] + fn exists_on_scalar(value in any::()) { + let tensor = SparseTensor::scalar(value); + let (expr, result_vars) = exists(TensorExpr::leaf(tensor.clone()), &[], "x"); + let result = expr.materialize(); + + prop_assert!(result_vars.is_empty()); + prop_assert_eq!(result, tensor); + } + + /// Double exists is same as single exists (idempotent on same var) + #[test] + fn exists_idempotent( + tensor in arb_sparse_tensor_with_dims(vec![3, 4], 8) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + + let (expr1, vars1) = exists(TensorExpr::leaf(tensor.clone()), &vars, "y"); + let (expr2, vars2) = exists(expr1, &vars1, "y"); + + let result = expr2.materialize(); + + prop_assert_eq!(vars2, vec!["x"]); + 
prop_assert_eq!(result.arity(), 1); + } +} + +// ============================================================================ +// Fusion Tests (Contract(Product(...))) +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Fused join produces same result as naive evaluation + #[test] + fn fused_join_correctness( + t1 in arb_sparse_tensor_with_dims(vec![5, 5], 10), + t2 in arb_sparse_tensor_with_dims(vec![5, 5], 10), + ) { + let vars1 = vec!["x".to_string(), "y".to_string()]; + let vars2 = vec!["y".to_string(), "z".to_string()]; + + // This creates Contract(Product(...)) which gets fused + let (conj_expr, conj_vars) = conjunction( + TensorExpr::leaf(t1.clone()), + &vars1, + TensorExpr::leaf(t2.clone()), + &vars2, + ); + + let (result_expr, _result_vars) = exists(conj_expr, &conj_vars, "y"); + let result = result_expr.materialize(); + + // Verify result is correct by checking each tuple + for tuple in result.iter() { + let x = tuple[0]; + let z = tuple[1]; + + // Should exist some y such that t1(x,y) and t2(y,z) + let mut found = false; + for y in 0..5 { + if t1.contains(&[x, y]) && t2.contains(&[y, z]) { + found = true; + break; + } + } + prop_assert!(found, "tuple {:?} in result but no witness y", tuple); + } + + // And every valid (x,z) should be in result + for x in 0..5 { + for z in 0..5 { + let mut should_be_in_result = false; + for y in 0..5 { + if t1.contains(&[x, y]) && t2.contains(&[y, z]) { + should_be_in_result = true; + break; + } + } + prop_assert_eq!( + result.contains(&[x, z]), + should_be_in_result, + "({}, {}) expected {} but got {}", + x, z, should_be_in_result, result.contains(&[x, z]) + ); + } + } + } +} + +// ============================================================================ +// Disjunction Helper Tests +// ============================================================================ + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(256))] + + /// disjunction_all with empty is scalar false + #[test] + fn disjunction_all_empty(_seed in any::()) { + let (expr, vars) = disjunction_all(vec![]); + let result = expr.materialize(); + + prop_assert!(vars.is_empty()); + prop_assert!(result.is_empty()); + } + + /// disjunction_all with single element is identity + #[test] + fn disjunction_all_single( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = disjunction_all(vec![ + (TensorExpr::leaf(tensor.clone()), vars.clone()) + ]); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vars); + prop_assert_eq!(result, tensor); + } + + /// conjunction_all with empty is scalar true + #[test] + fn conjunction_all_empty(_seed in any::()) { + let (expr, vars) = conjunction_all(vec![]); + let result = expr.materialize(); + + prop_assert!(vars.is_empty()); + prop_assert!(result.contains(&[])); + prop_assert_eq!(result.len(), 1); + } +} diff --git a/tests/proptest_universe.rs b/tests/proptest_universe.rs new file mode 100644 index 0000000..4fbb0ec --- /dev/null +++ b/tests/proptest_universe.rs @@ -0,0 +1,159 @@ +//! Property tests for Universe (UUID ↔ Luid bijection) + +mod generators; + +use geolog::id::{Luid, NumericId, Uuid}; +use geolog::universe::Universe; +use proptest::prelude::*; +use std::collections::HashSet; +use tempfile::tempdir; + +proptest! 
{ + /// Interning the same UUID twice returns the same Luid + #[test] + fn intern_idempotent(uuid in generators::arb_uuid()) { + let mut universe = Universe::new(); + + let luid1 = universe.intern(uuid); + let luid2 = universe.intern(uuid); + + prop_assert_eq!(luid1, luid2); + } + + /// Interning then looking up returns the original UUID + #[test] + fn intern_lookup_roundtrip(uuid in generators::arb_uuid()) { + let mut universe = Universe::new(); + + let luid = universe.intern(uuid); + let retrieved = universe.get(luid); + + prop_assert_eq!(retrieved, Some(uuid)); + } + + /// Reverse lookup (UUID → Luid) works correctly + #[test] + fn reverse_lookup_roundtrip(uuid in generators::arb_uuid()) { + let mut universe = Universe::new(); + + let luid = universe.intern(uuid); + let found_luid = universe.lookup(&uuid); + + prop_assert_eq!(found_luid, Some(luid)); + } + + /// After bulk interning, bijection holds for all entries + #[test] + fn bijection_after_bulk_intern(uuids in proptest::collection::vec(generators::arb_uuid(), 1..50)) { + let mut universe = Universe::new(); + + // Intern all UUIDs + let luids: Vec<_> = uuids.iter().map(|&uuid| universe.intern(uuid)).collect(); + + // Forward direction: Luid → UUID + for (&uuid, &luid) in uuids.iter().zip(luids.iter()) { + prop_assert_eq!(universe.get(luid), Some(uuid)); + } + + // Reverse direction: UUID → Luid + for &uuid in &uuids { + prop_assert!(universe.lookup(&uuid).is_some()); + } + + // Uniqueness: unique UUIDs produce unique Luids + let unique_uuids: HashSet<_> = uuids.iter().collect(); + let unique_luids: HashSet<_> = luids.iter().collect(); + // Note: Luids may have fewer unique values if there are duplicate UUIDs + prop_assert!(unique_luids.len() <= unique_uuids.len()); + } + + /// Luids are assigned sequentially starting from 0 + #[test] + fn luids_sequential(count in 1usize..20) { + let mut universe = Universe::new(); + + for i in 0..count { + let uuid = Uuid::now_v7(); + let luid = universe.intern(uuid); + 
prop_assert_eq!(luid, Luid::from_usize(i), "Luid {} should be {}", luid, i); + } + } + + /// Save and load preserves all mappings + #[test] + fn save_load_roundtrip(uuids in generators::arb_unique_uuids(10)) { + let dir = tempdir().unwrap(); + let path = dir.path().join("universe.bin"); + + // Save + let original_luids: Vec<_>; + { + let mut universe = Universe::with_path(&path); + original_luids = uuids.iter().map(|&uuid| universe.intern(uuid)).collect(); + universe.save().unwrap(); + } + + // Load + { + let loaded = Universe::load(&path).unwrap(); + + // Check all mappings preserved + for (&uuid, &expected_luid) in uuids.iter().zip(original_luids.iter()) { + let retrieved = loaded.get(expected_luid); + prop_assert_eq!(retrieved, Some(uuid)); + + let found_luid = loaded.lookup(&uuid); + prop_assert_eq!(found_luid, Some(expected_luid)); + } + } + } + + /// Dirty flag is set after intern, cleared after save + #[test] + fn dirty_flag_consistency(uuid in generators::arb_uuid()) { + let dir = tempdir().unwrap(); + let path = dir.path().join("universe.bin"); + + let mut universe = Universe::with_path(&path); + + // Initially clean + prop_assert!(!universe.is_dirty()); + + // Dirty after intern + universe.intern(uuid); + prop_assert!(universe.is_dirty()); + + // Clean after save + universe.save().unwrap(); + prop_assert!(!universe.is_dirty()); + } + + /// Iterator yields all interned UUIDs in order + #[test] + fn iter_yields_all(uuids in generators::arb_unique_uuids(15)) { + let mut universe = Universe::new(); + + for &uuid in &uuids { + universe.intern(uuid); + } + + let iter_results: Vec<_> = universe.iter().collect(); + + prop_assert_eq!(iter_results.len(), uuids.len()); + + for (i, (luid, uuid)) in iter_results.iter().enumerate() { + prop_assert_eq!(*luid, Luid::from_usize(i)); + prop_assert_eq!(*uuid, uuids[i]); + } + } +} + +// Non-property unit tests for edge cases + +#[test] +fn test_load_nonexistent() { + let dir = tempdir().unwrap(); + let path = 
dir.path().join("nonexistent.bin"); + let universe = Universe::load(&path).expect("load should succeed for nonexistent"); + assert!(universe.is_empty()); +} diff --git a/tests/unit_chase.rs b/tests/unit_chase.rs new file mode 100644 index 0000000..55d514a --- /dev/null +++ b/tests/unit_chase.rs @@ -0,0 +1,426 @@ +//! Unit tests for tensor-backed chase algorithm + +use geolog::core::{ + Context, DerivedSort, Formula, RelationStorage, Sequent, Signature, Structure, Term, Theory, +}; +use geolog::query::chase::chase_fixpoint; +use geolog::universe::Universe; + +/// Create a simple test theory with one sort and one unary relation +fn simple_theory_with_relation() -> Theory { + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + Theory { + name: "Simple".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + } +} + +/// Create a preorder-like theory with binary leq relation, reflexivity and transitivity +fn preorder_theory() -> Theory { + let mut sig = Signature::default(); + sig.add_sort("X".to_string()); + + // Binary relation with product domain: leq : [x: X, y: X] -> Prop + let domain = DerivedSort::Product(vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ]); + sig.add_relation("leq".to_string(), domain); + + // Reflexivity axiom: forall x : X. |- [x: x, y: x] leq + let refl_axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ]), + ), + }; + + // Transitivity axiom: forall x, y, z : X. 
[x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq + let trans_axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Conj(vec![ + Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + ), + Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ]), + ), + ]), + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ]), + ), + }; + + Theory { + name: "Preorder".to_string(), + signature: sig, + axioms: vec![refl_axiom, trans_axiom], + axiom_names: vec!["ax/refl".to_string(), "ax/trans".to_string()], + } +} + +#[test] +fn test_chase_adds_relation_from_true_premise() { + // Axiom: forall x : V. 
|- R(x) + // This should add all elements to R + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add some elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 0); + + // Initialize relation + structure.init_relations(&[1]); + + // Run chase + let iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // Should add all 3 elements to R + assert_eq!(structure.get_relation(0).len(), 3); + assert!(structure.query_relation(0, &[a])); + assert!(structure.query_relation(0, &[b])); + assert!(structure.query_relation(0, &[c])); + + // Should converge in 2 iterations + assert_eq!(iterations, 2); +} + +#[test] +fn test_chase_fixpoint_empty_structure() { + let theory = simple_theory_with_relation(); + + // Axiom: forall x : V. 
|- R(x) + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + structure.init_relations(&[1]); + + let iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &theory.signature, 100).unwrap(); + + // Empty structure: no elements, so nothing to add + assert_eq!(iterations, 1); + assert_eq!(structure.get_relation(0).len(), 0); +} + +#[test] +fn test_chase_preorder_reflexivity() { + // Test that chase correctly computes reflexive closure + let theory = preorder_theory(); + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 0); + + // Initialize relation with arity 2 + structure.init_relations(&[2]); + + // Run chase + let iterations = chase_fixpoint( + &theory.axioms, + &mut structure, + &mut universe, + &theory.signature, + 100, + ).unwrap(); + + // Should have exactly 3 reflexive tuples + let relation = structure.get_relation(0); + assert_eq!(relation.len(), 3, "Should have exactly 3 reflexive tuples"); + + // Check reflexive pairs exist + assert!(structure.query_relation(0, &[a, a]), "Should have (a,a)"); + assert!(structure.query_relation(0, &[b, b]), "Should have (b,b)"); + assert!(structure.query_relation(0, &[c, c]), "Should have (c,c)"); + + // Check non-reflexive pairs do NOT exist + assert!(!structure.query_relation(0, &[a, b]), "Should NOT have (a,b)"); + assert!(!structure.query_relation(0, &[a, c]), "Should NOT have (a,c)"); + assert!(!structure.query_relation(0, &[b, a]), "Should NOT have (b,a)"); + + // Should complete in 2 iterations + assert_eq!(iterations, 2); +} + +#[test] +fn 
test_chase_transitive_closure() { + // Test that chase correctly computes transitive closure + let theory = preorder_theory(); + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 elements: a < b < c (chain order) + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 0); + + // Initialize relation with arity 2 + structure.init_relations(&[2]); + + // Manually add initial ordering: a ≤ b and b ≤ c + structure.get_relation_mut(0).insert(vec![a, b]); + structure.get_relation_mut(0).insert(vec![b, c]); + + // Run chase + let _iterations = chase_fixpoint( + &theory.axioms, + &mut structure, + &mut universe, + &theory.signature, + 100, + ).unwrap(); + + // Expected: 3 reflexive + 2 initial + 1 transitive (a,c) = 6 + let relation = structure.get_relation(0); + assert_eq!(relation.len(), 6, "Should have 6 tuples"); + + // Check reflexive pairs + assert!(structure.query_relation(0, &[a, a])); + assert!(structure.query_relation(0, &[b, b])); + assert!(structure.query_relation(0, &[c, c])); + + // Check initial ordering + assert!(structure.query_relation(0, &[a, b])); + assert!(structure.query_relation(0, &[b, c])); + + // Check transitive closure! + assert!(structure.query_relation(0, &[a, c]), "Should have (a,c) from transitivity"); + + // Should NOT have backwards edges + assert!(!structure.query_relation(0, &[b, a])); + assert!(!structure.query_relation(0, &[c, b])); + assert!(!structure.query_relation(0, &[c, a])); +} + +#[test] +fn test_chase_conjunction_in_conclusion() { + // Axiom: forall x : V. 
|- R(x) ∧ S(x) + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + sig.add_relation("S".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Conj(vec![ + Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + Formula::Rel(1, Term::Var("x".to_string(), DerivedSort::Base(0))), + ]), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + + structure.init_relations(&[1, 1]); + + let _iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // Both relations should have both elements + assert_eq!(structure.get_relation(0).len(), 2); + assert_eq!(structure.get_relation(1).len(), 2); + assert!(structure.query_relation(0, &[a])); + assert!(structure.query_relation(0, &[b])); + assert!(structure.query_relation(1, &[a])); + assert!(structure.query_relation(1, &[b])); +} + +#[test] +fn test_chase_relation_premise() { + // Axiom: forall x, y : V. 
R(x, y) |- S(x, y) + // Copy tuples from R to S + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + let domain = DerivedSort::Product(vec![ + ("a".to_string(), DerivedSort::Base(0)), + ("b".to_string(), DerivedSort::Base(0)), + ]); + sig.add_relation("R".to_string(), domain.clone()); + sig.add_relation("S".to_string(), domain); + + let axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Rel( + 0, + Term::Record(vec![ + ("a".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("b".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + ), + conclusion: Formula::Rel( + 1, + Term::Record(vec![ + ("a".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("b".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + ), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + + structure.init_relations(&[2, 2]); + + // Add some tuples to R + structure.get_relation_mut(0).insert(vec![a, b]); + structure.get_relation_mut(0).insert(vec![b, a]); + + let _iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // S should have the same tuples as R + assert_eq!(structure.get_relation(1).len(), 2); + assert!(structure.query_relation(1, &[a, b])); + assert!(structure.query_relation(1, &[b, a])); +} + +/// Test chase with existential premise (the feature that motivated tensor-backed chase!) +#[test] +fn test_chase_existential_premise() { + // Theory: Graph with reachability + // Axiom: forall v0, v1 : V. (exists e : E. 
src(e) = v0 ∧ tgt(e) = v1) |- reachable(v0, v1) + let mut sig = Signature::default(); + let v_sort = sig.add_sort("V".to_string()); + let e_sort = sig.add_sort("E".to_string()); + + // src, tgt : E -> V + sig.add_function("src".to_string(), DerivedSort::Base(e_sort), DerivedSort::Base(v_sort)); + sig.add_function("tgt".to_string(), DerivedSort::Base(e_sort), DerivedSort::Base(v_sort)); + + // reachable : [from: V, to: V] -> Prop + let reach_domain = DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(v_sort)), + ("to".to_string(), DerivedSort::Base(v_sort)), + ]); + sig.add_relation("reachable".to_string(), reach_domain); + + // Axiom: (exists e : E. src(e) = v0 ∧ tgt(e) = v1) |- reachable(v0, v1) + let axiom = Sequent { + context: Context { + vars: vec![ + ("v0".to_string(), DerivedSort::Base(v_sort)), + ("v1".to_string(), DerivedSort::Base(v_sort)), + ], + }, + premise: Formula::Exists( + "e".to_string(), + DerivedSort::Base(e_sort), + Box::new(Formula::Conj(vec![ + Formula::Eq( + Term::App(0, Box::new(Term::Var("e".to_string(), DerivedSort::Base(e_sort)))), + Term::Var("v0".to_string(), DerivedSort::Base(v_sort)), + ), + Formula::Eq( + Term::App(1, Box::new(Term::Var("e".to_string(), DerivedSort::Base(e_sort)))), + Term::Var("v1".to_string(), DerivedSort::Base(v_sort)), + ), + ])), + ), + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ("from".to_string(), Term::Var("v0".to_string(), DerivedSort::Base(v_sort))), + ("to".to_string(), Term::Var("v1".to_string(), DerivedSort::Base(v_sort))), + ]), + ), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(2); // 2 sorts: V and E + + // Add vertices: a, b, c + let (a, _) = structure.add_element(&mut universe, v_sort); + let (b, _) = structure.add_element(&mut universe, v_sort); + let (c, _) = structure.add_element(&mut universe, v_sort); + + // Add edges: e1 (a->b), e2 (b->c) + let (e1, _) = structure.add_element(&mut universe, e_sort); + let (e2, _) = 
structure.add_element(&mut universe, e_sort); + + // Initialize functions and relations + structure.init_functions(&[Some(e_sort), Some(e_sort)]); // src, tgt both have domain E + structure.init_relations(&[2]); // reachable is binary + + // Define src and tgt + structure.define_function(0, e1, a).unwrap(); // src(e1) = a + structure.define_function(1, e1, b).unwrap(); // tgt(e1) = b + structure.define_function(0, e2, b).unwrap(); // src(e2) = b + structure.define_function(1, e2, c).unwrap(); // tgt(e2) = c + + // Run chase + let iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // Should derive reachable(a,b) and reachable(b,c) + assert_eq!(structure.get_relation(0).len(), 2, "Should have 2 reachable pairs"); + assert!(structure.query_relation(0, &[a, b]), "Should have reachable(a,b)"); + assert!(structure.query_relation(0, &[b, c]), "Should have reachable(b,c)"); + + // Should NOT have other pairs + assert!(!structure.query_relation(0, &[a, c]), "Should NOT have reachable(a,c) without transitive closure axiom"); + + println!("Chase with existential premise completed in {} iterations", iterations); +} diff --git a/tests/unit_elaborate.rs b/tests/unit_elaborate.rs new file mode 100644 index 0000000..eb3653a --- /dev/null +++ b/tests/unit_elaborate.rs @@ -0,0 +1,837 @@ +//! 
Unit tests for theory and instance elaboration + +use geolog::ast; +use geolog::core::DerivedSort; +use geolog::elaborate::{ElabError, ElaborationContext, Env, elaborate_instance_ctx, elaborate_theory}; +use geolog::id::{NumericId, Slid}; +use geolog::parse; +use geolog::repl::InstanceEntry; +use geolog::universe::Universe; +use std::collections::HashMap; +use std::rc::Rc; + +#[test] +fn test_elaborate_simple_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + src : P -> T; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + assert_eq!(elab.theory.name, "PetriNet"); + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert_eq!(elab.theory.signature.functions.len(), 1); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_parameterized_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // First elaborate PetriNet + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Then elaborate Marking (which depends on PetriNet) + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + assert_eq!(elab.theory.name, "Marking"); + assert_eq!(elab.params.len(), 1); + assert_eq!(elab.params[0].name, "N"); + assert_eq!(elab.params[0].theory_name, "PetriNet"); + // Signature now includes param sorts: N/P, N/T (from PetriNet) + token (local) + assert_eq!(elab.theory.signature.sorts.len(), 3); + 
assert!(elab.theory.signature.lookup_sort("N/P").is_some()); + assert!(elab.theory.signature.lookup_sort("N/T").is_some()); + assert!(elab.theory.signature.lookup_sort("token").is_some()); + // Functions: just token/of (PetriNet had no functions in this test) + assert_eq!(elab.theory.signature.functions.len(), 1); + assert!(elab.theory.signature.lookup_func("token/of").is_some()); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_theory_with_axiom() { + let input = r#" +theory Iso { + X : Sort; + Y : Sort; + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + assert_eq!(elab.theory.name, "Iso"); + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert_eq!(elab.theory.signature.functions.len(), 2); + assert_eq!(elab.theory.axioms.len(), 1); + + // Check the axiom structure + let ax = &elab.theory.axioms[0]; + assert_eq!(ax.context.vars.len(), 1); + assert_eq!(ax.context.vars[0].0, "x"); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_axiom_function_type_error() { + // x is of sort X, but bwd expects Y + let input = r#" +theory BadIso { + X : Sort; + Y : Sort; + fwd : X -> Y; + bwd : Y -> X; + bad : forall x : X. 
|- x bwd = x; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let result = elaborate_theory(&mut env, t); + assert!(result.is_err(), "expected type error in axiom"); + + let err = result.unwrap_err(); + match err { + ElabError::TypeMismatch { expected, got } => { + // expected Y (bwd's domain), got X + assert_eq!(expected, DerivedSort::Base(1)); // Y + assert_eq!(got, DerivedSort::Base(0)); // X + } + other => panic!("expected TypeMismatch error, got: {}", other), + } + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_axiom_equality_type_error() { + // LHS is X, RHS is Y — can't compare different sorts + let input = r#" +theory BadEq { + X : Sort; + Y : Sort; + fwd : X -> Y; + bad : forall x : X. |- x = x fwd; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let result = elaborate_theory(&mut env, t); + assert!(result.is_err(), "expected type error in equality"); + + let err = result.unwrap_err(); + match err { + ElabError::TypeMismatch { expected, got } => { + // LHS is X, RHS is Y + assert_eq!(expected, DerivedSort::Base(0)); // X + assert_eq!(got, DerivedSort::Base(1)); // Y + } + other => panic!("expected TypeMismatch error, got: {}", other), + } + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_instance() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + // First 
elaborate PetriNet theory + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Then elaborate ExampleNet instance + if let ast::Declaration::Instance(inst) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = + elaborate_instance_ctx(&mut ctx, inst).expect("instance elaboration failed"); + let structure = result.structure; + + // Elements are created in order: A(0), B(1), C(2), ab(3), ab_in(4), ab_out(5) + assert_eq!(structure.len(), 6); // A, B, C, ab, ab_in, ab_out + + // Check carriers + assert_eq!(structure.carrier_size(0), 3); // P: A, B, C + assert_eq!(structure.carrier_size(1), 1); // T: ab + assert_eq!(structure.carrier_size(2), 1); // in: ab_in + assert_eq!(structure.carrier_size(3), 1); // out: ab_out + + // Check function definitions using the new columnar API + // Elements by slid: A=0, B=1, C=2, ab=3, ab_in=4, ab_out=5 + let a_slid = Slid::from_usize(0); + let ab_slid = Slid::from_usize(3); + let ab_in_slid = Slid::from_usize(4); + + // Get the sort-local ID for ab_in + let ab_in_sort_slid = structure.sort_local_id(ab_in_slid); + + // in/src is function 0, ab_in maps to A + assert_eq!(structure.get_function(0, ab_in_sort_slid), Some(a_slid)); + // in/tgt is function 1, ab_in maps to ab + assert_eq!(structure.get_function(1, ab_in_sort_slid), Some(ab_slid)); + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_partial_function_error() { + // This instance is missing the definition for ab_in in/tgt + // (ab_in is in the domain of in/tgt but has no value defined) + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + in/src : in -> P; + in/tgt : in -> T; +} + +instance 
PartialNet : PetriNet = { + A : P; + ab : T; + ab_in : in; + ab_in in/src = A; + // Missing: ab_in in/tgt = ab; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + // First elaborate PetriNet theory + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Then try to elaborate the partial instance — should fail + if let ast::Declaration::Instance(i) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = elaborate_instance_ctx(&mut ctx, i); + assert!(result.is_err(), "expected error for partial function"); + + let err = result.unwrap_err(); + match err { + ElabError::PartialFunction { + func_name, + missing_elements, + } => { + assert_eq!(func_name, "in/tgt"); + assert_eq!(missing_elements, vec!["ab_in"]); + } + other => panic!("expected PartialFunction error, got: {}", other), + } + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_domain_type_error() { + // ab is of sort T, but in/src expects domain sort `in` + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + in/src : in -> P; +} + +instance BadNet : PetriNet = { + A : P; + ab : T; + ab in/src = A; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + if let ast::Declaration::Instance(i) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut 
ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = elaborate_instance_ctx(&mut ctx, i); + assert!(result.is_err(), "expected domain type error"); + + let err = result.unwrap_err(); + match err { + ElabError::DomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + assert_eq!(func_name, "in/src"); + assert_eq!(element_name, "ab"); + assert_eq!(expected_sort, "in"); + assert_eq!(actual_sort, "T"); + } + other => panic!("expected DomainMismatch error, got: {}", other), + } + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_codomain_type_error() { + // ab is of sort T, but in/src has codomain P + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + in/src : in -> P; +} + +instance BadNet : PetriNet = { + A : P; + ab : T; + ab_in : in; + ab_in in/src = ab; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + if let ast::Declaration::Instance(i) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = elaborate_instance_ctx(&mut ctx, i); + assert!(result.is_err(), "expected codomain type error"); + + let err = result.unwrap_err(); + match err { + ElabError::CodomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + assert_eq!(func_name, "in/src"); + assert_eq!(element_name, "ab"); + assert_eq!(expected_sort, "P"); + assert_eq!(actual_sort, "T"); + } + other => panic!("expected CodomainMismatch error, 
got: {}", other), + } + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_elaborate_theory_extends() { + // Simple single-level extends + let input = r#" +theory Base { + X : Sort; + f : X -> X; +} + +theory Child extends Base { + Y : Sort; + g : Y -> Base/X; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate Base + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("Base elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Child (extends Base) + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("Child elaboration failed"); + assert_eq!(elab.theory.name, "Child"); + + // Child should have: Base/X (inherited), Y (own) + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert!( + elab.theory.signature.lookup_sort("Base/X").is_some(), + "should have Base/X" + ); + assert!( + elab.theory.signature.lookup_sort("Y").is_some(), + "should have Y" + ); + + // Functions: Base/f (inherited), g (own) + assert_eq!(elab.theory.signature.functions.len(), 2); + assert!( + elab.theory.signature.lookup_func("Base/f").is_some(), + "should have Base/f" + ); + assert!( + elab.theory.signature.lookup_func("g").is_some(), + "should have g" + ); + + // Check g's domain/codomain are correct + let g_id = elab.theory.signature.lookup_func("g").unwrap(); + let g_sym = &elab.theory.signature.functions[g_id]; + let y_id = elab.theory.signature.lookup_sort("Y").unwrap(); + let base_x_id = elab.theory.signature.lookup_sort("Base/X").unwrap(); + assert_eq!(g_sym.domain, DerivedSort::Base(y_id)); + assert_eq!(g_sym.codomain, DerivedSort::Base(base_x_id)); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_transitive_extends() { + // Transitive extends with requalification: + // Grandchild extends Child extends Base + // 
Grandchild should have: Base/X (from grandparent, NOT Child/Base/X), Child/Y, Z + let input = r#" +theory Base { + X : Sort; + f : X -> X; +} + +theory Child extends Base { + Y : Sort; +} + +theory Grandchild extends Child { + Z : Sort; + h : Z -> Base/X; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate Base + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("Base elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Child + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("Child elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Grandchild + if let ast::Declaration::Theory(t) = &file.declarations[2].node { + let elab = elaborate_theory(&mut env, t).expect("Grandchild elaboration failed"); + assert_eq!(elab.theory.name, "Grandchild"); + + // Grandchild should have: Base/X, Child/Y, Z + // NOT: Child/Base/X (that would be wrong requalification) + assert_eq!(elab.theory.signature.sorts.len(), 3); + assert!( + elab.theory.signature.lookup_sort("Base/X").is_some(), + "should have Base/X (preserved from grandparent)" + ); + assert!( + elab.theory.signature.lookup_sort("Child/Y").is_some(), + "should have Child/Y" + ); + assert!( + elab.theory.signature.lookup_sort("Z").is_some(), + "should have Z" + ); + + // Should NOT have these wrong names + assert!( + elab.theory.signature.lookup_sort("Child/Base/X").is_none(), + "should NOT have Child/Base/X" + ); + + // Functions: Base/f (preserved), h (own) + assert_eq!(elab.theory.signature.functions.len(), 2); + assert!( + elab.theory.signature.lookup_func("Base/f").is_some(), + "should have Base/f (preserved)" + ); + assert!( + elab.theory.signature.lookup_func("h").is_some(), + "should have h" + ); + + // Check h's domain/codomain + 
let h_id = elab.theory.signature.lookup_func("h").unwrap(); + let h_sym = &elab.theory.signature.functions[h_id]; + let z_id = elab.theory.signature.lookup_sort("Z").unwrap(); + let base_x_id = elab.theory.signature.lookup_sort("Base/X").unwrap(); + assert_eq!(h_sym.domain, DerivedSort::Base(z_id)); + assert_eq!(h_sym.codomain, DerivedSort::Base(base_x_id)); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_instance_of_extended_theory() { + // Test that instances of extended theories work correctly + let input = r#" +theory Base { + X : Sort; +} + +theory Child extends Base { + Y : Sort; + f : Y -> Base/X; +} + +instance C : Child = { + a : Base/X; + b : Y; + b f = a; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + // Elaborate theories + for decl in &file.declarations[0..2] { + if let ast::Declaration::Theory(t) = &decl.node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + } + + // Elaborate instance + if let ast::Declaration::Instance(inst) = &file.declarations[2].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = + elaborate_instance_ctx(&mut ctx, inst).expect("instance elaboration failed"); + let structure = result.structure; + + // Should have 2 elements: a and b + assert_eq!(structure.len(), 2); + assert_eq!(structure.carrier_size(0), 1); // Base/X: a + assert_eq!(structure.carrier_size(1), 1); // Y: b + + // Check name mappings + assert!( + result.name_to_slid.contains_key("a"), + "should have element 'a'" + ); + assert!( + result.name_to_slid.contains_key("b"), + "should have element 'b'" + ); + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_nested_parameterized_theories() { + // 
Test deep nesting: C depends on B which depends on A + // theory A { X : Sort; } + // theory (N : A instance) B { Y : Sort; f : Y -> N/X; } + // theory (M : B instance) C { Z : Sort; g : Z -> M/Y; h : Z -> M/N/X; } + // + // C should have sorts: M/N/X (from A via B), M/Y (from B), Z (own) + let input = r#" +theory A { X : Sort; } + +theory (N : A instance) B { + Y : Sort; + f : Y -> N/X; +} + +theory (M : B instance) C { + Z : Sort; + g : Z -> M/Y; + h : Z -> M/N/X; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate A + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("A elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate B + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("B elaboration failed"); + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert!(elab.theory.signature.lookup_sort("N/X").is_some()); + assert!(elab.theory.signature.lookup_sort("Y").is_some()); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate C + if let ast::Declaration::Theory(t) = &file.declarations[2].node { + let elab = elaborate_theory(&mut env, t).expect("C elaboration failed"); + assert_eq!(elab.theory.name, "C"); + + // C should have: M/N/X, M/Y, Z + assert_eq!(elab.theory.signature.sorts.len(), 3); + assert!( + elab.theory.signature.lookup_sort("M/N/X").is_some(), + "should have M/N/X (from A via B)" + ); + assert!( + elab.theory.signature.lookup_sort("M/Y").is_some(), + "should have M/Y (from B)" + ); + assert!( + elab.theory.signature.lookup_sort("Z").is_some(), + "should have Z (own sort)" + ); + + // Functions: M/f (from B), g, h (own) + assert_eq!(elab.theory.signature.functions.len(), 3); + assert!( + elab.theory.signature.lookup_func("M/f").is_some(), + "should have M/f" + ); + assert!( + 
elab.theory.signature.lookup_func("g").is_some(), + "should have g" + ); + assert!( + elab.theory.signature.lookup_func("h").is_some(), + "should have h" + ); + + // Check h's domain/codomain are correct + let h_id = elab.theory.signature.lookup_func("h").unwrap(); + let h_sym = &elab.theory.signature.functions[h_id]; + let z_id = elab.theory.signature.lookup_sort("Z").unwrap(); + let mnx_id = elab.theory.signature.lookup_sort("M/N/X").unwrap(); + assert_eq!(h_sym.domain, DerivedSort::Base(z_id)); + assert_eq!(h_sym.codomain, DerivedSort::Base(mnx_id)); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_extends_with_naming_convention_slashes() { + // This test verifies the fix for the naming convention bug where + // function names like "Func/dom" (using "/" as DomainSort/descriptor) + // were incorrectly treated as grandparent-qualified names. + // + // The fix checks if the prefix before "/" is a sort in the parent theory. + // If so, it's a naming convention, not a grandparent qualifier. 
+ let input = r#" +theory Base { + Func : Sort; + Rel : Sort; + Func/dom : Func -> Rel; + Func/cod : Func -> Rel; + Rel/type : Rel -> Func; +} + +theory Child extends Base { + Op : Sort; + Op/func : Op -> Base/Func; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate Base + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("Base elaboration failed"); + // Base has 2 sorts (Func, Rel) and 3 functions (Func/dom, Func/cod, Rel/type) + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert_eq!(elab.theory.signature.functions.len(), 3); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Child + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("Child elaboration failed"); + assert_eq!(elab.theory.name, "Child"); + + // Child should have: Base/Func, Base/Rel, Op + assert_eq!(elab.theory.signature.sorts.len(), 3); + assert!( + elab.theory.signature.lookup_sort("Base/Func").is_some(), + "should have Base/Func" + ); + assert!( + elab.theory.signature.lookup_sort("Base/Rel").is_some(), + "should have Base/Rel" + ); + assert!( + elab.theory.signature.lookup_sort("Op").is_some(), + "should have Op" + ); + + // Functions should be: Base/Func/dom, Base/Func/cod, Base/Rel/type, Op/func + // NOT: Func/dom (which would be wrong - missing Base/ prefix) + assert_eq!(elab.theory.signature.functions.len(), 4); + assert!( + elab.theory.signature.lookup_func("Base/Func/dom").is_some(), + "should have Base/Func/dom (naming convention slash preserved)" + ); + assert!( + elab.theory.signature.lookup_func("Base/Func/cod").is_some(), + "should have Base/Func/cod" + ); + assert!( + elab.theory.signature.lookup_func("Base/Rel/type").is_some(), + "should have Base/Rel/type" + ); + assert!( + elab.theory.signature.lookup_func("Op/func").is_some(), + "should have Op/func" + ); + + 
// Should NOT have these wrong names (without Base/ prefix) + assert!( + elab.theory.signature.lookup_func("Func/dom").is_none(), + "should NOT have Func/dom (missing prefix)" + ); + assert!( + elab.theory.signature.lookup_func("Rel/type").is_none(), + "should NOT have Rel/type (missing prefix)" + ); + + // Verify Base/Func/dom has correct domain/codomain + let func_dom_id = elab.theory.signature.lookup_func("Base/Func/dom").unwrap(); + let func_dom_sym = &elab.theory.signature.functions[func_dom_id]; + let base_func_id = elab.theory.signature.lookup_sort("Base/Func").unwrap(); + let base_rel_id = elab.theory.signature.lookup_sort("Base/Rel").unwrap(); + assert_eq!( + func_dom_sym.domain, + DerivedSort::Base(base_func_id), + "Base/Func/dom domain should be Base/Func" + ); + assert_eq!( + func_dom_sym.codomain, + DerivedSort::Base(base_rel_id), + "Base/Func/dom codomain should be Base/Rel" + ); + + // Verify Op/func has correct domain/codomain + let op_func_id = elab.theory.signature.lookup_func("Op/func").unwrap(); + let op_func_sym = &elab.theory.signature.functions[op_func_id]; + let op_id = elab.theory.signature.lookup_sort("Op").unwrap(); + assert_eq!( + op_func_sym.domain, + DerivedSort::Base(op_id), + "Op/func domain should be Op" + ); + assert_eq!( + op_func_sym.codomain, + DerivedSort::Base(base_func_id), + "Op/func codomain should be Base/Func" + ); + } else { + panic!("expected theory"); + } +} diff --git a/tests/unit_meta.rs b/tests/unit_meta.rs new file mode 100644 index 0000000..52105a2 --- /dev/null +++ b/tests/unit_meta.rs @@ -0,0 +1,265 @@ +//! 
Unit tests for GeologMeta theory and structure conversion + +use geolog::core::{Context, DerivedSort, ElaboratedTheory, Formula, Sequent, Signature, Term, Theory}; +use geolog::meta::{geolog_meta, structure_to_theory, theory_to_structure}; +use geolog::naming::NamingIndex; +use geolog::universe::Universe; + +#[test] +fn test_theory_to_structure() { + // Create a simple theory + let mut sig = Signature::new(); + sig.add_sort("P".to_string()); + sig.add_sort("T".to_string()); + sig.add_function( + "src".to_string(), + DerivedSort::Base(1), // T + DerivedSort::Base(0), // P + ); + + let theory = ElaboratedTheory { + params: vec![], + theory: Theory { + name: "PetriNet".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }; + + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + let structure = theory_to_structure(&theory, &mut universe, &mut naming); + + // Check basic structure properties + assert!(!structure.is_empty()); + + // Check we have elements in the structure + // Should have: 1 Theory, 2 Srt, 1 Func, plus DSort/BaseDS elements + assert!( + structure.len() > 5, + "Expected more than 5 elements, got {}", + structure.len() + ); + + // Verify names were registered in naming index + assert!(naming.lookup_unique("PetriNet").is_some()); + assert!(naming.lookup_unique("P").is_some()); + assert!(naming.lookup_unique("T").is_some()); + assert!(naming.lookup_unique("src").is_some()); +} + +#[test] +fn test_geolog_meta_parses() { + // Just ensure GeologMeta itself can be loaded + let meta = geolog_meta(); + assert_eq!(meta.theory.name, "GeologMeta"); + + // Should have lots of sorts and functions (no Name sort anymore) + assert!( + meta.theory.signature.sorts.len() >= 25, + "Expected many sorts, got {}", + meta.theory.signature.sorts.len() + ); + assert!( + meta.theory.signature.functions.len() >= 40, + "Expected many functions, got {}", + meta.theory.signature.functions.len() + ); + assert!( + 
meta.theory.signature.relations.len() >= 3, + "Expected some relations" + ); +} + +#[test] +fn test_theory_roundtrip() { + // Create a theory with sorts, functions, and a relation + let mut sig = Signature::new(); + let p_id = sig.add_sort("P".to_string()); + let t_id = sig.add_sort("T".to_string()); + sig.add_function( + "src".to_string(), + DerivedSort::Base(t_id), + DerivedSort::Base(p_id), + ); + sig.add_function( + "tgt".to_string(), + DerivedSort::Base(t_id), + DerivedSort::Base(p_id), + ); + // Add a relation with record domain + sig.add_relation( + "enabled".to_string(), + DerivedSort::Product(vec![ + ("place".to_string(), DerivedSort::Base(p_id)), + ("trans".to_string(), DerivedSort::Base(t_id)), + ]), + ); + + let original = ElaboratedTheory { + params: vec![], + theory: Theory { + name: "PetriNet".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }; + + // Convert to structure + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + let structure = theory_to_structure(&original, &mut universe, &mut naming); + + // Convert back + let reconstructed = + structure_to_theory(&structure, &universe, &naming).expect("roundtrip should succeed"); + + // Verify basic properties match + assert_eq!(reconstructed.theory.name, "PetriNet"); + assert_eq!(reconstructed.theory.signature.sorts.len(), 2); + assert_eq!(reconstructed.theory.signature.functions.len(), 2); + assert_eq!(reconstructed.theory.signature.relations.len(), 1); + + // Verify sort names + assert!(reconstructed.theory.signature.lookup_sort("P").is_some()); + assert!(reconstructed.theory.signature.lookup_sort("T").is_some()); + + // Verify function names + assert!(reconstructed.theory.signature.lookup_func("src").is_some()); + assert!(reconstructed.theory.signature.lookup_func("tgt").is_some()); + + // Verify relation name + assert!( + reconstructed + .theory + .signature + .lookup_rel("enabled") + .is_some() + ); +} + +#[test] +fn 
test_theory_roundtrip_with_axioms() { + // Create a preorder theory with reflexivity and transitivity axioms + let mut sig = Signature::new(); + let x_id = sig.add_sort("X".to_string()); + let x_sort = DerivedSort::Base(x_id); + + // Add a binary relation: leq : [x: X, y: X] -> Prop + let rel_domain = DerivedSort::Product(vec![ + ("x".to_string(), x_sort.clone()), + ("y".to_string(), x_sort.clone()), + ]); + let rel_id = sig.add_relation("leq".to_string(), rel_domain); + + // Reflexivity axiom: forall x:X. |- leq(x, x) + // Context: [x: X] + // Premise: True + // Conclusion: leq({x: x, y: x}) + let reflexivity = Sequent { + context: Context { + vars: vec![("x".to_string(), x_sort.clone())], + }, + premise: Formula::True, + conclusion: Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ]), + ), + }; + + // Transitivity axiom: forall x,y,z:X. leq(x,y), leq(y,z) |- leq(x,z) + // Context: [x: X, y: X, z: X] + // Premise: leq(x,y) ∧ leq(y,z) + // Conclusion: leq(x,z) + let transitivity = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), x_sort.clone()), + ("y".to_string(), x_sort.clone()), + ("z".to_string(), x_sort.clone()), + ], + }, + premise: Formula::Conj(vec![ + Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("y".to_string(), x_sort.clone())), + ]), + ), + Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("y".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("z".to_string(), x_sort.clone())), + ]), + ), + ]), + conclusion: Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("z".to_string(), x_sort.clone())), + ]), + ), + }; + + let original = ElaboratedTheory { + params: vec![], + theory: Theory { + name: 
"Preorder".to_string(), + signature: sig, + axioms: vec![reflexivity, transitivity], + axiom_names: vec!["ax/refl".to_string(), "ax/trans".to_string()], + }, + }; + + // Convert to structure + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + let structure = theory_to_structure(&original, &mut universe, &mut naming); + + // Convert back + let reconstructed = + structure_to_theory(&structure, &universe, &naming).expect("roundtrip should succeed"); + + // Verify basic properties match + assert_eq!(reconstructed.theory.name, "Preorder"); + assert_eq!(reconstructed.theory.signature.sorts.len(), 1); + assert_eq!(reconstructed.theory.signature.relations.len(), 1); + assert_eq!( + reconstructed.theory.axioms.len(), + 2, + "Expected 2 axioms, got {}", + reconstructed.theory.axioms.len() + ); + + // Verify sort name + assert!(reconstructed.theory.signature.lookup_sort("X").is_some()); + + // Verify relation name + assert!(reconstructed.theory.signature.lookup_rel("leq").is_some()); + + // Verify axiom names round-trip correctly + assert_eq!( + reconstructed.theory.axiom_names.len(), + 2, + "Expected 2 axiom names, got {}", + reconstructed.theory.axiom_names.len() + ); + assert!( + reconstructed.theory.axiom_names.contains(&"ax/refl".to_string()), + "Expected axiom names to contain 'ax/refl', got {:?}", + reconstructed.theory.axiom_names + ); + assert!( + reconstructed.theory.axiom_names.contains(&"ax/trans".to_string()), + "Expected axiom names to contain 'ax/trans', got {:?}", + reconstructed.theory.axiom_names + ); +} diff --git a/tests/unit_parsing.rs b/tests/unit_parsing.rs new file mode 100644 index 0000000..9abee67 --- /dev/null +++ b/tests/unit_parsing.rs @@ -0,0 +1,163 @@ +//! 
Unit tests for lexer and parser + +use chumsky::Parser; +use geolog::ast::Declaration; +use geolog::lexer::{Token, lexer}; +use geolog::parse; + +// ============================================================================ +// Lexer tests +// ============================================================================ + +#[test] +fn test_lex_simple() { + let input = "theory PetriNet { P : Sort; }"; + let result = lexer().parse(input); + assert!(result.is_ok()); + let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect(); + assert_eq!( + tokens, + vec![ + Token::Theory, + Token::Ident("PetriNet".to_string()), + Token::LBrace, + Token::Ident("P".to_string()), + Token::Colon, + Token::Sort, + Token::Semicolon, + Token::RBrace, + ] + ); +} + +#[test] +fn test_lex_arrow_and_turnstile() { + let input = "in -> out |- x = y"; + let result = lexer().parse(input); + assert!(result.is_ok()); + let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect(); + assert_eq!( + tokens, + vec![ + Token::Ident("in".to_string()), + Token::Arrow, + Token::Ident("out".to_string()), + Token::Turnstile, + Token::Ident("x".to_string()), + Token::Eq, + Token::Ident("y".to_string()), + ] + ); +} + +#[test] +fn test_lex_path() { + let input = "N/P W/src/arc"; + let result = lexer().parse(input); + assert!(result.is_ok()); + let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect(); + assert_eq!( + tokens, + vec![ + Token::Ident("N".to_string()), + Token::Slash, + Token::Ident("P".to_string()), + Token::Ident("W".to_string()), + Token::Slash, + Token::Ident("src".to_string()), + Token::Slash, + Token::Ident("arc".to_string()), + ] + ); +} + +// ============================================================================ +// Parser tests +// ============================================================================ + +#[test] +fn test_parse_simple_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; +} +"#; + let result = 
parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + assert_eq!(file.declarations.len(), 1); +} + +#[test] +fn test_parse_function_decl() { + let input = r#" +theory PetriNet { + P : Sort; + in : Sort; + src : in -> P; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); +} + +#[test] +fn test_parse_parameterized_theory() { + let input = r#" +theory (N : PetriNet instance) Marking { + token : Sort; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + if let Declaration::Theory(t) = &file.declarations[0].node { + assert_eq!(t.params.len(), 1); + assert_eq!(t.params[0].name, "N"); + } else { + panic!("Expected theory declaration"); + } +} + +#[test] +fn test_parse_instance() { + let input = r#" +instance ExampleNet : PetriNet = { + A : P; + B : P; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + if let Declaration::Instance(i) = &file.declarations[0].node { + assert_eq!(i.name, "ExampleNet"); + assert_eq!(i.body.len(), 2); + } else { + panic!("Expected instance declaration"); + } +} + +#[test] +fn test_parse_nested_instance() { + let input = r#" +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + }; + target_marking = { + t : token; + }; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + if let Declaration::Instance(i) = &file.declarations[0].node { + assert_eq!(i.name, "problem0"); + assert_eq!(i.body.len(), 2); + } else { + panic!("Expected instance declaration"); + } +} diff --git a/tests/unit_pretty.rs b/tests/unit_pretty.rs new file mode 100644 index 0000000..28fcf33 --- /dev/null +++ b/tests/unit_pretty.rs @@ -0,0 +1,36 @@ +//! 
Unit tests for pretty-printing roundtrips + +use geolog::parse; +use geolog::pretty::pretty_print; + +#[test] +fn test_roundtrip_simple_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + src : in -> P; +} +"#; + let parsed = parse(input).expect("parse failed"); + let printed = pretty_print(&parsed); + let reparsed = parse(&printed).expect("reparse failed"); + + // Compare structure (ignoring spans) + assert_eq!(parsed.declarations.len(), reparsed.declarations.len()); +} + +#[test] +fn test_roundtrip_instance() { + let input = r#" +instance ExampleNet : PetriNet = { + A : P; + B : P; +} +"#; + let parsed = parse(input).expect("parse failed"); + let printed = pretty_print(&parsed); + let reparsed = parse(&printed).expect("reparse failed"); + + assert_eq!(parsed.declarations.len(), reparsed.declarations.len()); +} diff --git a/tests/unit_relations.rs b/tests/unit_relations.rs new file mode 100644 index 0000000..2ca3b27 --- /dev/null +++ b/tests/unit_relations.rs @@ -0,0 +1,183 @@ +//! 
Unit tests for relation storage + +use geolog::core::{RelationStorage, Structure, VecRelation}; +use geolog::id::{NumericId, Slid}; +use geolog::universe::Universe; +use geolog::serialize::{load_structure, save_structure}; +use tempfile::tempdir; + +/// Helper to create Slid from integer +fn slid(n: usize) -> Slid { + Slid::from_usize(n) +} + +#[test] +fn test_vec_relation_basic() { + let mut rel = VecRelation::new(2); + + // Insert a tuple + assert!(rel.insert(vec![slid(0), slid(1)])); + assert_eq!(rel.len(), 1); + + // Check containment + assert!(rel.contains(&[slid(0), slid(1)])); + assert!(!rel.contains(&[slid(1), slid(0)])); + assert!(!rel.contains(&[slid(0), slid(0)])); + + // Insert another tuple + assert!(rel.insert(vec![slid(1), slid(0)])); + assert_eq!(rel.len(), 2); + + // Duplicate insert returns false + assert!(!rel.insert(vec![slid(0), slid(1)])); + assert_eq!(rel.len(), 2); +} + +#[test] +fn test_vec_relation_remove() { + let mut rel = VecRelation::new(2); + + rel.insert(vec![slid(0), slid(1)]); + rel.insert(vec![slid(1), slid(2)]); + assert_eq!(rel.len(), 2); + + // Remove existing tuple + assert!(rel.remove(&[slid(0), slid(1)])); + assert_eq!(rel.len(), 1); + assert!(!rel.contains(&[slid(0), slid(1)])); + assert!(rel.contains(&[slid(1), slid(2)])); + + // Remove non-existent tuple + assert!(!rel.remove(&[slid(0), slid(1)])); + assert_eq!(rel.len(), 1); + + // Re-insert removed tuple (should reuse tuple ID) + assert!(rel.insert(vec![slid(0), slid(1)])); + assert_eq!(rel.len(), 2); + assert!(rel.contains(&[slid(0), slid(1)])); +} + +#[test] +fn test_vec_relation_iter() { + let mut rel = VecRelation::new(2); + + rel.insert(vec![slid(0), slid(1)]); + rel.insert(vec![slid(1), slid(2)]); + rel.insert(vec![slid(2), slid(3)]); + + let tuples: Vec<_> = rel.iter().collect(); + assert_eq!(tuples.len(), 3); + + // Remove middle tuple + rel.remove(&[slid(1), slid(2)]); + + let tuples: Vec<_> = rel.iter().collect(); + assert_eq!(tuples.len(), 2); +} + +#[test] 
+fn test_structure_relations() { + let mut universe = Universe::new(); + let mut structure = Structure::new(2); + + // Add elements to two sorts + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (x, _) = structure.add_element(&mut universe, 1); + let (y, _) = structure.add_element(&mut universe, 1); + + // Initialize a binary relation (arity 2) + structure.init_relations(&[2]); + + // Assert some tuples + assert!(structure.assert_relation(0, vec![a, x])); + assert!(structure.assert_relation(0, vec![b, y])); + assert_eq!(structure.get_relation(0).len(), 2); + + // Query + assert!(structure.query_relation(0, &[a, x])); + assert!(!structure.query_relation(0, &[a, y])); + + // Retract + assert!(structure.retract_relation(0, &[a, x])); + assert!(!structure.query_relation(0, &[a, x])); +} + +#[test] +fn test_relation_serialization_roundtrip() { + let mut universe = Universe::new(); + let mut structure = Structure::new(2); + + // Add elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (x, _) = structure.add_element(&mut universe, 1); + + // Initialize relation and add tuples + structure.init_relations(&[2]); + structure.assert_relation(0, vec![a, x]); + structure.assert_relation(0, vec![b, x]); + + // Serialize and deserialize via StructureData + let data = geolog::serialize::StructureData::from_structure(&structure); + let restored = data.to_structure(); + + // Check relation was preserved + assert_eq!(restored.num_relations(), 1); + assert_eq!(restored.get_relation(0).len(), 2); + assert!(restored.query_relation(0, &[a, x])); + assert!(restored.query_relation(0, &[b, x])); + assert!(!restored.query_relation(0, &[a, b])); +} + +#[test] +fn test_relation_file_roundtrip() { + let mut universe = Universe::new(); + let mut structure = Structure::new(2); + + // Add elements + let (a, _) = structure.add_element(&mut universe, 0); + let 
(b, _) = structure.add_element(&mut universe, 1); + + // Initialize relation and add tuples + structure.init_relations(&[2]); + structure.assert_relation(0, vec![a, b]); + + // Save to file + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + save_structure(&structure, &path).expect("save should succeed"); + + // Load from file + let loaded = load_structure(&path).expect("load should succeed"); + + // Check relation was preserved + assert_eq!(loaded.num_relations(), 1); + assert!(loaded.query_relation(0, &[a, b])); +} + +#[test] +fn test_unary_relation() { + let mut rel = VecRelation::new(1); + + rel.insert(vec![slid(42)]); + rel.insert(vec![slid(100)]); + + assert!(rel.contains(&[slid(42)])); + assert!(rel.contains(&[slid(100)])); + assert!(!rel.contains(&[slid(0)])); + assert_eq!(rel.len(), 2); +} + +#[test] +fn test_ternary_relation() { + let mut rel = VecRelation::new(3); + + rel.insert(vec![slid(1), slid(2), slid(3)]); + rel.insert(vec![slid(4), slid(5), slid(6)]); + + assert!(rel.contains(&[slid(1), slid(2), slid(3)])); + assert!(rel.contains(&[slid(4), slid(5), slid(6)])); + assert!(!rel.contains(&[slid(1), slid(2), slid(4)])); + assert_eq!(rel.len(), 2); +} diff --git a/tests/unit_version.rs b/tests/unit_version.rs new file mode 100644 index 0000000..79b7bfd --- /dev/null +++ b/tests/unit_version.rs @@ -0,0 +1,133 @@ +//! 
Unit tests for version control (commits, checkout, patches) + +use geolog::core::Structure; +use geolog::naming::NamingIndex; +use geolog::version::VersionedState; +use std::fs; +use std::path::PathBuf; +use tempfile::tempdir; + +fn temp_dir() -> PathBuf { + let dir = tempdir().unwrap(); + dir.keep() +} + +#[test] +fn test_new_versioned_state() { + let dir = temp_dir(); + let state = VersionedState::new(&dir); + assert!(state.head.is_none()); + assert_eq!(state.num_commits(), 0); + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn test_commit_and_checkout() { + let dir = temp_dir(); + let mut state = VersionedState::new(&dir); + let mut naming = NamingIndex::new(); + + // Create a structure using the state's universe + let mut s1 = Structure::new(2); + let (_, luid1) = s1.add_element(&mut state.universe, 0); + let (_, luid2) = s1.add_element(&mut state.universe, 1); + + // Register names + let uuid1 = state.universe.get(luid1).unwrap(); + let uuid2 = state.universe.get(luid2).unwrap(); + naming.insert(uuid1, vec!["foo".to_string()]); + naming.insert(uuid2, vec!["bar".to_string()]); + + // Commit it + let commit1 = state.commit(&s1, &naming).expect("commit should succeed"); + assert_eq!(state.num_commits(), 1); + assert_eq!(state.head, Some(commit1)); + + // Checkout and verify + let s1_checkout = state.checkout(commit1).expect("checkout should succeed"); + assert_eq!(s1_checkout.len(), 2); + + // Clean up + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn test_multiple_commits() { + let dir = temp_dir(); + let mut state = VersionedState::new(&dir); + let mut naming = NamingIndex::new(); + + // First commit + let mut s1 = Structure::new(2); + let (_, foo_luid) = s1.add_element(&mut state.universe, 0); + let foo_uuid = state.universe.get(foo_luid).unwrap(); + naming.insert(foo_uuid, vec!["foo".to_string()]); + let commit1 = state.commit(&s1, &naming).expect("commit 1"); + + // Second commit with more elements (preserving "foo" via its Luid) + let mut s2 = 
Structure::new(2); + s2.add_element_with_luid(foo_luid, 0); + let (_, bar_luid) = s2.add_element(&mut state.universe, 1); + let (_, baz_luid) = s2.add_element(&mut state.universe, 0); + + // Register names for new elements + let bar_uuid = state.universe.get(bar_luid).unwrap(); + let baz_uuid = state.universe.get(baz_luid).unwrap(); + naming.insert(bar_uuid, vec!["bar".to_string()]); + naming.insert(baz_uuid, vec!["baz".to_string()]); + + let commit2 = state.commit(&s2, &naming).expect("commit 2"); + + assert_eq!(state.num_commits(), 2); + + // Checkout first commit + let s1_checkout = state.checkout(commit1).expect("checkout commit1"); + assert_eq!(s1_checkout.len(), 1); + + // Checkout second commit + let s2_checkout = state.checkout(commit2).expect("checkout commit2"); + assert_eq!(s2_checkout.len(), 3); + + // List commits + let commits = state.list_commits(); + assert_eq!(commits.len(), 2); + assert_eq!(commits[0], commit1); + assert_eq!(commits[1], commit2); + + // Clean up + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn test_save_and_load_patches() { + let dir = temp_dir(); + + // Create state and commit + let commit_uuid; + { + let mut state = VersionedState::new(&dir); + let mut naming = NamingIndex::new(); + + let mut s = Structure::new(2); + let (_, foo_luid) = s.add_element(&mut state.universe, 0); + let foo_uuid = state.universe.get(foo_luid).unwrap(); + naming.insert(foo_uuid, vec!["foo".to_string()]); + + commit_uuid = state.commit(&s, &naming).expect("commit"); + } + + // Create new state and load + { + let mut state = VersionedState::new(&dir); + state.load_patches().expect("load patches"); + + assert_eq!(state.num_commits(), 1); + assert_eq!(state.head, Some(commit_uuid)); + + let s = state.checkout(commit_uuid).expect("checkout"); + assert_eq!(s.len(), 1); + } + + // Clean up + let _ = fs::remove_dir_all(&dir); +} diff --git a/tests/unit_workspace.rs b/tests/unit_workspace.rs new file mode 100644 index 0000000..ffaebba --- /dev/null +++ 
b/tests/unit_workspace.rs @@ -0,0 +1,68 @@ +//! Unit tests for structure serialization +//! +//! Tests for save/load functionality in the serialize module. + +use geolog::core::Structure; +use geolog::elaborate::InstanceEntry; +use geolog::id::{NumericId, Slid}; +use geolog::serialize::{load_structure, save_structure, StructureData}; +use geolog::universe::Universe; +use tempfile::tempdir; + +#[test] +fn test_structure_roundtrip() { + let mut universe = Universe::new(); + + let mut structure = Structure::new(2); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 1); + + let data = StructureData::from_structure(&structure); + let restored = data.to_structure(); + + assert_eq!(restored.len(), 3); + assert_eq!(restored.num_sorts(), 2); +} + +#[test] +fn test_save_load_structure() { + let mut universe = Universe::new(); + + let mut structure = Structure::new(2); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 1); + + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + save_structure(&structure, &path).expect("save should succeed"); + + let loaded = load_structure(&path).expect("load should succeed"); + + assert_eq!(loaded.len(), 2); + assert_eq!(loaded.num_sorts(), 2); +} + +#[test] +fn test_instance_entry_element_management() { + let mut universe = Universe::new(); + + // Create a simple structure + let mut structure = Structure::new(1); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 0); + + // Create instance entry + let mut entry = InstanceEntry::new(structure, "TestTheory".to_string(), "TestTheory".to_string()); + entry.register_element("a".to_string(), Slid::from_usize(0)); + entry.register_element("b".to_string(), Slid::from_usize(1)); + + // Verify element lookup works + assert_eq!(entry.get_element("a"), Some(Slid::from_usize(0))); + assert_eq!(entry.get_element("b"), 
Some(Slid::from_usize(1))); + assert_eq!(entry.get_element("c"), None); + + // Verify reverse lookup works + assert_eq!(entry.get_name(Slid::from_usize(0)), Some("a")); + assert_eq!(entry.get_name(Slid::from_usize(1)), Some("b")); +} diff --git a/theories/GeologMeta.geolog b/theories/GeologMeta.geolog new file mode 100644 index 0000000..92fa9ba --- /dev/null +++ b/theories/GeologMeta.geolog @@ -0,0 +1,400 @@ +// GeologMeta: A homoiconic representation of geolog theories +// +// An instance of GeologMeta IS a collection of geolog theories, complete with +// signatures, axioms, and well-formedness constraints. +// +// Key design principles: +// - All elements identified by UUID; human-readable names in separate NamingIndex +// - Child pointers go from parent to children (no products in domains) +// - Binding uses upward pointers from variable to binder +// - Transitive closure (ancestor) via Datalog-style axioms +// - Srt/theory, Func/theory enables multi-theory instances and theory parameters +// +// Naming convention: DomainSort/descriptor +// - Embeddings: VarT/term, EqF/formula (target sort) +// - Parent pointers: Srt/theory, Field/prod (container sort) +// - Projections: EqF/lhs, ProjT/field (field reference) + +theory GeologMeta { + + // ============================================================ + // THEORIES + // ============================================================ + + Theory : Sort; + + // Theory parameters: (N : PetriNet) means N is a Param + Param : Sort; + Param/theory : Param -> Theory; // which theory has this parameter + Param/type : Param -> Theory; // must instantiate this theory + + // ============================================================ + // SORTS (renamed to Srt to avoid keyword conflict) + // ============================================================ + + Srt : Sort; + Srt/theory : Srt -> Theory; + + // ============================================================ + // DERIVED SORTS (Base | Product) + // 
============================================================ + + DSort : Sort; + + // Base case: wraps a Sort + BaseDS : Sort; + BaseDS/dsort : BaseDS -> DSort; + BaseDS/srt : BaseDS -> Srt; + + // Product case: [x: A, y: B, ...] + ProdDS : Sort; + ProdDS/dsort : ProdDS -> DSort; + + // Product fields (recursive: field type is DSort) + Field : Sort; + Field/prod : Field -> ProdDS; + Field/type : Field -> DSort; + + // ============================================================ + // FUNCTION SYMBOLS + // ============================================================ + + Func : Sort; + Func/theory : Func -> Theory; + Func/dom : Func -> DSort; + Func/cod : Func -> DSort; + + // ============================================================ + // RELATION SYMBOLS (predicates, no codomain) + // ============================================================ + + Rel : Sort; + Rel/theory : Rel -> Theory; + Rel/dom : Rel -> DSort; + + // ============================================================ + // BINDERS (for variable scoping) + // ============================================================ + // Variables point UP to their binder. Binders are introduced by + // Exists quantifiers or context variables. + + Binder : Sort; + Binder/type : Binder -> DSort; + + // ============================================================ + // TERMS + // ============================================================ + + Term : Sort; + + // Variable reference (points to binder) + VarT : Sort; + VarT/term : VarT -> Term; + VarT/binder : VarT -> Binder; // UPWARD pointer to introducing binder + + // Function application (unary - argument may be a record) + AppT : Sort; + AppT/term : AppT -> Term; + AppT/func : AppT -> Func; + AppT/arg : AppT -> Term; + + // Record construction [x = t1, y = t2, ...] 
+ RecordT : Sort; + RecordT/term : RecordT -> Term; + + RecEntry : Sort; + RecEntry/record : RecEntry -> RecordT; + RecEntry/val : RecEntry -> Term; + RecEntry/field : RecEntry -> Field; // which field this entry is for + + // Projection t.field + ProjT : Sort; + ProjT/term : ProjT -> Term; + ProjT/base : ProjT -> Term; + ProjT/field : ProjT -> Field; // which field to project + + // ============================================================ + // FORMULAS + // ============================================================ + + Formula : Sort; + + // Relation application `t R` + RelF : Sort; + RelF/formula : RelF -> Formula; + RelF/arg : RelF -> Term; + RelF/rel : RelF -> Rel; + + // Truth + TrueF : Sort; + TrueF/formula : TrueF -> Formula; + + // Falsity + FalseF : Sort; + FalseF/formula : FalseF -> Formula; + + // Equality t1 = t2 + EqF : Sort; + EqF/formula : EqF -> Formula; + EqF/lhs : EqF -> Term; + EqF/rhs : EqF -> Term; + + // Conjunction (n-ary via arms) + ConjF : Sort; + ConjF/formula : ConjF -> Formula; + + ConjArm : Sort; + ConjArm/conj : ConjArm -> ConjF; + ConjArm/child : ConjArm -> Formula; + + // Disjunction (n-ary via arms) + DisjF : Sort; + DisjF/formula : DisjF -> Formula; + + DisjArm : Sort; + DisjArm/disj : DisjArm -> DisjF; + DisjArm/child : DisjArm -> Formula; + + // Existential quantification + ExistsF : Sort; + ExistsF/formula : ExistsF -> Formula; + ExistsF/binder : ExistsF -> Binder; // introduces this binder + ExistsF/body : ExistsF -> Formula; + + // ============================================================ + // SEQUENTS (axioms) + // ============================================================ + + Sequent : Sort; + Sequent/theory : Sequent -> Theory; + Sequent/premise : Sequent -> Formula; + Sequent/conclusion : Sequent -> Formula; + + // Context variables (universal quantification at sequent level) + CtxVar : Sort; + CtxVar/sequent : CtxVar -> Sequent; + CtxVar/binder : CtxVar -> Binder; // introduces this binder + + // 
============================================================ + // NODE UNIVERSE (for ancestry/scoping) + // ============================================================ + // Unified sort for tracking parent-child in formula trees + + Node : Sort; + Term/node : Term -> Node; + Formula/node : Formula -> Node; + + // ============================================================ + // CHILD RELATION + // ============================================================ + // child(p, c) means c is an immediate child of p in the AST + + child : [parent: Node, child: Node] -> Prop; + + // ============================================================ + // ANCESTOR RELATION (transitive closure of child) + // ============================================================ + + ancestor : [anc: Node, desc: Node] -> Prop; + + // Datalog-style transitive closure axioms + ax/anc/base : forall p : Node, c : Node. + [parent: p, child: c] child |- [anc: p, desc: c] ancestor; + + ax/anc/step : forall a : Node, p : Node, c : Node. + [anc: a, desc: p] ancestor, [parent: p, child: c] child |- [anc: a, desc: c] ancestor; + + // ============================================================ + // CHILD AXIOMS (populate child from structure) + // ============================================================ + + // EqF children + ax/child/eq/lhs : forall e : EqF, t : Term. + e EqF/lhs = t |- [parent: e EqF/formula Formula/node, child: t Term/node] child; + ax/child/eq/rhs : forall e : EqF, t : Term. + e EqF/rhs = t |- [parent: e EqF/formula Formula/node, child: t Term/node] child; + + // ExistsF body + ax/child/exists : forall e : ExistsF, f : Formula. + e ExistsF/body = f |- [parent: e ExistsF/formula Formula/node, child: f Formula/node] child; + + // ConjF arms + ax/child/conj : forall a : ConjArm, c : ConjF, f : Formula. 
+ a ConjArm/conj = c, a ConjArm/child = f |- + [parent: c ConjF/formula Formula/node, child: f Formula/node] child; + + // DisjF arms + ax/child/disj : forall a : DisjArm, d : DisjF, f : Formula. + a DisjArm/disj = d, a DisjArm/child = f |- + [parent: d DisjF/formula Formula/node, child: f Formula/node] child; + + // RelF argument + ax/child/rel : forall r : RelF, t : Term. + r RelF/arg = t |- [parent: r RelF/formula Formula/node, child: t Term/node] child; + + // AppT argument + ax/child/app : forall a : AppT, t : Term. + a AppT/arg = t |- [parent: a AppT/term Term/node, child: t Term/node] child; + + // ProjT base + ax/child/proj : forall p : ProjT, t : Term. + p ProjT/base = t |- [parent: p ProjT/term Term/node, child: t Term/node] child; + + // RecEntry value + ax/child/rec : forall e : RecEntry, r : RecordT, t : Term. + e RecEntry/record = r, e RecEntry/val = t |- + [parent: r RecordT/term Term/node, child: t Term/node] child; + + // ============================================================ + // IN-SEQUENT RELATION (for context variable scoping) + // ============================================================ + + in_seq : [node: Node, seq: Sequent] -> Prop; + + ax/in_seq/premise : forall s : Sequent, f : Formula. + s Sequent/premise = f |- [node: f Formula/node, seq: s] in_seq; + + ax/in_seq/conclusion : forall s : Sequent, f : Formula. + s Sequent/conclusion = f |- [node: f Formula/node, seq: s] in_seq; + + ax/in_seq/desc : forall n : Node, m : Node, s : Sequent. + [node: n, seq: s] in_seq, [anc: n, desc: m] ancestor |- [node: m, seq: s] in_seq; + + // ============================================================ + // BINDING WELL-FORMEDNESS CONSTRAINTS + // ============================================================ + // These axioms ensure variables point to valid binders. + // An instance satisfies these iff scoping is correct. + + // Exists-bound: binder's exists must be an ancestor of the var + ax/wf/exists : forall v : VarT, b : Binder, e : ExistsF. 
+ v VarT/binder = b, e ExistsF/binder = b |- + [anc: e ExistsF/formula Formula/node, desc: v VarT/term Term/node] ancestor; + + // Context-bound: var must be in the same sequent as the ctx var + ax/wf/ctx : forall v : VarT, b : Binder, cv : CtxVar, s : Sequent. + v VarT/binder = b, cv CtxVar/binder = b, cv CtxVar/sequent = s |- + [node: v VarT/term Term/node, seq: s] in_seq; + + // ============================================================ + // COMMITS (version control checkpoints) + // ============================================================ + // Commits form a DAG. Each commit represents a point-in-time + // snapshot of all name bindings. + + Commit : Sort; + Commit/parent : Commit -> Commit; // previous commit (optional for initial) + // Note: For merge commits, we'd need a relation for multiple parents + + // ============================================================ + // NAME BINDINGS (mutable pointers via append-only log) + // ============================================================ + // A NameBinding records that, as of a given commit, a name + // points to a specific theory or instance version. + // + // Names are strings stored in NamingIndex (by UUID). + // The "current" binding for a name is the most recent one + // reachable from HEAD commit. + + NameBinding : Sort; + NameBinding/commit : NameBinding -> Commit; // when this binding was made + + // What the name points to (exactly one of these is defined): + NameBinding/theory : NameBinding -> Theory; + NameBinding/instance : NameBinding -> Instance; + + // ============================================================ + // INSTANCES (immutable, patch-based versioning) + // ============================================================ + // An Instance is an immutable snapshot. "Modifying" an instance + // creates a new Instance with parent pointer and delta. + // + // To materialize: chase parent chain, union additions, apply retractions. 
+ + Instance : Sort; + Instance/parent : Instance -> Instance; // base version (optional for v0) + Instance/theory : Instance -> Theory; // which theory this instantiates + + // ============================================================ + // INSTANCE ELEMENTS (delta: additions) + // ============================================================ + // Elements added in a specific instance version. + // The actual UUID is tracked via the element's Luid in Universe. + + Elem : Sort; + Elem/instance : Elem -> Instance; // which version introduced this + Elem/sort : Elem -> Srt; // which sort of the theory + + // ============================================================ + // ELEMENT RETRACTIONS (delta: tombstones) + // ============================================================ + // Marks an element as retracted in a specific version. + // The element still exists in the log, but is filtered from materialized view. + + ElemRetract : Sort; + ElemRetract/instance : ElemRetract -> Instance; // which version retracted + ElemRetract/elem : ElemRetract -> Elem; // what was retracted + + // ============================================================ + // FUNCTION VALUES (delta: additions) + // ============================================================ + // Records a function value: func(arg) = result + + FuncVal : Sort; + FuncVal/instance : FuncVal -> Instance; // which version defined this + FuncVal/func : FuncVal -> Func; // which function + FuncVal/arg : FuncVal -> Elem; // domain element (or product elem) + FuncVal/result : FuncVal -> Elem; // codomain element + + // NOTE: No FuncValRetract sort - function values are IMMUTABLE. + // To "change" a function value, retract the domain element and create a new one. + // This ensures the Monotonic Submodel Property (see incremental_index_design.md). 
+ + // ============================================================ + // RELATION TUPLES (delta: additions) + // ============================================================ + // Records a relation tuple: rel(args...) holds + // + // All relations use product-domain encoding uniformly (even unary). + // Each tuple has RelTupleArg entries for each position in the domain. + + RelTuple : Sort; + RelTuple/instance : RelTuple -> Instance; // which version asserted this + RelTuple/rel : RelTuple -> Rel; // which relation + + // Relation tuple argument components (one per domain field) + RelTupleArg : Sort; + RelTupleArg/tuple : RelTupleArg -> RelTuple; // which tuple this belongs to + RelTupleArg/elem : RelTupleArg -> Elem; // element value for this position + RelTupleArg/position : RelTupleArg -> Field; // which field of the domain product + + // NOTE: No RelTupleRetract sort - relation tuples are IMMUTABLE. + // Relations are boolean-valued functions: R(a,b) is defined at element creation time. + // To "change" a relation value, retract the involved elements and create new ones. + // This ensures the Monotonic Submodel Property (see incremental_index_design.md). + + // ============================================================ + // THEORY VERSIONING (same pattern as instances) + // ============================================================ + // Theories are also immutable and patch-based. + // Theory/parent allows incremental theory extension. 
+ + Theory/parent : Theory -> Theory; // base version (optional for v0) + + // Theory element retractions (for removing sorts/funcs/rels) + SrtRetract : Sort; + SrtRetract/theory : SrtRetract -> Theory; + SrtRetract/srt : SrtRetract -> Srt; + + FuncRetract : Sort; + FuncRetract/theory : FuncRetract -> Theory; + FuncRetract/func : FuncRetract -> Func; + + RelRetract : Sort; + RelRetract/theory : RelRetract -> Theory; + RelRetract/rel : RelRetract -> Rel; + + SequentRetract : Sort; + SequentRetract/theory : SequentRetract -> Theory; + SequentRetract/sequent : SequentRetract -> Sequent; +} diff --git a/theories/RelAlgIR.geolog b/theories/RelAlgIR.geolog new file mode 100644 index 0000000..c54e7b2 --- /dev/null +++ b/theories/RelAlgIR.geolog @@ -0,0 +1,592 @@ +// RelAlgIR: String Diagram IR for Relational Algebra +// +// Query plans are instances of this theory. The string diagram structure: +// - Wire elements are edges (carrying typed data streams) +// - Op elements are boxes (transforming data) +// - Composition is implicit via wire sharing (same Wire as output of one Op and input of another) +// - Cycles are allowed; well-formedness axioms ensure they contain delays +// +// See loose_thoughts/2026-01-19_19:45_relalg_ir_design.md for full design. +// +// This theory extends GeologMeta to get Srt, Func, Elem, etc. +// References use qualified names: GeologMeta/Srt, GeologMeta/Func, etc. + +theory RelAlgIR extends GeologMeta { + + // ============================================================ + // SCHEMAS (types of data on wires) + // ============================================================ + // Schemas describe the "shape" of tuples flowing on a wire. + // They mirror DSort but are specific to the relational algebra context. 
+ + Schema : Sort; + + // Unit schema: empty tuple (for sources with no input) + UnitSchema : Sort; + UnitSchema/schema : UnitSchema -> Schema; + + // Base schema: single column of a given sort + BaseSchema : Sort; + BaseSchema/schema : BaseSchema -> Schema; + BaseSchema/srt : BaseSchema -> GeologMeta/Srt; + + // Product schema: S ⊗ T (concatenation of columns) + ProdSchema : Sort; + ProdSchema/schema : ProdSchema -> Schema; + ProdSchema/left : ProdSchema -> Schema; + ProdSchema/right : ProdSchema -> Schema; + + // ============================================================ + // WIRES (edges in the string diagram) + // ============================================================ + // Wires are first-class citizens. Each wire carries a stream of + // tuples with a given schema. Composition is encoded by the same + // Wire appearing as output of one Op and input of another. + + Wire : Sort; + Wire/schema : Wire -> Schema; + + // ============================================================ + // OPERATIONS (boxes in the string diagram) + // ============================================================ + + Op : Sort; + + // ------------------------------------------------------------ + // Sources (no input wires) + // ------------------------------------------------------------ + + // Scan: emit all elements of a sort + // () → BaseSchema(srt) + ScanOp : Sort; + ScanOp/op : ScanOp -> Op; + ScanOp/srt : ScanOp -> GeologMeta/Srt; + ScanOp/out : ScanOp -> Wire; + + // Constant: emit a single known element + // () → BaseSchema(elem's sort) + ConstOp : Sort; + ConstOp/op : ConstOp -> Op; + ConstOp/elem : ConstOp -> GeologMeta/Elem; + ConstOp/out : ConstOp -> Wire; + + // Empty: emit nothing (identity for union) + // () → S + EmptyOp : Sort; + EmptyOp/op : EmptyOp -> Op; + EmptyOp/out : EmptyOp -> Wire; + + // ------------------------------------------------------------ + // Unary operations (one input wire, one output wire) + // 
------------------------------------------------------------ + + // Filter: keep tuples satisfying a predicate + // S → S + FilterOp : Sort; + FilterOp/op : FilterOp -> Op; + FilterOp/in : FilterOp -> Wire; + FilterOp/out : FilterOp -> Wire; + FilterOp/pred : FilterOp -> Pred; + + // Project: select and reorder columns + // S → T + ProjectOp : Sort; + ProjectOp/op : ProjectOp -> Op; + ProjectOp/in : ProjectOp -> Wire; + ProjectOp/out : ProjectOp -> Wire; + ProjectOp/mapping : ProjectOp -> ProjMapping; + + // Distinct: deduplicate tuples (collapse multiplicities to 0/1) + // S → S + DistinctOp : Sort; + DistinctOp/op : DistinctOp -> Op; + DistinctOp/in : DistinctOp -> Wire; + DistinctOp/out : DistinctOp -> Wire; + + // Negate: flip multiplicities (for computing differences) + // S → S + NegateOp : Sort; + NegateOp/op : NegateOp -> Op; + NegateOp/in : NegateOp -> Wire; + NegateOp/out : NegateOp -> Wire; + + // Apply function: add a column by applying a function + // S → S ⊗ BaseSchema(cod) + ApplyOp : Sort; + ApplyOp/op : ApplyOp -> Op; + ApplyOp/in : ApplyOp -> Wire; + ApplyOp/out : ApplyOp -> Wire; + ApplyOp/func : ApplyOp -> GeologMeta/Func; + ApplyOp/arg_col : ApplyOp -> ColRef; + + // ------------------------------------------------------------ + // Binary operations (two input wires, one output wire) + // ------------------------------------------------------------ + + // Join: combine tuples from two sources where condition holds + // S × T → S ⊗ T (filtered) + JoinOp : Sort; + JoinOp/op : JoinOp -> Op; + JoinOp/left_in : JoinOp -> Wire; + JoinOp/right_in : JoinOp -> Wire; + JoinOp/out : JoinOp -> Wire; + JoinOp/cond : JoinOp -> JoinCond; + + // Union: combine tuples from two sources (Z-set addition) + // S × S → S + UnionOp : Sort; + UnionOp/op : UnionOp -> Op; + UnionOp/left_in : UnionOp -> Wire; + UnionOp/right_in : UnionOp -> Wire; + UnionOp/out : UnionOp -> Wire; + + // ------------------------------------------------------------ + // DBSP Temporal 
Operators + // ------------------------------------------------------------ + // These operate on streams over discrete time. + // They are essential for incremental computation and feedback loops. + + // Delay: z⁻¹, output at time t is input at time t-1 + // S → S + // IMPORTANT: Delays break instantaneous cycles, making feedback well-founded. + DelayOp : Sort; + DelayOp/op : DelayOp -> Op; + DelayOp/in : DelayOp -> Wire; + DelayOp/out : DelayOp -> Wire; + + // Differentiate: δ = 1 - z⁻¹, compute changes since last timestep + // S → S (output is the delta/diff of input) + DiffOp : Sort; + DiffOp/op : DiffOp -> Op; + DiffOp/in : DiffOp -> Wire; + DiffOp/out : DiffOp -> Wire; + + // Integrate: ∫ = Σ, accumulate all inputs over time + // S → S (output is running sum of all inputs) + // NOTE: Has implicit delay semantics, also breaks instantaneous cycles. + IntegrateOp : Sort; + IntegrateOp/op : IntegrateOp -> Op; + IntegrateOp/in : IntegrateOp -> Wire; + IntegrateOp/out : IntegrateOp -> Wire; + + // ============================================================ + // PREDICATES (for filter conditions) + // ============================================================ + + Pred : Sort; + + // True: always satisfied + TruePred : Sort; + TruePred/pred : TruePred -> Pred; + + // False: never satisfied + FalsePred : Sort; + FalsePred/pred : FalsePred -> Pred; + + // Column equality: col_i = col_j + ColEqPred : Sort; + ColEqPred/pred : ColEqPred -> Pred; + ColEqPred/left : ColEqPred -> ColRef; + ColEqPred/right : ColEqPred -> ColRef; + + // Constant equality: col = constant element + ConstEqPred : Sort; + ConstEqPred/pred : ConstEqPred -> Pred; + ConstEqPred/col : ConstEqPred -> ColRef; + ConstEqPred/val : ConstEqPred -> GeologMeta/Elem; + + // Function result equality: f(col_arg) = col_result + FuncEqPred : Sort; + FuncEqPred/pred : FuncEqPred -> Pred; + FuncEqPred/func : FuncEqPred -> GeologMeta/Func; + FuncEqPred/arg : FuncEqPred -> ColRef; + FuncEqPred/result : FuncEqPred 
-> ColRef; + + // Function result equals constant: f(col_arg) = expected_elem + FuncConstEqPred : Sort; + FuncConstEqPred/pred : FuncConstEqPred -> Pred; + FuncConstEqPred/func : FuncConstEqPred -> GeologMeta/Func; + FuncConstEqPred/arg : FuncConstEqPred -> ColRef; + FuncConstEqPred/expected : FuncConstEqPred -> GeologMeta/Elem; + + // Conjunction: p ∧ q + AndPred : Sort; + AndPred/pred : AndPred -> Pred; + AndPred/left : AndPred -> Pred; + AndPred/right : AndPred -> Pred; + + // Disjunction: p ∨ q + OrPred : Sort; + OrPred/pred : OrPred -> Pred; + OrPred/left : OrPred -> Pred; + OrPred/right : OrPred -> Pred; + + // ============================================================ + // JOIN CONDITIONS + // ============================================================ + + JoinCond : Sort; + + // Equijoin: left.col_i = right.col_j + EquiJoinCond : Sort; + EquiJoinCond/cond : EquiJoinCond -> JoinCond; + EquiJoinCond/left_col : EquiJoinCond -> ColRef; + EquiJoinCond/right_col : EquiJoinCond -> ColRef; + + // Cross join: cartesian product (no condition) + CrossJoinCond : Sort; + CrossJoinCond/cond : CrossJoinCond -> JoinCond; + + // General predicate join + PredJoinCond : Sort; + PredJoinCond/cond : PredJoinCond -> JoinCond; + PredJoinCond/pred : PredJoinCond -> Pred; + + // ============================================================ + // COLUMN REFERENCES + // ============================================================ + // References to specific columns within a schema. + // Used in predicates and projections. 
+
+ColRef : Sort;
+ColRef/wire : ColRef -> Wire; // which wire's schema we're referencing
+ColRef/path : ColRef -> ColPath; // path into the schema
+
+// Column path: navigate into nested product schemas
+// Open sum with three constructors (Here / Left / Right), each embedding
+// into ColPath via its /path map; Left/Right carry the remaining path in /rest.
+ColPath : Sort;
+
+// Here: we're at the target
+HerePath : Sort;
+HerePath/path : HerePath -> ColPath;
+
+// Left: descend into left of product
+LeftPath : Sort;
+LeftPath/path : LeftPath -> ColPath;
+LeftPath/rest : LeftPath -> ColPath;
+
+// Right: descend into right of product
+RightPath : Sort;
+RightPath/path : RightPath -> ColPath;
+RightPath/rest : RightPath -> ColPath;
+
+// ============================================================
+// PROJECTION MAPPINGS
+// ============================================================
+// Specifies how to construct output columns from input columns.
+
+ProjMapping : Sort;
+
+// Projection entries (which input columns become which output columns)
+// A mapping is the set of ProjEntry elements pointing at it via /mapping.
+ProjEntry : Sort;
+ProjEntry/mapping : ProjEntry -> ProjMapping;
+ProjEntry/source : ProjEntry -> ColRef;
+ProjEntry/target_path : ProjEntry -> ColPath;
+
+// ============================================================
+// REACHABILITY RELATIONS (for cycle analysis)
+// ============================================================
+
+// w1 reaches w2 via some path through operations
+reaches : [from: Wire, to: Wire] -> Prop;
+
+// Reachability through each operation type: one axiom per operator,
+// adding an edge from each input wire to the output wire.
+ax/reaches/scan : forall s : ScanOp, w : Wire.
+  s ScanOp/out = w |- [from: w, to: w] reaches; // trivial self-reach for source
+// NOTE(review): the scan axiom makes `reaches` reflexive on scan outputs
+// only — it marks sources; there is deliberately no counterpart in
+// `reaches_instant` below.
+
+ax/reaches/filter : forall f : FilterOp, w1 : Wire, w2 : Wire.
+  f FilterOp/in = w1, f FilterOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/project : forall p : ProjectOp, w1 : Wire, w2 : Wire.
+  p ProjectOp/in = w1, p ProjectOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/distinct : forall d : DistinctOp, w1 : Wire, w2 : Wire.
+  d DistinctOp/in = w1, d DistinctOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/negate : forall n : NegateOp, w1 : Wire, w2 : Wire.
+  n NegateOp/in = w1, n NegateOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/apply : forall a : ApplyOp, w1 : Wire, w2 : Wire.
+  a ApplyOp/in = w1, a ApplyOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/join/left : forall j : JoinOp, w1 : Wire, w2 : Wire.
+  j JoinOp/left_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/join/right : forall j : JoinOp, w1 : Wire, w2 : Wire.
+  j JoinOp/right_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/union/left : forall u : UnionOp, w1 : Wire, w2 : Wire.
+  u UnionOp/left_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/union/right : forall u : UnionOp, w1 : Wire, w2 : Wire.
+  u UnionOp/right_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/delay : forall d : DelayOp, w1 : Wire, w2 : Wire.
+  d DelayOp/in = w1, d DelayOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/diff : forall d : DiffOp, w1 : Wire, w2 : Wire.
+  d DiffOp/in = w1, d DiffOp/out = w2 |- [from: w1, to: w2] reaches;
+
+ax/reaches/integrate : forall i : IntegrateOp, w1 : Wire, w2 : Wire.
+  i IntegrateOp/in = w1, i IntegrateOp/out = w2 |- [from: w1, to: w2] reaches;
+
+// Transitive closure
+ax/reaches/trans : forall w1 : Wire, w2 : Wire, w3 : Wire.
+  [from: w1, to: w2] reaches, [from: w2, to: w3] reaches |-
+  [from: w1, to: w3] reaches;
+
+// ============================================================
+// INSTANTANEOUS REACHABILITY (paths without delay)
+// ============================================================
+// This relation tracks paths that do NOT go through DelayOp or IntegrateOp.
+// Used to detect "bad" feedback loops that would require instantaneous computation.
+
+reaches_instant : [from: Wire, to: Wire] -> Prop;
+
+// Same per-operator axioms as `reaches`, EXCEPT: no DelayOp, no IntegrateOp,
+// and no ScanOp self-reach (a scan self-loop here would immediately violate
+// ax/wf/no_instant_cycle below).
+ax/reaches_instant/filter : forall f : FilterOp, w1 : Wire, w2 : Wire.
+  f FilterOp/in = w1, f FilterOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/project : forall p : ProjectOp, w1 : Wire, w2 : Wire.
+  p ProjectOp/in = w1, p ProjectOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/distinct : forall d : DistinctOp, w1 : Wire, w2 : Wire.
+  d DistinctOp/in = w1, d DistinctOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/negate : forall n : NegateOp, w1 : Wire, w2 : Wire.
+  n NegateOp/in = w1, n NegateOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/apply : forall a : ApplyOp, w1 : Wire, w2 : Wire.
+  a ApplyOp/in = w1, a ApplyOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/join/left : forall j : JoinOp, w1 : Wire, w2 : Wire.
+  j JoinOp/left_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/join/right : forall j : JoinOp, w1 : Wire, w2 : Wire.
+  j JoinOp/right_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/union/left : forall u : UnionOp, w1 : Wire, w2 : Wire.
+  u UnionOp/left_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+ax/reaches_instant/union/right : forall u : UnionOp, w1 : Wire, w2 : Wire.
+  u UnionOp/right_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+// NOTE: No axioms for DelayOp or IntegrateOp!
+// They break instantaneous reachability.
+
+// DiffOp is instantaneous (it uses delay internally but outputs immediately)
+ax/reaches_instant/diff : forall d : DiffOp, w1 : Wire, w2 : Wire.
+  d DiffOp/in = w1, d DiffOp/out = w2 |- [from: w1, to: w2] reaches_instant;
+
+// Transitive closure
+ax/reaches_instant/trans : forall w1 : Wire, w2 : Wire, w3 : Wire.
+  [from: w1, to: w2] reaches_instant, [from: w2, to: w3] reaches_instant |-
+  [from: w1, to: w3] reaches_instant;
+
+// ============================================================
+// WELL-FORMEDNESS: NO INSTANTANEOUS CYCLES
+// ============================================================
+// Every cycle must contain at least one DelayOp or IntegrateOp.
+// This ensures feedback loops are computable via iteration.
+
+// Concluding `false` makes any instance containing an instantaneous
+// cycle inconsistent, i.e. such a circuit is rejected as ill-formed.
+ax/wf/no_instant_cycle : forall w : Wire.
+  [from: w, to: w] reaches_instant |- false;
+
+// ============================================================
+// WELL-FORMEDNESS: SCHEMA CONSISTENCY
+// ============================================================
+// Operations must connect wires with compatible schemas.
+
+// Filter preserves schema
+ax/wf/filter_schema : forall f : FilterOp, w1 : Wire, w2 : Wire.
+  f FilterOp/in = w1, f FilterOp/out = w2 |-
+  w1 Wire/schema = w2 Wire/schema;
+
+// Distinct preserves schema
+ax/wf/distinct_schema : forall d : DistinctOp, w1 : Wire, w2 : Wire.
+  d DistinctOp/in = w1, d DistinctOp/out = w2 |-
+  w1 Wire/schema = w2 Wire/schema;
+
+// Negate preserves schema
+ax/wf/negate_schema : forall n : NegateOp, w1 : Wire, w2 : Wire.
+  n NegateOp/in = w1, n NegateOp/out = w2 |-
+  w1 Wire/schema = w2 Wire/schema;
+
+// Delay preserves schema
+ax/wf/delay_schema : forall d : DelayOp, w1 : Wire, w2 : Wire.
+  d DelayOp/in = w1, d DelayOp/out = w2 |-
+  w1 Wire/schema = w2 Wire/schema;
+
+// Diff preserves schema
+ax/wf/diff_schema : forall d : DiffOp, w1 : Wire, w2 : Wire.
+  d DiffOp/in = w1, d DiffOp/out = w2 |-
+  w1 Wire/schema = w2 Wire/schema;
+
+// Integrate preserves schema
+ax/wf/integrate_schema : forall i : IntegrateOp, w1 : Wire, w2 : Wire.
+  i IntegrateOp/in = w1, i IntegrateOp/out = w2 |-
+  w1 Wire/schema = w2 Wire/schema;
+
+// Union requires same schema on both inputs
+// (both inputs are equated with the output's schema, hence with each other)
+ax/wf/union_schema_left : forall u : UnionOp, wl : Wire, wr : Wire, wo : Wire.
+  u UnionOp/left_in = wl, u UnionOp/right_in = wr, u UnionOp/out = wo |-
+  wl Wire/schema = wo Wire/schema;
+
+ax/wf/union_schema_right : forall u : UnionOp, wl : Wire, wr : Wire, wo : Wire.
+  u UnionOp/left_in = wl, u UnionOp/right_in = wr, u UnionOp/out = wo |-
+  wr Wire/schema = wo Wire/schema;
+
+// Scan output schema must be BaseSchema of the scanned sort
+// (This requires existential in conclusion, which geometric logic supports)
+ax/wf/scan_schema : forall s : ScanOp, srt : GeologMeta/Srt, w : Wire.
+  s ScanOp/srt = srt, s ScanOp/out = w |-
+  exists bs : BaseSchema. bs BaseSchema/srt = srt, w Wire/schema = bs BaseSchema/schema;
+
+// Join output schema is product of input schemas
+ax/wf/join_schema : forall j : JoinOp, wl : Wire, wr : Wire, wo : Wire.
+  j JoinOp/left_in = wl, j JoinOp/right_in = wr, j JoinOp/out = wo |-
+  exists ps : ProdSchema.
+    ps ProdSchema/left = wl Wire/schema,
+    ps ProdSchema/right = wr Wire/schema,
+    wo Wire/schema = ps ProdSchema/schema;
+
+// ============================================================
+// ALGEBRAIC LAWS (for query optimization)
+// ============================================================
+// These axioms express equivalences between query plans.
+// An optimizer uses these to transform plans into more efficient forms.
+//
+// Notation: We describe semantic equivalence between operators.
+// In practice, equivalence means the output wire produces the same Z-set.
+//
+// These are stated as properties (Prop-valued relations) rather than
+// equational axioms, since geolog's geometric logic doesn't have
+// built-in equality on terms. The optimizer interprets these as rewrite rules.
+// Rewrite-style laws below use an existential conclusion: "there exists a
+// rewritten operator whose output is equiv to the original output".
+
+// Wire equivalence: two wires produce the same Z-set
+equiv : [a: Wire, b: Wire] -> Prop;
+
+// Reflexivity (unconditional: empty premise)
+ax/equiv/refl : forall w : Wire.
+  |- [a: w, b: w] equiv;
+
+// Symmetry
+ax/equiv/sym : forall w1 : Wire, w2 : Wire.
+  [a: w1, b: w2] equiv |- [a: w2, b: w1] equiv;
+
+// Transitivity
+ax/equiv/trans : forall w1 : Wire, w2 : Wire, w3 : Wire.
+  [a: w1, b: w2] equiv, [a: w2, b: w3] equiv |- [a: w1, b: w3] equiv;
+
+// ------------------------------------------------------------
+// Filter Laws
+// ------------------------------------------------------------
+
+// Filter(True, x) ≡ x
+ax/filter_true : forall f : FilterOp, t : TruePred, wi : Wire, wo : Wire.
+  f FilterOp/pred = t TruePred/pred, f FilterOp/in = wi, f FilterOp/out = wo |-
+  [a: wo, b: wi] equiv;
+
+// Filter(False, x) ≡ Empty
+// (Every tuple is filtered out, result is empty)
+// This would need EmptyOp with matching schema; omitted for simplicity.
+
+// Filter-Filter Fusion: Filter(p, Filter(q, x)) ≡ Filter(p ∧ q, x)
+// Expressed as: If f2.in = f1.out, then there exists a fused filter.
+ax/filter_fusion : forall f1 : FilterOp, f2 : FilterOp,
+    p1 : Pred, p2 : Pred,
+    wi : Wire, wm : Wire, wo : Wire.
+  f1 FilterOp/in = wi, f1 FilterOp/out = wm, f1 FilterOp/pred = p1,
+  f2 FilterOp/in = wm, f2 FilterOp/out = wo, f2 FilterOp/pred = p2 |-
+  exists f3 : FilterOp, pa : AndPred.
+    pa AndPred/left = p1, pa AndPred/right = p2,
+    f3 FilterOp/in = wi, f3 FilterOp/pred = pa AndPred/pred,
+    [a: wo, b: f3 FilterOp/out] equiv;
+
+// ------------------------------------------------------------
+// Distinct Laws
+// ------------------------------------------------------------
+
+// Distinct is idempotent: Distinct(Distinct(x)) ≡ Distinct(x)
+// (wo is the doubly-distinct output; wm the singly-distinct one)
+ax/distinct_idem : forall d1 : DistinctOp, d2 : DistinctOp,
+    wi : Wire, wm : Wire, wo : Wire.
+  d1 DistinctOp/in = wi, d1 DistinctOp/out = wm,
+  d2 DistinctOp/in = wm, d2 DistinctOp/out = wo |-
+  [a: wo, b: wm] equiv;
+
+// ------------------------------------------------------------
+// Union Laws
+// ------------------------------------------------------------
+
+// Union is commutative: Union(x, y) ≡ Union(y, x)
+ax/union_comm : forall u1 : UnionOp, wl : Wire, wr : Wire, wo : Wire.
+  u1 UnionOp/left_in = wl, u1 UnionOp/right_in = wr, u1 UnionOp/out = wo |-
+  exists u2 : UnionOp.
+    u2 UnionOp/left_in = wr, u2 UnionOp/right_in = wl,
+    [a: wo, b: u2 UnionOp/out] equiv;
+
+// Union is associative: Union(x, Union(y, z)) ≡ Union(Union(x, y), z)
+ax/union_assoc : forall u1 : UnionOp, u2 : UnionOp,
+    wa : Wire, wb : Wire, wc : Wire, wyz : Wire, wo : Wire.
+  u2 UnionOp/left_in = wb, u2 UnionOp/right_in = wc, u2 UnionOp/out = wyz,
+  u1 UnionOp/left_in = wa, u1 UnionOp/right_in = wyz, u1 UnionOp/out = wo |-
+  exists u3 : UnionOp, u4 : UnionOp, wab : Wire.
+    u3 UnionOp/left_in = wa, u3 UnionOp/right_in = wb, u3 UnionOp/out = wab,
+    u4 UnionOp/left_in = wab, u4 UnionOp/right_in = wc,
+    [a: wo, b: u4 UnionOp/out] equiv;
+
+// Union with Empty: Union(x, Empty) ≡ x
+ax/union_empty_right : forall u : UnionOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+  e EmptyOp/out = we, u UnionOp/left_in = wi, u UnionOp/right_in = we, u UnionOp/out = wo |-
+  [a: wo, b: wi] equiv;
+
+ax/union_empty_left : forall u : UnionOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+  e EmptyOp/out = we, u UnionOp/left_in = we, u UnionOp/right_in = wi, u UnionOp/out = wo |-
+  [a: wo, b: wi] equiv;
+
+// ------------------------------------------------------------
+// Negate Laws
+// ------------------------------------------------------------
+
+// Double negation: Negate(Negate(x)) ≡ x
+ax/negate_involution : forall n1 : NegateOp, n2 : NegateOp,
+    wi : Wire, wm : Wire, wo : Wire.
+  n1 NegateOp/in = wi, n1 NegateOp/out = wm,
+  n2 NegateOp/in = wm, n2 NegateOp/out = wo |-
+  [a: wo, b: wi] equiv;
+
+// ------------------------------------------------------------
+// Join Laws
+// ------------------------------------------------------------
+
+// Cross join is commutative (up to column reordering)
+// Note: The output schemas differ, so this needs a projection to swap columns.
+// Omitted for now as it requires more complex schema manipulation.
+
+// Join with Empty: Join(x, Empty) ≡ Empty
+ax/join_empty_right : forall j : JoinOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+  e EmptyOp/out = we, j JoinOp/left_in = wi, j JoinOp/right_in = we, j JoinOp/out = wo |-
+  exists e2 : EmptyOp. [a: wo, b: e2 EmptyOp/out] equiv;
+
+ax/join_empty_left : forall j : JoinOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+  e EmptyOp/out = we, j JoinOp/left_in = we, j JoinOp/right_in = wi, j JoinOp/out = wo |-
+  exists e2 : EmptyOp. [a: wo, b: e2 EmptyOp/out] equiv;
+
+// ------------------------------------------------------------
+// DBSP Laws
+// ------------------------------------------------------------
+
+// Differentiation is inverse of integration (for streams of changes)
+// Diff(Integrate(x)) ≡ x (for Δ-streams)
+// Integrate(Diff(x)) ≡ x - x₀ (up to initial value)
+// These are more subtle and depend on stream semantics; omitted for now.
+
+// Delay respects Union: z⁻¹(x ∪ y) ≡ z⁻¹(x) ∪ z⁻¹(y)
+ax/delay_union : forall u : UnionOp, d : DelayOp,
+    wl : Wire, wr : Wire, wu : Wire, wo : Wire.
+  u UnionOp/left_in = wl, u UnionOp/right_in = wr, u UnionOp/out = wu,
+  d DelayOp/in = wu, d DelayOp/out = wo |-
+  exists dl : DelayOp, dr : DelayOp, u2 : UnionOp.
+    dl DelayOp/in = wl, dr DelayOp/in = wr,
+    u2 UnionOp/left_in = dl DelayOp/out, u2 UnionOp/right_in = dr DelayOp/out,
+    [a: wo, b: u2 UnionOp/out] equiv;
+}