commit ac2f202594e23e5c4546faaef99dcb46257c17fa Author: Hassan Abedi Date: Thu Feb 26 11:50:51 2026 +0100 The base commit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..807d598 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ + +# Use bd merge for beads JSONL files +.beads/issues.jsonl merge=beads diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6183e86 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +target/ +.claude/ +.idea + diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..a0b6abf --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1282 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + +[[package]] +name = "ariadne" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "44055e597c674aef7cb903b2b9f6e4cba1277ed0d2d61dae7cd52d7ffa81f8e2" +dependencies = [ + "unicode-width 0.1.14", + "yansi", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", + "uuid", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + 
"quote", + "syn 1.0.109", +] + +[[package]] +name = "bytemuck" +version = "1.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown 0.14.5", + "stacker", +] + +[[package]] +name = "clipboard-win" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" +dependencies = [ + "error-code", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + 
"libc", + "once_cell", + "windows-sys 0.59.0", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "egglog-concurrency" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7462d094fd0d9da45a7bd2c4b09ab530b8935ba060cd15c181d94e480f9add" +dependencies = [ + "arc-swap", + "rayon", +] + +[[package]] +name = "egglog-numeric-id" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4f81100cddb02741105fe8c445f0f2dc66919dbf65aab380ff903ff54e458805" +dependencies = [ + "rayon", +] + +[[package]] +name = "egglog-union-find" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c4e41ab6ea1bec16de378bd2acaf374997a02ce7f88ef084f7b00f7d2be9e7b" +dependencies = [ + "crossbeam", + "egglog-concurrency", + "egglog-numeric-id", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fd-lock" +version = "4.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "geolog" +version = "0.1.0" +dependencies = [ + "ariadne", + "chumsky", + "egglog-numeric-id", + "egglog-union-find", + "indexmap 2.12.1", + "insta", + "itertools", + "memmap2", + "nonminmax", + "proptest", + "rand", + "rkyv", + "roaring", + "rustyline", + "serde", + "tempfile", + "tinyvec", + "toml", + "uuid", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "insta" +version = "1.44.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c943d4415edd8153251b6f197de5eb1640e56d84e8d9159bea190421c73698" +dependencies = [ + "console", + "once_cell", + "similar", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.178" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "memmap2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nonminmax" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d41a1ba30985f2c6f9cd55cdf24e9e521ff4aa4b3d238349866e262c338a64c1" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proptest" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + 
+[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "indexmap 1.9.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "roaring" +version = "0.10.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "rustyline" +version = "15.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee1e066dc922e513bda599c6ccb5f3bb2b0ea5870a579448f2622993f0a9a2f" +dependencies = [ + "bitflags", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "unicode-segmentation", + "unicode-width 0.2.2", + "utf8parse", + "windows-sys 0.59.0", +] + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.111", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap 2.12.1", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +dependencies = [ + "getrandom 0.3.4", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.111", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zerocopy" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..337e032 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "geolog" +version = "0.1.0" +edition = "2024" + +[dependencies] +chumsky = "0.9" +ariadne = "0.4" # for nice error reporting +uuid = { version = "1", features = ["v7"] } +roaring = "0.10" +nonminmax 
= "0.1" +rkyv = { version = "0.7", features = ["validation", "uuid", "indexmap"] } +tinyvec = { version = "1.6", features = ["alloc"] } +indexmap = "2.0" +memmap2 = "0.9" +rustyline = "15" # readline for REPL +toml = "0.8" # workspace.toml parsing +serde = { version = "1", features = ["derive"] } # for toml +egglog-union-find = "1.0" # union-find for congruence closure +egglog-numeric-id = "1.0" # newtype IDs with define_id! macro +itertools = "0.13" # Either type for zero-copy iterators + +[dev-dependencies] +insta = "1.40" # snapshot testing +proptest = "1.4" # property-based testing +rand = "0.9.2" +tempfile = "3.10" # temp dirs for persistence tests + +[[bin]] +name = "geolog" +path = "src/bin/geolog.rs" diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..bed653b --- /dev/null +++ b/NOTES.md @@ -0,0 +1,321 @@ +# Geolog Project Notes + +## Overview + +**Geolog** is a **Geometric Logic REPL** — a type theory with semantics in topoi, designed for formal specifications using geometric logic. 
+ +### Core Capabilities + +- **Geometric logic programming** — encode mathematical structures, relationships, and constraints +- **Database schema definition** — define sorts, functions, relations, and axioms +- **Model/instance creation** — create concrete finite models satisfying theory axioms +- **Automated inference** — chase algorithm for automatic fact derivation +- **Version control** — git-like commits and tracking for instances +- **Persistence** — append-only storage with optional disk persistence + +### Use Cases + +- Business process workflow orchestration +- Formal verification via diagrammatic rewriting +- Database query design +- Petri net reachability and process modeling + +--- + +## Tech Stack + +**Primary Language**: Rust (2024 edition, Cargo-based) + +### Key Dependencies + +| Crate | Version | Purpose | +|-------|---------|---------| +| `chumsky` | 0.9 | Parser combinator library | +| `ariadne` | 0.4 | Error reporting with source spans | +| `rkyv` | 0.7 | Zero-copy serialization | +| `rustyline` | 15 | REPL readline interface | +| `egglog-union-find` | 1.0 | Union-find for congruence closure | +| `roaring` | 0.10 | Bitmap library for sparse relations | +| `indexmap` | 2.0 | Order-preserving hash maps | +| `uuid` | 1 | UUID generation | +| `memmap2` | 0.9 | Memory-mapped file I/O | + +### Testing Frameworks + +- `insta` — snapshot testing +- `proptest` — property-based testing +- `tempfile` — temporary directory management + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +│ REPL (interactive CLI) | Batch file loading │ +├─────────────────────────────────────────────────────┤ +│ PARSING LAYER (Lexer → Parser → AST) │ +│ chumsky-based lexer & parser, source error reporting│ +├─────────────────────────────────────────────────────┤ +│ ELABORATION LAYER (AST → Core IR) │ +│ Type checking, name resolution, theory/instance │ +├─────────────────────────────────────────────────────┤ +│ CORE 
LAYER (Typed Representation) │ +│ Signature, Term, Formula, Structure, ElaboratedTheory│ +├─────────────────────────────────────────────────────┤ +│ STORAGE LAYER (Persistence) │ +│ Append-only GeologMeta store with version control │ +├─────────────────────────────────────────────────────┤ +│ QUERY & SOLVER LAYER (Execution) │ +│ Chase algorithm, congruence closure, relational │ +│ algebra compiler, SMT-style model enumeration │ +├─────────────────────────────────────────────────────┤ +│ TENSOR ALGEBRA (Axiom Checking) │ +│ Sparse tensor evaluation for axiom validation │ +└─────────────────────────────────────────────────────┘ +``` + +--- + +## Directory Structure + +| Path | Purpose | +|------|---------| +| `src/bin/geolog.rs` | CLI entry point | +| `src/lib.rs` | Library root, exports `parse()` | +| `src/repl.rs` | Interactive REPL state machine | +| `src/lexer.rs` | Tokenization using chumsky | +| `src/parser.rs` | Token stream → AST | +| `src/ast.rs` | Abstract syntax tree types | +| `src/core.rs` | Core IR: Signature, Term, Formula, Structure | +| `src/elaborate/` | AST → Core elaboration | +| `src/store/` | Persistence layer (append-only) | +| `src/query/` | Chase algorithm, relational algebra | +| `src/solver/` | SMT-style model enumeration | +| `src/tensor/` | Sparse tensor algebra for axiom checking | +| `src/cc.rs` | Congruence closure (union-find) | +| `src/id.rs` | Luid/Slid identity system | +| `src/universe.rs` | Global element registry | +| `examples/geolog/` | 30+ example `.geolog` files | +| `tests/` | 25+ test files | +| `docs/` | ARCHITECTURE.md, SYNTAX.md | +| `proofs/` | Lean4 formalization | +| `fuzz/` | Fuzzing targets | + +--- + +## Main Components + +### Parsing & Syntax (~1,200 lines) + +- `lexer.rs` — tokenization +- `parser.rs` — token stream → AST +- `ast.rs` — AST types (Theory, Instance, Axiom, etc.) 
+- `error.rs` — error formatting with source spans +- `pretty.rs` — Core → Geolog source roundtrip printing + +### Elaboration (~2,200 lines) + +- `elaborate/mod.rs` — coordination +- `elaborate/theory.rs` — AST Theory → Core ElaboratedTheory +- `elaborate/instance.rs` — AST Instance → Core Structure +- `elaborate/env.rs` — environment with theory registry +- `elaborate/types.rs` — type expression evaluation +- `elaborate/error.rs` — type error reporting + +### Core Representation + +- `core.rs` — DerivedSort, Signature, Structure, Formula, Term, Sequent +- `id.rs` — Luid (global unique ID) and Slid (structure-local ID) +- `universe.rs` — global element registry with UUID ↔ Luid mapping +- `naming.rs` — bidirectional name ↔ Luid mapping + +### Storage Layer (~1,500 lines) + +- `store/mod.rs` — main Store struct +- `store/schema.rs` — cached sort/function/relation IDs +- `store/append.rs` — low-level element append operations +- `store/theory.rs` — theory CRUD +- `store/instance.rs` — instance CRUD +- `store/commit.rs` — git-like version control +- `store/materialize.rs` — indexed views for fast lookups + +### Query & Compilation (~3,500 lines) + +- `query/compile.rs` — Query → RelAlgIR plan compilation +- `query/to_relalg.rs` — Query → Relational Algebra IR +- `query/from_relalg.rs` — RelAlgIR → Executable QueryOp +- `query/chase.rs` — chase algorithm for fixpoint computation +- `query/backend.rs` — naive QueryOp executor +- `query/optimize.rs` — algebraic law rewriting + +### Solver & Model Enumeration (~1,300 lines) + +- `solver/mod.rs` — unified model enumeration API +- `solver/tree.rs` — explicit search tree for partial models +- `solver/tactics.rs` — automated search strategies: + - CheckTactic: axiom validation + - ForwardChainingTactic: Datalog-style inference + - PropagateEquationsTactic: congruence closure + - AutoTactic: composite fixpoint solver +- `solver/types.rs` — SearchNode, Obligation, NodeStatus types + +### Tensor Algebra (~2,600 lines) + +- 
`tensor/expr.rs` — lazy tensor expression trees +- `tensor/sparse.rs` — sparse tensor storage (RoaringBitmap-based) +- `tensor/builder.rs` — expression builders +- `tensor/compile.rs` — Formula → TensorExpr compilation +- `tensor/check.rs` — axiom checking via tensor evaluation + +--- + +## Key Entry Points + +1. **CLI**: `src/bin/geolog.rs` + ``` + Usage: geolog [-d ] [source_files...] + ``` + +2. **Parse Entry**: `src/lib.rs` exports `parse(input: &str) → Result` + +3. **REPL State**: `src/repl.rs` — `ReplState::process_line()` + +4. **Theory Elaboration**: `elaborate/theory.rs::elaborate_theory()` + +5. **Instance Elaboration**: `elaborate/instance.rs::elaborate_instance_ctx()` + +6. **Chase Algorithm**: `query/chase.rs::chase_fixpoint_with_cc()` + +7. **Model Enumeration**: `solver/mod.rs::enumerate_models()` + +--- + +## Design Decisions + +### Geometric Logic Foundation + +- **Axioms as Sequents**: `forall vars. premises |- conclusion` +- **Positive Conclusions**: Can have existentials, disjunctions, but never negations +- **Geometric Morphisms**: Preserved by design, enabling category-theoretic semantics + +### Identity System + +- **Luid** ("Local Universe ID"): Globally unique across all structures +- **Slid** ("Structure-Local ID"): Index within a single structure +- Bidirectional mapping enables persistent identity despite structure changes + +### Append-Only Storage + +- **GeologMeta**: Single homoiconic theory instance storing all data +- **Patch-based Versioning**: Each commit is a delta from parent +- **Never Delete**: Elements only tombstoned for perfect audit trails + +### Type System + +- **Postfix Application**: `x f` not `f(x)` — categorical style +- **Derived Sorts**: Products of base sorts for record domains +- **Product Domains**: Functions can take record arguments: `[x: M, y: M] -> M` +- **Relations → Prop**: Relations are functions to `Prop` (boolean predicates) + +### Chase Algorithm + +- **Fixpoint Iteration**: Derives all consequences 
until closure +- **Congruence Closure Integration**: Merges elements when axioms conclude `x = y` +- **Termination for Unit Laws**: Categories with unit laws no longer loop forever +- Uses tensor algebra for efficient axiom checking + +### Solver Architecture + +- **Explicit Search Tree**: Not implicit in call stack (AI-friendly for agent control) +- **Refinement Preorder**: Structures can grow (carriers, functions, relations) +- **Obligations vs Unsat**: Axiom obligation = need to witness conclusion (NOT failure) +- **True Unsat**: Only when deriving `⊢ False` from instantiated axioms +- **Tactics-based**: AutoTactic composes multiple tactics + +### Relational Algebra Compilation + +- **QueryOp Intermediate**: SQL-like operators (Scan, Filter, Join, Project, etc.) +- **Optimization Passes**: Filter fusion, projection pushdown +- **Store-aware**: Compiled directly to GeologMeta queries with indexing + +### Tensor Algebra for Axiom Checking + +- **Sparse Representation**: Roaring Bitmaps for efficient membership +- **Lazy Expression Trees**: Tensor products fused with contractions +- **Boolean Semiring**: AND for product, OR for sum + +--- + +## REPL Commands + +``` +:list, :inspect - Introspection +:add, :assert, :retract - Mutations +:query, :explain, :compile - Query analysis +:chase, :solve, :extend - Inference +:commit, :history - Version control +:source - Load programs +:help - Show help +``` + +--- + +## Parameterized Theories + +Theories can be parameterized by other instances: + +```geolog +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} +``` + +This enables rich type-theoretic modeling (e.g., Petri net reachability as dependent types). 
+ +--- + +## Testing Infrastructure + +- **Property-based tests** (`proptest`): naming, overlay, patches, queries, structure, tensor, universe, solver +- **Unit tests**: parsing, elaboration, meta, pretty-printing, relations, version control, workspace +- **Integration tests**: 30+ `.geolog` example files +- **Fuzzing**: `fuzz/` directory with parser and REPL fuzzing targets + +--- + +## Project Status + +**Version**: 0.1.0 (Early production) + +### Completed + +- Core geometric logic implementation +- Parser, elaborator, and core IR +- Chase algorithm with equality saturation +- Solver with SMT-like model enumeration +- Persistence and version control +- Comprehensive test coverage + +### Active Development + +- Nested instance elaboration +- Homoiconic query plan representation +- Disjunction variable alignment for tensor builder +- Lean4 formalization of monotonic submodel proofs + +--- + +## Key Files Reference + +| File | Line Count (approx) | Description | +|------|---------------------|-------------| +| `src/core.rs` | ~800 | Core type definitions | +| `src/parser.rs` | ~600 | Parser implementation | +| `src/repl.rs` | ~1000 | REPL state machine | +| `src/query/chase.rs` | ~500 | Chase algorithm | +| `src/solver/mod.rs` | ~400 | Model enumeration API | +| `src/tensor/sparse.rs` | ~600 | Sparse tensor storage | +| `src/store/mod.rs` | ~400 | Storage coordination | diff --git a/README.md b/README.md new file mode 100644 index 0000000..ee19784 --- /dev/null +++ b/README.md @@ -0,0 +1,1314 @@ +# Geolog + +> This README was synthesized automatically by Claude Opus 4.5. +> As was this entire project, really. + +**Geometric Logic REPL** - A language and runtime for formal specifications using geometric logic. + +Geolog aims to provide a highly customizable, efficient, concurrent, append-only, persistent memory and query infrastructure for everything from business process workflow orchestration to formal verification via diagrammatic rewriting. 
+ +## Quick Start + +```bash +~/dev/geolog$ cargo install --path . + Compiling geolog v0.1.0 (/home/dev/geolog) + Finished release [optimized] target(s) in 12.34s + Installing ~/.cargo/bin/geolog + Installed package `geolog v0.1.0` (executable `geolog`) + +# Session 1: Define a theory +~/dev/geolog$ geolog -d foo +Workspace: foo +geolog> theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + reachable : [from: V, to: V] -> Prop; + + ax/edge : forall e : E. |- [from: e src, to: e tgt] reachable; + ax/trans : forall x,y,z : V. + [from: x, to: y] reachable, [from: y, to: z] reachable + |- [from: x, to: z] reachable; +} +Defined theory Graph (2 sorts, 2 functions, 1 relations, 2 axioms) + +geolog> :quit +Goodbye! + +# Session 2: Create an instance with chase (theory auto-persisted!) +~/dev/geolog$ geolog -d foo +Workspace: foo +geolog> instance G : Graph = chase { + a, b, c : V; + e1, e2 : E; + e1 src = a; e1 tgt = b; + e2 src = b; e2 tgt = c; +} +Defined instance G : Graph (5 elements) + +geolog> :inspect G +instance G : Graph = { + // V (3): + a : V; + b : V; + c : V; + // E (2): + e1 : E; + e2 : E; + // src: + e1 src = a; + e2 src = b; + // tgt: + e1 tgt = b; + e2 tgt = c; + // reachable (3 tuples): + [from: a, to: b] reachable; + [from: b, to: c] reachable; + [from: a, to: c] reachable; +} + +geolog> :quit +Goodbye! + +# Session 3: Everything persisted automatically! 
+~/dev/geolog$ geolog -d foo +Workspace: foo +geolog> :list +Theories: + Graph (2 sorts, 2 functions, 1 relations, 2 axioms) +Instances: + G : Graph (5 elements) + +geolog> :inspect G +instance G : Graph = { + // V (3): + a : V; + b : V; + c : V; + // E (2): + e1 : E; + e2 : E; + // src: + e1 src = a; + e2 src = b; + // tgt: + e1 tgt = b; + e2 tgt = c; + // reachable (3 tuples): + [from: a, to: b] reachable; + [from: b, to: c] reachable; + [from: a, to: c] reachable; +} + +# Category theory with equality saturation +~/dev/geolog$ geolog examples/geolog/category.geolog +geolog> :show Arrow +instance Arrow : Category = { + // ob (2): + A : ob; + B : ob; + // mor (3): + f : mor; + #3 : mor; + #4 : mor; + // src: + f src = A; + #3 src = A; + #4 src = B; + // tgt: + f tgt = B; + #3 tgt = A; + #4 tgt = B; + // comp (4 tuples): + [f: f, g: #4, h: f] comp; + [f: #3, g: f, h: f] comp; + [f: #3, g: #3, h: #3] comp; + [f: #4, g: #4, h: #4] comp; + // id (2 tuples): + [a: A, f: #3] id; + [a: B, f: #4] id; +} +``` + +The `Arrow` instance declares only objects A, B and one morphism f : A → B. 
+The chase derives identity morphisms (#3 = idA, #4 = idB) and all compositions, +while **equality saturation** collapses infinite self-compositions via unit laws: +- `[f: #3, g: f, h: f]` means idA;f = f (left unit) +- `[f: f, g: #4, h: f]` means f;idB = f (right unit) +- `[f: #3, g: #3, h: #3]` means idA;idA = idA (collapsed by unit law) + +## Features + +- **Theories**: Define sorts (types), functions, relations, and axioms +- **Instances**: Create concrete models of theories +- **Parameterized Theories**: Theories can depend on instances of other theories +- **Nested Instances**: Inline instance definitions within instances +- **Relations**: Binary and n-ary predicates with product domains +- **Axioms**: Geometric sequents, automatically checked with tensor algebra +- **Chase Algorithm**: Automatic inference of derived facts +- **Interactive REPL**: Explore and modify instances dynamically +- **Version Control**: Commit and track changes to instances + +--- + +## Showcase: Petri Net Reachability as Dependent Types + +This showcase demonstrates geolog's core capabilities through a non-trivial domain: +encoding Petri net reachability as dependent types. A solution to a reachability +problem is NOT a yes/no boolean but a **constructive witness**: a diagrammatic proof +that tokens can flow from initial to target markings via a sequence of transition firings. + +**Key concepts demonstrated:** +- Parameterized theories (`Marking` depends on `PetriNet` instance) +- Nested instance types (`ReachabilityProblem` contains `Marking` instances) +- Sort-parameterized theories (`Iso` takes two sorts as parameters) +- Cross-instance references (solution's trace elements reference problem's tokens) + +> **Note**: This showcase is tested by `cargo test test_petri_net_showcase` and +> matches `examples/geolog/petri_net_showcase.geolog` exactly. 
+ +### The Type-Theoretic Encoding + +```geolog +// ============================================================ +// THEORY: PetriNet - Places, transitions, and arcs +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking (parameterized by N : PetriNet) +// A marking assigns tokens to places +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem (parameterized by N : PetriNet) +// Initial and target markings as nested instances +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace (parameterized by N : PetriNet) +// A trace records transition firings and token flow via wires +// ============================================================ + +theory (N : PetriNet instance) Trace { + F : Sort; // Firings + F/of : F -> N/T; // Which transition each firing corresponds to + + // Wires connect output arcs of firings to input arcs of other firings + W : Sort; + W/src_firing : W -> F; + W/src_arc : W -> N/out; + W/tgt_firing : W -> F; + W/tgt_arc : W -> N/in; + + // Wire coherence axioms (source/target arcs must match firing transitions) + ax/wire_src_coherent : forall w : W. 
|- w W/src_arc N/out/src = w W/src_firing F/of; + ax/wire_tgt_coherent : forall w : W. |- w W/tgt_arc N/in/tgt = w W/tgt_firing F/of; + ax/wire_place_coherent : forall w : W. |- w W/src_arc N/out/tgt = w W/tgt_arc N/in/src; + + // Terminals connect initial/target markings to firings + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt_firing : input_terminal -> F; + input_terminal/tgt_arc : input_terminal -> N/in; + output_terminal/src_firing : output_terminal -> F; + output_terminal/src_arc : output_terminal -> N/out; + + // Terminal coherence axioms + ax/input_terminal_coherent : forall i : input_terminal. + |- i input_terminal/tgt_arc N/in/tgt = i input_terminal/tgt_firing F/of; + ax/output_terminal_coherent : forall o : output_terminal. + |- o output_terminal/src_arc N/out/src = o output_terminal/src_firing F/of; + + // Terminal place coherence + ax/input_terminal_place : forall i : input_terminal. + |- i input_terminal/of = i input_terminal/tgt_arc N/in/src; + ax/output_terminal_place : forall o : output_terminal. + |- o output_terminal/of = o output_terminal/src_arc N/out/tgt; + + // COMPLETENESS: Every arc of every firing must be accounted for. + + // Input completeness: every input arc must be fed by a wire or input terminal + ax/input_complete : forall f : F, arc : N/in. + arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt_firing = f, w W/tgt_arc = arc) \/ + (exists i : input_terminal. i input_terminal/tgt_firing = f, i input_terminal/tgt_arc = arc); + + // Output completeness: every output arc must be captured by a wire or output terminal + ax/output_complete : forall f : F, arc : N/out. + arc N/out/src = f F/of |- + (exists w : W. w W/src_firing = f, w W/src_arc = arc) \/ + (exists o : output_terminal. 
o output_terminal/src_firing = f, o output_terminal/src_arc = arc); +} + +// ============================================================ +// THEORY: Iso (parameterized by two sorts) +// Isomorphism (bijection) between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + + // Roundtrip axioms ensure this is a true bijection + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution (parameterized by N and RP) +// A constructive witness that target is reachable from initial +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + + // Bijection: input terminals <-> initial marking tokens + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + + // Bijection: output terminals <-> target marking tokens + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; +} +``` + +### Problem 0: Can we reach B from A with one token? + +```geolog +// ============================================================ +// The Petri Net: +// +---[ba]----+ +// v | +// (A) --[ab]->(B) --+ +// | | +// +----[abc]-------+--> (C) +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; abc : T; + + // A -> B (via ab) + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + // B -> A (via ba) + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + // A + B -> C (via abc) - note: two input arcs! 
+ abc_in1 : in; abc_in1 in/src = A; abc_in1 in/tgt = abc; + abc_in2 : in; abc_in2 in/src = B; abc_in2 in/tgt = abc; + abc_out : out; abc_out out/src = abc; abc_out out/tgt = C; +} + +// Initial: 1 token in A, Target: 1 token in B +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// ============================================================ +// SOLUTION 0: Yes! Fire transition 'ab' once. +// ============================================================ + +instance solution0 : ExampleNet problem0 Solution = { + trace = { + f1 : F; + f1 F/of = ExampleNet/ab; + + // Input terminal feeds A-token into f1's ab_in arc + it : input_terminal; + it input_terminal/of = ExampleNet/A; + it input_terminal/tgt_firing = f1; + it input_terminal/tgt_arc = ExampleNet/ab_in; + + // Output terminal captures f1's B-token via ab_out arc + ot : output_terminal; + ot output_terminal/of = ExampleNet/B; + ot output_terminal/src_firing = f1; + ot output_terminal/src_arc = ExampleNet/ab_out; + }; + + initial_iso = { + trace/it fwd = problem0/initial_marking/tok; + problem0/initial_marking/tok bwd = trace/it; + }; + + target_iso = { + trace/ot fwd = problem0/target_marking/tok; + problem0/target_marking/tok bwd = trace/ot; + }; +} +``` + +### Problem 2: Can we reach C from two A-tokens? + +This is a more interesting case: the only path to C is via `abc`, which requires +tokens in BOTH A and B simultaneously. Starting with 2 tokens in A, we must +first move one to B, then fire `abc`. 
+ +```geolog +// Initial: 2 tokens in A, Target: 1 token in C +instance problem2 : ExampleNet ReachabilityProblem = { + initial_marking = { + t1 : token; t1 token/of = ExampleNet/A; + t2 : token; t2 token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/C; + }; +} + +// ============================================================ +// SOLUTION 2: Fire 'ab' then 'abc'. +// +// Token flow diagram: +// [it1]--A-->[f1: ab]--B--wire-->[f2: abc]--C-->[ot] +// [it2]--A-----------------^ +// +// Step 1: Fire 'ab' to move one token A -> B +// Step 2: Fire 'abc' consuming one A-token and one B-token +// ============================================================ + +instance solution2 : ExampleNet problem2 Solution = { + trace = { + // Two firings + f1 : F; f1 F/of = ExampleNet/ab; // First: A -> B + f2 : F; f2 F/of = ExampleNet/abc; // Second: A + B -> C + + // Wire connecting f1's B-output to f2's B-input + w1 : W; + w1 W/src_firing = f1; + w1 W/src_arc = ExampleNet/ab_out; + w1 W/tgt_firing = f2; + w1 W/tgt_arc = ExampleNet/abc_in2; + + // Input terminal 1: feeds first A-token into f1 + it1 : input_terminal; + it1 input_terminal/of = ExampleNet/A; + it1 input_terminal/tgt_firing = f1; + it1 input_terminal/tgt_arc = ExampleNet/ab_in; + + // Input terminal 2: feeds second A-token into f2 + it2 : input_terminal; + it2 input_terminal/of = ExampleNet/A; + it2 input_terminal/tgt_firing = f2; + it2 input_terminal/tgt_arc = ExampleNet/abc_in1; + + // Output terminal: captures f2's C-token output + ot : output_terminal; + ot output_terminal/of = ExampleNet/C; + ot output_terminal/src_firing = f2; + ot output_terminal/src_arc = ExampleNet/abc_out; + }; + + // Bijection: 2 input terminals <-> 2 initial tokens + initial_iso = { + trace/it1 fwd = problem2/initial_marking/t1; + trace/it2 fwd = problem2/initial_marking/t2; + problem2/initial_marking/t1 bwd = trace/it1; + problem2/initial_marking/t2 bwd = trace/it2; + }; + + // Bijection: 1 output 
terminal <-> 1 target token + target_iso = { + trace/ot fwd = problem2/target_marking/t; + problem2/target_marking/t bwd = trace/ot; + }; +} +``` + +Each `Solution` instance is a **constructive diagrammatic proof**: +- The trace contains firing(s) of specific transitions +- Input terminals witness that initial tokens feed into firings +- Output terminals witness that firings produce target tokens +- The isomorphisms prove the token counts match exactly + +--- + +## Table of Contents + +1. [Basic Concepts](#basic-concepts) +2. [Theory Definitions](#theory-definitions) +3. [Instance Definitions](#instance-definitions) +4. [Relations and Axioms](#relations-and-axioms) +5. [The Chase Algorithm](#the-chase-algorithm) +6. [REPL Commands](#repl-commands) +7. [Complete Examples](#complete-examples) + +--- + +## Basic Concepts + +Geolog is based on **geometric logic**, a fragment of first-order logic that: +- Allows existential quantification in conclusions +- Allows disjunctions in conclusions +- Is preserved by geometric morphisms (structure-preserving maps) + +A **theory** defines: +- **Sorts**: Types of elements +- **Function symbols**: Function-typed variables with domain and codomain derived from sorts +- **Relation symbols**: Predicate-typed variables with domain derived from sorts, and codomain `-> Prop` +- **Axioms**: Geometric sequents (first universal quantifiers, then an implication between two propositions which are then purely positive) + +An **instance** is a concrete finite model, which means it assigns to each sort a finite set, to each function a finite function, and to each relation a Boolean-valued tensor, such that all axioms evaluate to true. 
+ +--- + +## Theory Definitions + +### Simple Theory with Sorts and Functions + +```geolog +// Directed Graph: vertices and edges with source/target functions +theory Graph { + V : Sort; // Vertices + E : Sort; // Edges + + src : E -> V; // Source of an edge + tgt : E -> V; // Target of an edge +} +``` + +### Theory with Product Domain Functions + +```geolog +// Monoid: a set with an associative binary operation +theory Monoid { + M : Sort; + + // Binary operation: M × M → M + mul : [x: M, y: M] -> M; + + // Identity element + id : M -> M; + + // Associativity: (x * y) * z = x * (y * z) + ax/assoc : forall x : M, y : M, z : M. + |- [x: [x: x, y: y] mul, y: z] mul = [x: x, y: [x: y, y: z] mul] mul; +} +``` + +### REPL Session: Defining a Theory Inline + +``` +geolog> theory Counter { +...... C : Sort; +...... next : C -> C; +...... } +Defined theory Counter (1 sorts, 1 functions) + +geolog> :inspect Counter +theory Counter { + C : Sort; + next : C -> C; +} +``` + +--- + +## Instance Definitions + +### Basic Instance + +```geolog +// A simple triangle graph: A → B → C → A +instance Triangle : Graph = { + // Vertices + A : V; + B : V; + C : V; + + // Edges + ab : E; + bc : E; + ca : E; + + // Edge endpoints (function definitions) + ab src = A; + ab tgt = B; + bc src = B; + bc tgt = C; + ca src = C; + ca tgt = A; +} +``` + +### Instance with Product Domain Functions + +```geolog +// Boolean "And" monoid: {T, F} with T as identity +instance BoolAnd : Monoid = { + T : M; + F : M; + + // Identity: T is the identity element + T id = T; + F id = T; + + // Multiplication table for "and": + [x: T, y: T] mul = T; + [x: T, y: F] mul = F; + [x: F, y: T] mul = F; + [x: F, y: F] mul = F; +} +``` + +### REPL Session: Loading and Inspecting + +``` +geolog> :source examples/geolog/graph.geolog +Loading examples/geolog/graph.geolog... 
+Defined theory Graph (2 sorts, 2 functions) + +geolog> :list +Theories: + Graph (2 sorts, 2 functions) +Instances: + Diamond : Graph (8 elements) + Arrow : Graph (3 elements) + Loop : Graph (2 elements) + Triangle : Graph (6 elements) + +geolog> :inspect Triangle +instance Triangle : Graph = { + // V (3): + A : V; + B : V; + C : V; + // E (3): + ab : E; + bc : E; + ca : E; + // src: + ab src = A; + bc src = B; + ca src = C; + // tgt: + ab tgt = B; + bc tgt = C; + ca tgt = A; +} + +geolog> :query Triangle V +Elements of V in Triangle: + A + B + C +``` + +--- + +## Relations and Axioms + +Relations are predicates on sorts, declared with `-> Prop`. + +### Unary Relations + +```geolog +theory TodoList { + Item : Sort; + + // Unary relations use simple arrow syntax + completed : Item -> Prop; + high_priority : Item -> Prop; + blocked : Item -> Prop; + + // Binary relation with named fields (used by SampleTodos below) + depends : [item: Item, on: Item] -> Prop; +} +``` + +### Binary Relations + +```geolog +theory Preorder { + X : Sort; + + // Binary relation: x ≤ y (field names document the relation) + leq : [lo: X, hi: X] -> Prop; + + // Reflexivity axiom: x ≤ x + ax/refl : forall x : X. + |- [lo: x, hi: x] leq; + + // Transitivity axiom: x ≤ y ∧ y ≤ z → x ≤ z + ax/trans : forall x : X, y : X, z : X. + [lo: x, hi: y] leq, [lo: y, hi: z] leq |- [lo: x, hi: z] leq; +} +``` + +### Asserting Relation Tuples in Instances + +```geolog +instance SampleTodos : TodoList = { + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + + // Assert unary relation: buy_groceries is completed + buy_groceries completed; + + // Assert unary relation: cook_dinner is high priority + cook_dinner high_priority; + + // Binary relation using mixed positional/named syntax: + // First positional arg maps to 'item' field, named arg for 'on' + [cook_dinner, on: buy_groceries] depends; +} +``` + +### REPL Session: Asserting Relations Dynamically + +``` +geolog> :source examples/geolog/todo_list.geolog +Loading examples/geolog/todo_list.geolog... 
+Defined theory TodoList (1 sorts, 4 relations) + +geolog> :inspect SampleTodos +instance SampleTodos : TodoList = { + // Item (4): + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + // completed (1 tuples): + [buy_groceries] completed; + // high_priority (1 tuples): + [cook_dinner] high_priority; + // depends (1 tuples): + [cook_dinner, buy_groceries] depends; +} + +geolog> :assert SampleTodos completed cook_dinner +Asserted completed(cook_dinner) in instance 'SampleTodos' + +geolog> :inspect SampleTodos +instance SampleTodos : TodoList = { + // Item (4): + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + // completed (2 tuples): + [buy_groceries] completed; + [cook_dinner] completed; + // high_priority (1 tuples): + [cook_dinner] high_priority; + // depends (1 tuples): + [cook_dinner, buy_groceries] depends; +} +``` + +--- + +## The Chase Algorithm + +The **chase algorithm** computes the closure of an instance under the theory's axioms. It derives all facts that logically follow from the base facts and axioms. + +### Transitive Closure Example + +```geolog +// Graph with reachability (transitive closure) +theory Graph { + V : Sort; + + // Direct edges + Edge : [src: V, tgt: V] -> Prop; + + // Reachability (transitive closure of Edge) + Path : [src: V, tgt: V] -> Prop; + + // Base case: every edge is a path + ax/base : forall x, y : V. + [src: x, tgt: y] Edge |- [src: x, tgt: y] Path; + + // Inductive case: paths compose + ax/trans : forall x, y, z : V. + [src: x, tgt: y] Path, [src: y, tgt: z] Path |- [src: x, tgt: z] Path; +} + +// A linear chain: a -> b -> c -> d +// Using `= chase { ... }` to automatically apply axioms during elaboration. 
+instance Chain : Graph = chase { + a : V; + b : V; + c : V; + d : V; + + // Initial edges (chase derives Path tuples) + [src: a, tgt: b] Edge; + [src: b, tgt: c] Edge; + [src: c, tgt: d] Edge; +} +``` + +### REPL Session: Running the Chase + +When using `= chase { ... }` syntax, the chase runs automatically during elaboration: + +``` +geolog> :source examples/geolog/transitive_closure.geolog +Loading examples/geolog/transitive_closure.geolog... +Defined theory Graph (1 sorts, 2 relations) +Defined instance Chain : Graph (4 elements) [chase: 6 Path tuples derived] + +geolog> :inspect Chain +instance Chain : Graph = { + // V (4): + a : V; + b : V; + c : V; + d : V; + // Edge (3 tuples): + [a, b] Edge; + [b, c] Edge; + [c, d] Edge; + // Path (6 tuples): + [a, b] Path; + [b, c] Path; + [c, d] Path; + [a, c] Path; // Derived: a->b + b->c + [b, d] Path; // Derived: b->c + c->d + [a, d] Path; // Derived: a->c + c->d (or a->b + b->d) +} +``` + +You can also run chase manually with `:chase` on non-chase instances: + +``` +geolog> :chase MyInstance +Running chase on instance 'MyInstance' (theory 'Graph')... 
+✓ Chase completed in 3 iterations (0.15ms)
```

The chase derived:
- **3 base paths** from the Edge → Path axiom
- **2 one-step transitive paths**: (a,c) and (b,d)
- **1 two-step transitive path**: (a,d)

---

## REPL Commands

### General Commands

| Command | Description |
|---------|-------------|
| `:help [topic]` | Show help (topics: syntax, examples) |
| `:quit` | Exit the REPL |
| `:list [target]` | List theories/instances |
| `:inspect <name>` | Show details of a theory or instance |
| `:source <file>` | Load and execute a .geolog file |
| `:clear` | Clear the screen |
| `:reset` | Reset all state |

### Instance Mutation

| Command | Description |
|---------|-------------|
| `:add <instance> <elem> <sort>` | Add element to instance |
| `:assert <instance> <relation> [args]` | Assert relation tuple |
| `:retract <instance> <elem>` | Retract element |

### Query Commands

| Command | Description |
|---------|-------------|
| `:query <instance> <sort>` | List all elements of a sort |
| `:explain <instance> <sort>` | Show query execution plan |
| `:compile <instance> <sort>` | Show RelAlgIR compilation |
| `:chase <instance> [max_iter]` | Run chase algorithm |

### Version Control

| Command | Description |
|---------|-------------|
| `:commit [msg]` | Commit current changes |
| `:history` | Show commit history |

### Solver

| Command | Description |
|---------|-------------|
| `:solve <theory> [budget_ms]` | Find model of theory |
| `:extend <instance> [budget_ms]` | Extend instance to theory |

### REPL Session: Query Explanation

```
geolog> :source examples/geolog/graph.geolog
Loading examples/geolog/graph.geolog... 
+Defined theory Graph (2 sorts, 2 functions) + +geolog> :explain Triangle V +Query plan for ':query Triangle V': + +Scan(sort=0) + +Sort: V (index 0) +Instance: Triangle (theory: Graph) + +geolog> :explain Triangle E +Query plan for ':query Triangle E': + +Scan(sort=1) + +Sort: E (index 1) +Instance: Triangle (theory: Graph) +``` + +--- + +## Complete Examples + +### Example 1: Directed Graphs + +**File: `examples/geolog/graph.geolog`** + +```geolog +// Directed Graph: vertices and edges with source/target functions +theory Graph { + V : Sort; // Vertices + E : Sort; // Edges + + src : E -> V; // Source of an edge + tgt : E -> V; // Target of an edge +} + +// A simple triangle graph: A → B → C → A +instance Triangle : Graph = { + A : V; + B : V; + C : V; + + ab : E; + bc : E; + ca : E; + + ab src = A; + ab tgt = B; + bc src = B; + bc tgt = C; + ca src = C; + ca tgt = A; +} + +// A self-loop: one vertex with an edge to itself +instance Loop : Graph = { + v : V; + e : E; + e src = v; + e tgt = v; +} + +// Diamond shape with two paths from top to bottom +instance Diamond : Graph = { + top : V; + left : V; + right : V; + bottom : V; + + top_left : E; + top_right : E; + left_bottom : E; + right_bottom : E; + + top_left src = top; + top_left tgt = left; + top_right src = top; + top_right tgt = right; + left_bottom src = left; + left_bottom tgt = bottom; + right_bottom src = right; + right_bottom tgt = bottom; +} +``` + +--- + +### Example 2: Algebraic Structures (Monoids) + +**File: `examples/geolog/monoid.geolog`** + +```geolog +// Monoid: a set with an associative binary operation and identity +theory Monoid { + M : Sort; + + // Binary operation: M × M → M + mul : [x: M, y: M] -> M; + + // Identity element selector + id : M -> M; + + // Left identity: id(x) * y = y + ax/left_id : forall x : M, y : M. + |- [x: x id, y: y] mul = y; + + // Right identity: x * id(y) = x + ax/right_id : forall x : M, y : M. 
+ |- [x: x, y: y id] mul = x; + + // Associativity: (x * y) * z = x * (y * z) + ax/assoc : forall x : M, y : M, z : M. + |- [x: [x: x, y: y] mul, y: z] mul = [x: x, y: [x: y, y: z] mul] mul; +} + +// Trivial monoid: single element +instance Trivial : Monoid = { + e : M; + [x: e, y: e] mul = e; + e id = e; +} + +// Boolean "And" monoid +instance BoolAnd : Monoid = { + T : M; + F : M; + + T id = T; + F id = T; + + [x: T, y: T] mul = T; + [x: T, y: F] mul = F; + [x: F, y: T] mul = F; + [x: F, y: F] mul = F; +} + +// Boolean "Or" monoid +instance BoolOr : Monoid = { + T : M; + F : M; + + T id = F; + F id = F; + + [x: T, y: T] mul = T; + [x: T, y: F] mul = T; + [x: F, y: T] mul = T; + [x: F, y: F] mul = F; +} +``` + +--- + +### Example 3: Preorders with Chase + +**File: `examples/geolog/preorder.geolog`** + +```geolog +// Preorder: reflexive and transitive relation +theory Preorder { + X : Sort; + + // The ordering relation: x ≤ y + leq : [x: X, y: X] -> Prop; + + // Reflexivity: x ≤ x + ax/refl : forall x : X. + |- [x: x, y: x] leq; + + // Transitivity: x ≤ y ∧ y ≤ z → x ≤ z + ax/trans : forall x : X, y : X, z : X. + [x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq; +} + +// Discrete preorder: only reflexive pairs +// Uses `chase` to automatically derive reflexive pairs from ax/refl. 
+instance Discrete3 : Preorder = chase { + a : X; + b : X; + c : X; +} + +// A total order on 3 elements: bot ≤ mid ≤ top +instance Chain3 : Preorder = chase { + bot : X; + mid : X; + top : X; + + [x: bot, y: mid] leq; + [x: mid, y: top] leq; + // Chase derives: (bot,bot), (mid,mid), (top,top) + (bot,top) +} +``` + +**REPL Session:** + +``` +geolog> :source examples/geolog/preorder.geolog +Defined theory Preorder (1 sorts, 1 relations) +Defined instance Discrete3 : Preorder (3 elements) [chase: 3 leq tuples derived] +Defined instance Chain3 : Preorder (3 elements) [chase: 6 leq tuples derived] + +geolog> :inspect Discrete3 + leq: 3 tuple(s) // (a,a), (b,b), (c,c) - reflexivity only + +geolog> :inspect Chain3 + leq: 6 tuple(s) // reflexive pairs + given + transitive (bot,top) +``` + +--- + +### Example 4: Task Management + +**File: `examples/geolog/todo_list.geolog`** + +```geolog +// TodoList: relational model for task tracking +theory TodoList { + Item : Sort; + + // Status relations (unary, simple arrow syntax) + completed : Item -> Prop; + high_priority : Item -> Prop; + blocked : Item -> Prop; + + // Dependencies (binary, with named fields) + depends : [item: Item, on: Item] -> Prop; + + // Axiom: blocked items depend on incomplete items + ax/dep_blocked : forall x : Item, y : Item. 
+ [item: x, on: y] depends |- x blocked \/ y completed; +} + +instance SampleTodos : TodoList = { + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + + // Unary relations: simple syntax + buy_groceries completed; + cook_dinner high_priority; + + // Binary relation: mixed positional/named syntax + // First positional arg -> 'item', named arg for 'on' + [cook_dinner, on: buy_groceries] depends; +} +``` + +--- + +### Example 5: Transitive Closure (Chase Demo) + +**File: `examples/geolog/transitive_closure.geolog`** + +```geolog +// Transitive Closure - demonstrates the chase algorithm +theory Graph { + V : Sort; + + Edge : [src: V, tgt: V] -> Prop; + Path : [src: V, tgt: V] -> Prop; + + // Base: edges are paths + ax/base : forall x, y : V. + [src: x, tgt: y] Edge |- [src: x, tgt: y] Path; + + // Transitivity: paths compose + ax/trans : forall x, y, z : V. + [src: x, tgt: y] Path, [src: y, tgt: z] Path |- [src: x, tgt: z] Path; +} + +// Linear chain: a -> b -> c -> d (chase runs automatically) +instance Chain : Graph = chase { + a : V; + b : V; + c : V; + d : V; + + [src: a, tgt: b] Edge; + [src: b, tgt: c] Edge; + [src: c, tgt: d] Edge; +} + +// Diamond: two paths from top to bottom +instance Diamond : Graph = chase { + top : V; + left : V; + right : V; + bottom : V; + + [src: top, tgt: left] Edge; + [src: top, tgt: right] Edge; + [src: left, tgt: bottom] Edge; + [src: right, tgt: bottom] Edge; +} + +// Cycle: x -> y -> z -> x (chase computes all 9 pairs!) 
+instance Cycle : Graph = chase { + x : V; + y : V; + z : V; + + [src: x, tgt: y] Edge; + [src: y, tgt: z] Edge; + [src: z, tgt: x] Edge; +} +``` + +**REPL Session** (chase runs during `:source`): + +``` +geolog> :source examples/geolog/transitive_closure.geolog +Defined theory Graph (1 sorts, 2 relations) +Defined instance Chain : Graph (4 elements) [chase: 6 Path tuples] +Defined instance Diamond : Graph (4 elements) [chase: 5 Path tuples] +Defined instance Cycle : Graph (3 elements) [chase: 9 Path tuples] +``` + +--- + +### Example 6: Inline Definitions + +You can define theories and instances directly in the REPL: + +``` +geolog> theory Counter { +...... C : Sort; +...... next : C -> C; +...... } +Defined theory Counter (1 sorts, 1 functions) + +geolog> instance Mod3 : Counter = { +...... zero : C; +...... one : C; +...... two : C; +...... zero next = one; +...... one next = two; +...... two next = zero; +...... } +Defined instance Mod3 : Counter (3 elements) + +geolog> :inspect Mod3 +instance Mod3 : Counter = { + // C (3): + zero : C; + one : C; + two : C; + // next: + zero next = one; + one next = two; + two next = zero; +} +``` + +--- + +## Syntax Reference + +### Sorts +``` +identifier : Sort; +``` + +### Functions +``` +// Unary function +name : Domain -> Codomain; + +// Binary function (product domain) +name : [field1: Sort1, field2: Sort2] -> Codomain; +``` + +### Relations +``` +// Unary relation +name : [field: Sort] -> Prop; + +// Binary relation +name : [x: Sort1, y: Sort2] -> Prop; +``` + +### Axioms +``` +// No premises (fact) +name : forall vars. |- conclusion; + +// With premises +name : forall vars. premise1, premise2 |- conclusion; + +// With disjunction in conclusion +name : forall vars. 
premise |- conclusion1 \/ conclusion2; +``` + +### Instance Elements +``` +elem_name : Sort; +``` + +### Function Values +``` +// Unary +elem func = value; + +// Product domain +[field1: val1, field2: val2] func = value; +``` + +### Relation Assertions +``` +// Unary relation +elem relation; + +// Binary relation +[field1: val1, field2: val2] relation; +``` + +--- + +## Architecture + +> TODO: greatly expand this section + +Geolog is built with several key components: + +- **Parser**: Converts `.geolog` source to AST +- **Elaborator**: Type-checks and converts AST to core representations +- **Structure**: In-memory model representation with carriers and functions +- **Chase Engine**: Fixpoint computation for derived relations +- **Query Engine**: Relational algebra for querying instances +- **Store**: Persistent, append-only storage with version control + +--- + +## License + +MIT License - see LICENSE file for details. + +--- + +## Contributing + +Contributions welcome! See CLAUDE.md for development guidelines and the `loose_thoughts/` directory for design discussions. 
diff --git a/architecture.dot b/architecture.dot new file mode 100644 index 0000000..401f926 --- /dev/null +++ b/architecture.dot @@ -0,0 +1,227 @@ +digraph GeologArchitecture { + rankdir=TB; + compound=true; + fontname="Helvetica"; + node [fontname="Helvetica", shape=box, style="rounded,filled", fillcolor="#f0f0f0"]; + edge [fontname="Helvetica"]; + + label="Geolog Architecture"; + labelloc="t"; + fontsize=24; + + // User Interface Layer + subgraph cluster_ui { + label="User Interface"; + style="rounded,filled"; + fillcolor="#e3f2fd"; + + cli [label="CLI\n(bin/geolog.rs)", fillcolor="#bbdefb"]; + repl [label="REPL\n(repl.rs)", fillcolor="#bbdefb"]; + batch [label="Batch Loading\n(.geolog files)", fillcolor="#bbdefb"]; + } + + // Parsing Layer + subgraph cluster_parsing { + label="Parsing Layer"; + style="rounded,filled"; + fillcolor="#e8f5e9"; + + lexer [label="Lexer\n(lexer.rs)", fillcolor="#c8e6c9"]; + parser [label="Parser\n(parser.rs)", fillcolor="#c8e6c9"]; + ast [label="AST\n(ast.rs)", fillcolor="#c8e6c9"]; + error [label="Error Reporting\n(error.rs)\nariadne", fillcolor="#c8e6c9"]; + pretty [label="Pretty Printer\n(pretty.rs)", fillcolor="#c8e6c9"]; + } + + // Elaboration Layer + subgraph cluster_elaboration { + label="Elaboration Layer"; + style="rounded,filled"; + fillcolor="#fff3e0"; + + elab_theory [label="Theory Elaboration\n(elaborate/theory.rs)", fillcolor="#ffe0b2"]; + elab_instance [label="Instance Elaboration\n(elaborate/instance.rs)", fillcolor="#ffe0b2"]; + elab_env [label="Environment\n(elaborate/env.rs)", fillcolor="#ffe0b2"]; + elab_types [label="Type Evaluation\n(elaborate/types.rs)", fillcolor="#ffe0b2"]; + elab_error [label="Type Errors\n(elaborate/error.rs)", fillcolor="#ffe0b2"]; + } + + // Core Layer + subgraph cluster_core { + label="Core Layer"; + style="rounded,filled"; + fillcolor="#fce4ec"; + + core [label="Core IR\n(core.rs)\nSignature, Term,\nFormula, Structure", fillcolor="#f8bbd9"]; + id [label="Identity System\n(id.rs)\nLuid, 
Slid", fillcolor="#f8bbd9"]; + universe [label="Universe\n(universe.rs)\nUUID <-> Luid", fillcolor="#f8bbd9"]; + naming [label="Naming\n(naming.rs)\nName <-> Luid", fillcolor="#f8bbd9"]; + cc [label="Congruence Closure\n(cc.rs)\nUnion-Find", fillcolor="#f8bbd9"]; + } + + // Storage Layer + subgraph cluster_storage { + label="Storage Layer"; + style="rounded,filled"; + fillcolor="#e1f5fe"; + + store [label="Store\n(store/mod.rs)", fillcolor="#b3e5fc"]; + store_schema [label="Schema Cache\n(store/schema.rs)", fillcolor="#b3e5fc"]; + store_append [label="Append Operations\n(store/append.rs)", fillcolor="#b3e5fc"]; + store_theory [label="Theory CRUD\n(store/theory.rs)", fillcolor="#b3e5fc"]; + store_instance [label="Instance CRUD\n(store/instance.rs)", fillcolor="#b3e5fc"]; + store_commit [label="Version Control\n(store/commit.rs)", fillcolor="#b3e5fc"]; + store_materialize [label="Materialized Views\n(store/materialize.rs)", fillcolor="#b3e5fc"]; + geologmeta [label="GeologMeta\n(Homoiconic Store)", fillcolor="#81d4fa", style="rounded,filled,bold"]; + } + + // Query Layer + subgraph cluster_query { + label="Query & Compilation Layer"; + style="rounded,filled"; + fillcolor="#f3e5f5"; + + query_compile [label="Query Compiler\n(query/compile.rs)", fillcolor="#e1bee7"]; + query_relalg [label="Relational Algebra IR\n(query/to_relalg.rs)\n(query/from_relalg.rs)", fillcolor="#e1bee7"]; + query_chase [label="Chase Algorithm\n(query/chase.rs)\nFixpoint + CC", fillcolor="#ce93d8", style="rounded,filled,bold"]; + query_backend [label="Query Backend\n(query/backend.rs)", fillcolor="#e1bee7"]; + query_optimize [label="Optimizer\n(query/optimize.rs)", fillcolor="#e1bee7"]; + } + + // Solver Layer + subgraph cluster_solver { + label="Solver Layer"; + style="rounded,filled"; + fillcolor="#e0f2f1"; + + solver [label="Model Enumeration\n(solver/mod.rs)", fillcolor="#b2dfdb"]; + solver_tree [label="Search Tree\n(solver/tree.rs)", fillcolor="#b2dfdb"]; + solver_tactics 
[label="Tactics\n(solver/tactics.rs)\nCheck, Forward,\nPropagate, Auto", fillcolor="#80cbc4", style="rounded,filled,bold"]; + solver_types [label="Solver Types\n(solver/types.rs)", fillcolor="#b2dfdb"]; + } + + // Tensor Layer + subgraph cluster_tensor { + label="Tensor Algebra Layer"; + style="rounded,filled"; + fillcolor="#fff8e1"; + + tensor_expr [label="Tensor Expressions\n(tensor/expr.rs)", fillcolor="#ffecb3"]; + tensor_sparse [label="Sparse Storage\n(tensor/sparse.rs)\nRoaringBitmap", fillcolor="#ffe082", style="rounded,filled,bold"]; + tensor_builder [label="Expression Builder\n(tensor/builder.rs)", fillcolor="#ffecb3"]; + tensor_compile [label="Formula Compiler\n(tensor/compile.rs)", fillcolor="#ffecb3"]; + tensor_check [label="Axiom Checker\n(tensor/check.rs)", fillcolor="#ffecb3"]; + } + + // External Dependencies (simplified) + subgraph cluster_deps { + label="Key Dependencies"; + style="rounded,dashed"; + fillcolor="#fafafa"; + + chumsky [label="chumsky\n(parser combinators)", shape=ellipse, fillcolor="#e0e0e0"]; + rkyv [label="rkyv\n(zero-copy serde)", shape=ellipse, fillcolor="#e0e0e0"]; + roaring [label="roaring\n(bitmaps)", shape=ellipse, fillcolor="#e0e0e0"]; + unionfind [label="egglog-union-find", shape=ellipse, fillcolor="#e0e0e0"]; + } + + // Data Flow Edges + + // UI to Parsing + cli -> repl; + batch -> repl; + repl -> lexer [lhead=cluster_parsing]; + + // Parsing flow + lexer -> parser; + parser -> ast; + ast -> error [style=dashed, label="errors"]; + ast -> pretty [style=dashed, label="roundtrip"]; + + // Parsing to Elaboration + ast -> elab_theory; + ast -> elab_instance; + + // Elaboration internal + elab_theory -> elab_env; + elab_instance -> elab_env; + elab_env -> elab_types; + elab_types -> elab_error [style=dashed]; + + // Elaboration to Core + elab_theory -> core; + elab_instance -> core; + + // Core internal + core -> id; + id -> universe; + id -> naming; + core -> cc; + + // Core to Storage + core -> store [lhead=cluster_storage]; 
+ + // Storage internal + store -> store_schema; + store -> store_append; + store -> store_theory; + store -> store_instance; + store -> store_commit; + store -> store_materialize; + store_append -> geologmeta; + store_theory -> geologmeta; + store_instance -> geologmeta; + store_commit -> geologmeta; + store_materialize -> geologmeta; + + // Query layer connections + repl -> query_compile [label="queries"]; + query_compile -> query_relalg; + query_relalg -> query_optimize; + query_optimize -> query_backend; + query_backend -> store [label="execute"]; + + // Chase + repl -> query_chase [label=":chase"]; + query_chase -> cc [label="equality\nsaturation"]; + query_chase -> store; + query_chase -> tensor_check [label="axiom\nchecking"]; + + // Solver connections + repl -> solver [label=":solve\n:query"]; + solver -> solver_tree; + solver_tree -> solver_tactics; + solver_tactics -> solver_types; + solver_tactics -> query_chase [label="forward\nchaining"]; + solver_tactics -> cc [label="propagate\nequations"]; + solver_tactics -> tensor_check [label="check\naxioms"]; + solver -> store; + + // Tensor internal + tensor_compile -> tensor_expr; + tensor_expr -> tensor_builder; + tensor_builder -> tensor_sparse; + tensor_check -> tensor_compile; + tensor_sparse -> core [label="read\nstructure"]; + + // Dependencies + lexer -> chumsky [style=dotted]; + parser -> chumsky [style=dotted]; + store -> rkyv [style=dotted]; + tensor_sparse -> roaring [style=dotted]; + cc -> unionfind [style=dotted]; + + // Legend + subgraph cluster_legend { + label="Legend"; + style="rounded"; + fillcolor="white"; + + legend_data [label="Data Flow", shape=plaintext]; + legend_dep [label="Dependency", shape=plaintext]; + legend_key [label="Key Component", fillcolor="#80cbc4", style="rounded,filled,bold"]; + + legend_data -> legend_dep [style=invis]; + legend_dep -> legend_key [style=invis]; + } +} diff --git a/architecture.svg b/architecture.svg new file mode 100644 index 0000000..8179b3b --- 
/dev/null +++ b/architecture.svg @@ -0,0 +1,770 @@ + + + + + + +GeologArchitecture + +Geolog Architecture + +cluster_ui + +User Interface + + +cluster_parsing + +Parsing Layer + + +cluster_elaboration + +Elaboration Layer + + +cluster_core + +Core Layer + + +cluster_storage + +Storage Layer + + +cluster_query + +Query & Compilation Layer + + +cluster_solver + +Solver Layer + + +cluster_tensor + +Tensor Algebra Layer + + +cluster_deps + +Key Dependencies + + +cluster_legend + +Legend + + + +cli + +CLI +(bin/geolog.rs) + + + +repl + +REPL +(repl.rs) + + + +cli->repl + + + + + +lexer + +Lexer +(lexer.rs) + + + +repl->lexer + + + + + +query_compile + +Query Compiler +(query/compile.rs) + + + +repl->query_compile + + +queries + + + +query_chase + +Chase Algorithm +(query/chase.rs) +Fixpoint + CC + + + +repl->query_chase + + +:chase + + + +solver + +Model Enumeration +(solver/mod.rs) + + + +repl->solver + + +:solve +:query + + + +batch + +Batch Loading +(.geolog files) + + + +batch->repl + + + + + +parser + +Parser +(parser.rs) + + + +lexer->parser + + + + + +chumsky + +chumsky +(parser combinators) + + + +lexer->chumsky + + + + + +ast + +AST +(ast.rs) + + + +parser->ast + + + + + +parser->chumsky + + + + + +error + +Error Reporting +(error.rs) +ariadne + + + +ast->error + + +errors + + + +pretty + +Pretty Printer +(pretty.rs) + + + +ast->pretty + + +roundtrip + + + +elab_theory + +Theory Elaboration +(elaborate/theory.rs) + + + +ast->elab_theory + + + + + +elab_instance + +Instance Elaboration +(elaborate/instance.rs) + + + +ast->elab_instance + + + + + +elab_env + +Environment +(elaborate/env.rs) + + + +elab_theory->elab_env + + + + + +core + +Core IR +(core.rs) +Signature, Term, +Formula, Structure + + + +elab_theory->core + + + + + +elab_instance->elab_env + + + + + +elab_instance->core + + + + + +elab_types + +Type Evaluation +(elaborate/types.rs) + + + +elab_env->elab_types + + + + + +elab_error + +Type Errors +(elaborate/error.rs) + + + +elab_types->elab_error + + 
+ + + +id + +Identity System +(id.rs) +Luid, Slid + + + +core->id + + + + + +cc + +Congruence Closure +(cc.rs) +Union-Find + + + +core->cc + + + + + +store + +Store +(store/mod.rs) + + + +core->store + + + + + +universe + +Universe +(universe.rs) +UUID <-> Luid + + + +id->universe + + + + + +naming + +Naming +(naming.rs) +Name <-> Luid + + + +id->naming + + + + + +unionfind + +egglog-union-find + + + +cc->unionfind + + + + + +store_schema + +Schema Cache +(store/schema.rs) + + + +store->store_schema + + + + + +store_append + +Append Operations +(store/append.rs) + + + +store->store_append + + + + + +store_theory + +Theory CRUD +(store/theory.rs) + + + +store->store_theory + + + + + +store_instance + +Instance CRUD +(store/instance.rs) + + + +store->store_instance + + + + + +store_commit + +Version Control +(store/commit.rs) + + + +store->store_commit + + + + + +store_materialize + +Materialized Views +(store/materialize.rs) + + + +store->store_materialize + + + + + +rkyv + +rkyv +(zero-copy serde) + + + +store->rkyv + + + + + +geologmeta + +GeologMeta +(Homoiconic Store) + + + +store_append->geologmeta + + + + + +store_theory->geologmeta + + + + + +store_instance->geologmeta + + + + + +store_commit->geologmeta + + + + + +store_materialize->geologmeta + + + + + +query_relalg + +Relational Algebra IR +(query/to_relalg.rs) +(query/from_relalg.rs) + + + +query_compile->query_relalg + + + + + +query_optimize + +Optimizer +(query/optimize.rs) + + + +query_relalg->query_optimize + + + + + +query_chase->cc + + +equality +saturation + + + +query_chase->store + + + + + +tensor_check + +Axiom Checker +(tensor/check.rs) + + + +query_chase->tensor_check + + +axiom +checking + + + +query_backend + +Query Backend +(query/backend.rs) + + + +query_backend->store + + +execute + + + +query_optimize->query_backend + + + + + +solver->store + + + + + +solver_tree + +Search Tree +(solver/tree.rs) + + + +solver->solver_tree + + + + + +solver_tactics + +Tactics +(solver/tactics.rs) +Check, 
Forward, +Propagate, Auto + + + +solver_tree->solver_tactics + + + + + +solver_tactics->cc + + +propagate +equations + + + +solver_tactics->query_chase + + +forward +chaining + + + +solver_types + +Solver Types +(solver/types.rs) + + + +solver_tactics->solver_types + + + + + +solver_tactics->tensor_check + + +check +axioms + + + +tensor_expr + +Tensor Expressions +(tensor/expr.rs) + + + +tensor_builder + +Expression Builder +(tensor/builder.rs) + + + +tensor_expr->tensor_builder + + + + + +tensor_sparse + +Sparse Storage +(tensor/sparse.rs) +RoaringBitmap + + + +tensor_sparse->core + + +read +structure + + + +roaring + +roaring +(bitmaps) + + + +tensor_sparse->roaring + + + + + +tensor_builder->tensor_sparse + + + + + +tensor_compile + +Formula Compiler +(tensor/compile.rs) + + + +tensor_compile->tensor_expr + + + + + +tensor_check->tensor_compile + + + + + +legend_data + +Data Flow + + + +legend_dep + +Dependency + + + + +legend_key + +Key Component + + + + diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..e9884a7 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,255 @@ +# Geolog Architecture + +Geolog is a language for geometric logic with semantics in topoi. This document describes the module structure and data flow. + +## Module Overview + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ repl.rs Interactive REPL with commands (:help, :inspect, etc.) 
│ +│ bin/geolog.rs CLI entry point │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ +┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ PARSING / SURFACE LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ lexer.rs Tokenization (chumsky-based) │ +│ parser.rs Token stream → AST (chumsky-based) │ +│ ast.rs Surface syntax AST types │ +│ pretty.rs Core → geolog source (inverse of parsing) │ +│ error.rs Error formatting with source spans │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ +┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ ELABORATION LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ elaborate/ │ +│ ├── mod.rs Re-exports │ +│ ├── env.rs Elaboration environment (theory registry) │ +│ ├── theory.rs AST theory → Core theory elaboration │ +│ ├── instance.rs AST instance → Core structure elaboration │ +│ └── error.rs Elaboration error types │ +│ │ +│ Transforms surface AST into typed core representation │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ +┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ CORE LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ core.rs Core IR: Signature, Term, Formula, Structure │ +│ - Signature: sorts + functions + relations │ +│ - Term: Var | App | Record | Project │ +│ - Formula: True | False | Eq | Rel | Conj | Disj | Exists │ +│ - Structure: carriers + function maps + relation storage │ +│ │ +│ id.rs Identity system (Luid = global, Slid = structure-local) │ +│ universe.rs Global element registry (Luid allocation) │ +│ naming.rs Bidirectional name ↔ Luid mapping │ +└───────────────────────────────┬─────────────────────────────────────────────┘ + │ 
+┌───────────────────────────────▼─────────────────────────────────────────────┐ +│ STORAGE LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ store/ │ +│ ├── mod.rs Store struct: unified GeologMeta persistence │ +│ ├── schema.rs Schema ID caches (sort_ids, func_ids, etc.) │ +│ ├── append.rs Append-only element/function/relation creation │ +│ ├── theory.rs Theory → Store integration │ +│ ├── instance.rs Instance → Store integration │ +│ ├── commit.rs Git-like commit/version control │ +│ └── bootstrap_queries.rs Hardcoded query patterns (being replaced) │ +│ │ +│ workspace.rs Legacy session management (deprecated, use Store) │ +│ patch.rs Patch-based structure modifications │ +│ version.rs Git-like version control for structures │ +└─────────────────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────────┐ +│ QUERY LAYER │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ query/ │ +│ ├── mod.rs Re-exports and overview │ +│ ├── chase.rs Chase algorithm for existential/equality conclusions │ +│ │ - chase_fixpoint_with_cc(): main entry point │ +│ │ - Integrates CongruenceClosure for equality saturation│ +│ ├── compile.rs Query → QueryOp plan compilation │ +│ ├── backend.rs Naive QueryOp executor (reference impl) │ +│ ├── optimize.rs Algebraic law rewriting (filter fusion, etc.) │ +│ ├── pattern.rs Legacy Pattern API (deprecated) │ +│ └── store_queries.rs Store-level compiled query methods │ +│ │ +│ Relational query engine for GeologMeta and instance queries. 
│ +│ Query API: Query::scan(sort).filter_eq(func, col, val).compile() │ +│ Optimizer applies RelAlgIR laws: Filter(p, Filter(q, x)) → Filter(p∧q, x) │ +└─────────────────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────────┐ +│ SOLVING LAYER (frontier) │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ cc.rs Congruence closure (shared by solver + chase) │ +│ - Element equivalence tracking with union-find │ +│ - Used for equality conclusion axioms │ +│ │ +│ solver/ │ +│ ├── mod.rs Unified model enumeration API + re-exports │ +│ │ - enumerate_models(): core unified function │ +│ │ - solve(): find models from scratch │ +│ │ - query(): extend existing models │ +│ ├── types.rs SearchNode, Obligation, NodeStatus (re-exports cc::*) │ +│ ├── tree.rs Explicit search tree with from_base() for extensions │ +│ └── tactics.rs Automated search tactics: │ +│ - CheckTactic: axiom checking, obligation reporting │ +│ - ForwardChainingTactic: Datalog-style forward chaining │ +│ - PropagateEquationsTactic: congruence closure propagation│ +│ - AutoTactic: composite fixpoint solver │ +│ │ +│ REPL commands: `:solve `, `:extend ` │ +│ See examples/geolog/solver_demo.geolog for annotated examples. 
│ +│ │ +│ tensor/ │ +│ ├── mod.rs Re-exports │ +│ ├── expr.rs Lazy tensor expression trees │ +│ ├── sparse.rs Sparse tensor storage (RoaringTreemap) │ +│ ├── builder.rs Expression builders (conjunction, disjunction, exists) │ +│ ├── compile.rs Formula → TensorExpr compilation │ +│ └── check.rs Axiom checking via tensor evaluation │ +└─────────────────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────────────────┐ +│ META LAYER (self-description) │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ meta.rs Rust codegen for GeologMeta theory │ +│ theories/GeologMeta.geolog Homoiconic theory representation │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Data Flow + +### Parsing / Pretty-Printing Flow +``` +Source text → lexer.rs → Token stream → parser.rs → ast::File + ↓ +core::Structure ← elaborate ←──────────────────────── ast::* + ↓ +pretty.rs → Source text (roundtrip!) 
+``` + +### Elaboration Flow +``` +ast::TheoryDecl → elaborate/theory.rs → core::Theory (Signature + Axioms) +ast::InstanceDecl → elaborate/instance.rs → core::Structure +``` + +### REPL Flow +``` +User input → ReplState::process_line → MetaCommand | GeologInput + ↓ + GeologInput → parse → elaborate → workspace.add_* +``` + +## Key Types + +### Identity System + +```rust +Luid // "Local Universe ID" - globally unique across all structures +Slid // "Structure-Local ID" - index within a single structure + +// A Structure maps Slid → Luid for global identity +structure.get_luid(slid) -> Luid +``` + +### Core Representation + +```rust +// Signatures define the vocabulary +Signature { + sorts: Vec, // Sort names by SortId + functions: Vec, // f : A → B + relations: Vec, // R : A → Prop +} + +// Structures interpret signatures +Structure { + carriers: Vec, // Elements per sort (as Slid) + functions: Vec>, // Function value maps + relations: Vec, // Relation extents + local_to_global: Vec, // Slid → Luid +} +``` + +### Axioms (Sequents) + +```rust +Sequent { + context: Context, // Universally quantified variables + premise: Formula, // Antecedent (conjunction of atomics) + conclusion: Formula, // Consequent (positive geometric formula) +} +``` + +## Design Principles + +1. **Postfix application**: `x f` not `f(x)` — matches categorical composition +2. **Child pointers**: Parent → Child, not Child → Parent (no products in domains) +3. **Upward binding**: Variables point to their binders (scoping is explicit) +4. **Sparse storage**: Relations use RoaringBitmap for efficient membership +5. **Patch-based updates**: Structures evolve via patches, enabling versioning +6. 
**Explicit search tree**: Solver maintains tree in memory, not call stack + +## Testing Strategy + +- **proptest**: Property-based tests for core operations (naming, patches, structure) +- **unit tests**: Specific behaviors in `tests/unit_*.rs` +- **integration tests**: Example .geolog files in `tests/examples_integration.rs` +- **REPL testing**: Interactive exploration via `cargo run` + +## Future Directions + +See `bd ready` for current work items. Key frontiers: + +- **Query engine** (`geolog-7tt`, `geolog-32x`): Chase algorithm and RelAlgIR compiler +- **Nested instance elaboration** (`geolog-1d4`): Inline instance definitions +- **Monotonic Submodel proofs** (`geolog-rgg`): Lean4 formalization +- **Disjunction variable alignment** (`geolog-69b`): Extend tensor builder for heterogeneous disjuncts + +## Recent Milestones + +- **Unified model enumeration API** (`2026-01-19`): Consolidated `solve()`, `extend()`, and `query()` + into single `enumerate_models()` function. REPL commands `:solve` and `:extend` now share underlying implementation. + +- **Tensor compiler improvements** (`2026-01-20`): + - Function application equalities: `f(x) = y`, `y = f(x)`, `f(x) = g(y)` now compile correctly + - Empty-domain existential fix: `∃x. φ` on empty domain correctly returns false + - Closed `geolog-dxr` (tensor compilation panics on function terms) + +- **Bootstrap query migration** (`2026-01-20`): All 6 bootstrap_queries functions now delegate + to compiled query engine (`store_queries.rs`). Net reduction of ~144 lines of handcoded iteration. + +- **Proptest coverage** (`2026-01-20`): Added 6 solver proptests covering trivial theories, + inconsistent theories, existential theories, and Horn clause propagation. + +- **Theory extends fix** (`2026-01-20`): Fixed bug where function names like `Func/dom` (using `/` + as naming convention) were incorrectly treated as grandparent-qualified names. 
RelAlgIR.geolog + now loads correctly, unblocking homoiconic query plan work (`geolog-32x`). + +- **:explain REPL command** (`2026-01-20`): Added `:explain ` to show query + execution plans, with Display impl for QueryOp using math notation (∫, δ, z⁻¹, ×, ∧, ∨). + +- **Geometric logic solver complete** (`geolog-xj2`): Forward chaining, equation propagation, + existential body processing, derivation search for False. Interactive via `:solve`. + +- **Chase with equality saturation** (`2026-01-21`): Chase algorithm now integrates congruence + closure (CC) for handling equality conclusion axioms like `R(x,y) |- x = y`. CC tracks + element equivalences and canonicalizes structures after chase converges. This enables + Category theory to terminate correctly: unit law axioms collapse infinite `id;id;...` + compositions. Added `src/cc.rs` as shared module for both solver and chase. + +- **Chase proptests** (`2026-01-21`): Added property-based tests for reflexivity, transitivity, + existential conclusions, and equality conclusions. Multi-session persistence tests verify + chase results survive REPL restart. + +- **Fuzzing infrastructure** (`2026-01-21`): Added `fuzz/` directory with `fuzz_parser` and + `fuzz_repl` targets for finding edge cases. Requires nightly Rust. diff --git a/docs/SYNTAX.md b/docs/SYNTAX.md new file mode 100644 index 0000000..4d6ff18 --- /dev/null +++ b/docs/SYNTAX.md @@ -0,0 +1,336 @@ +# Geolog Surface Syntax Reference + +This document describes the surface syntax of Geolog. For examples, see `examples/geolog/`. + +## Lexical Elements + +### Identifiers +``` +identifier := [a-zA-Z_][a-zA-Z0-9_]* +``` + +### Paths +Paths use `/` as a separator (not `.`), which allows `.` for field projection: +``` +path := identifier ('/' identifier)* +``` +Examples: `P`, `in/src`, `ax/refl` + +### Keywords +``` +namespace theory instance query +Sort Prop forall exists +``` + +### Operators and Punctuation +``` +: -> = |- \/ . 
, ; +{ } [ ] ( ) +``` + +## Declarations + +A Geolog file consists of declarations: + +``` +file := declaration* +declaration := namespace | theory | instance | query +``` + +### Namespace +``` +namespace identifier; +``` +Currently a no-op; reserved for future module system. + +### Theory + +```ebnf +theory := 'theory' params? identifier '{' theory_item* '}' +params := param_group+ +param_group := '(' param (',' param)* ')' +param := identifier ':' type_expr + +theory_item := sort_decl | function_decl | axiom_decl | field_decl +``` + +#### Sort Declaration +``` +identifier ':' 'Sort' ';' +``` +Example: `P : Sort;` + +#### Function Declaration +``` +path ':' type_expr '->' type_expr ';' +``` +Examples: +``` +src : E -> V; // Unary function +mul : [x: M, y: M] -> M; // Binary function (product domain) +``` + +#### Relation Declaration +Relations are functions to `Prop`: +``` +path ':' type_expr '->' 'Prop' ';' +``` +Example: +``` +leq : [x: X, y: X] -> Prop; // Binary relation +``` + +#### Axiom Declaration +``` +path ':' 'forall' quantified_vars '.' premises '|-' conclusion ';' + +quantified_vars := (var_group (',' var_group)*)? // May be empty! +var_group := identifier (',' identifier)* ':' type_expr +premises := formula (',' formula)* // May be empty +``` + +Examples: +``` +// No premises (Horn clause with empty body) +ax/refl : forall x : X. |- [x: x, y: x] leq; + +// With premises +ax/trans : forall x : X, y : X, z : X. + [x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq; + +// Empty quantifier - unconditional axiom +// Useful for asserting existence without preconditions +ax/nonempty : forall . |- exists x : X.; +``` + +### Instance + +```ebnf +instance := 'instance' identifier ':' type_expr '=' instance_body +instance_body := '{' instance_item* '}' | 'chase' '{' instance_item* '}' + +instance_item := element_decl | equation | nested_instance +``` + +Using `= chase { ... 
}` runs the chase algorithm during elaboration, automatically deriving facts from axioms. + +The chase supports: +- **Existential conclusions**: Creates fresh elements for `∃` in axiom conclusions +- **Equality conclusions**: Uses congruence closure to track element equivalences +- **Fixpoint iteration**: Runs until no new facts can be derived + +Equality saturation enables termination for theories with unit laws (like Categories) that would otherwise loop forever. + +#### Element Declaration +``` +identifier ':' type_expr ';' +``` +Example: `A : V;` — declares element `A` of sort `V` + +#### Equation +``` +term '=' term ';' +``` +Example: `ab src = A;` — asserts that applying `src` to `ab` yields `A` + +#### Nested Instance (syntax parsed but not fully elaborated) +``` +identifier '=' '{' instance_item* '}' ';' +``` + +## Type Expressions + +```ebnf +type_expr := 'Sort' | 'Prop' | path | record_type | app_type | arrow_type | instance_type + +record_type := '[' (field (',' field)*)? ']' +field := identifier ':' type_expr // Named field + | type_expr // Positional: gets name "0", "1", etc. + +app_type := type_expr type_expr // Juxtaposition +arrow_type := type_expr '->' type_expr +instance_type := type_expr 'instance' +``` + +Examples: +``` +Sort // The universe of sorts +Prop // Propositions +V // A named sort +[x: M, y: M] // Product type with named fields +[M, M] // Product type with positional fields ("0", "1") +[M, on: M] // Mixed: first positional, second named +M -> M // Function type +PetriNet instance // Instance of a theory +N PetriNet instance // Parameterized: N is a PetriNet instance +``` + +## Terms + +```ebnf +term := path | record | paren_term | application | projection + +record := '[' (entry (',' entry)*)? ']' +entry := identifier ':' term // Named entry + | term // Positional: gets name "0", "1", etc. + +paren_term := '(' term ')' +application := term term // Postfix! 'x f' means 'f(x)' +projection := term '.' 
identifier // Record projection +``` + +**Important**: Geolog uses **postfix** function application. + +| Geolog | Traditional | +|--------|-------------| +| `x f` | `f(x)` | +| `[x: a, y: b] mul` | `mul(a, b)` | +| `x f g` | `g(f(x))` | + +This matches categorical composition: morphisms compose left-to-right. + +Examples: +``` +A // Variable/element reference +ab src // Apply src to ab +[x: a, y: b] mul // Apply mul to record (named fields) +[a, b] mul // Apply mul to record (positional) +[a, on: b] rel // Mixed: positional first, named second +x f g // Composition: g(f(x)) +r .field // Project field from record r +``` + +**Note on positional fields**: Positional fields are assigned names "0", "1", etc. +When matching against a relation defined with named fields (e.g., `rel : [x: M, y: M] -> Prop`), +positional fields are matched by position: "0" matches the first field, "1" the second, etc. +This allows mixing positional and named syntax: `[a, y: b] rel` is equivalent to `[x: a, y: b] rel`. + +## Formulas + +```ebnf +formula := atomic | exists | disjunction | paren_formula + +atomic := equality | relation_app +equality := term '=' term +relation_app := term identifier // 'x R' means R(x) + +exists := 'exists' quantified_vars '.' formulas? // Body may be empty (= True) +formulas := formula (',' formula)* +disjunction := formula ('\/' formula)+ +paren_formula := '(' formula ')' +``` + +**Conjunction** is implicit: premises in axioms separated by `,` form a conjunction. + +Examples: +``` +x = y // Equality +[x: a, y: b] leq // Relation application +exists z : X. [x: x, y: z] leq // Existential with condition +exists z : X. // Existential with empty body (= exists z. 
True) +phi \/ psi // Disjunction +``` + +## Comments + +Line comments start with `//`: +``` +// This is a comment +P : Sort; // Inline comment +``` + +## Complete Example + +```geolog +// Directed graph theory +theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; +} + +// A triangle: A → B → C → A +instance Triangle : Graph = { + A : V; + B : V; + C : V; + + ab : E; + ab src = A; + ab tgt = B; + + bc : E; + bc src = B; + bc tgt = C; + + ca : E; + ca src = C; + ca tgt = A; +} +``` + +## Grammar Summary (EBNF) + +```ebnf +file := declaration* + +declaration := 'namespace' ident ';' + | 'theory' params? ident '{' theory_item* '}' + | 'instance' ident ':' type '=' '{' instance_item* '}' + | 'query' ident ':' type '=' formula + +params := ('(' param (',' param)* ')')+ +param := ident ':' type + +theory_item := ident ':' 'Sort' ';' + | path ':' type '->' type ';' + | path ':' 'forall' qvars '.' formulas '|-' formula ';' + +qvars := (ident (',' ident)* ':' type) (',' ...)* +formulas := formula (',' formula)* + +instance_item := ident ':' type ';' + | term '=' term ';' + | ident '=' '{' instance_item* '}' ';' + +type := 'Sort' | 'Prop' | path | '[' fields ']' | type type | type '->' type | type 'instance' +fields := (ident ':' type) (',' ...)* + +term := path | '[' entries ']' | '(' term ')' | term term | term '.' ident +entries := (ident ':' term) (',' ...)* + +formula := term '=' term | term ident | 'exists' qvars '.' 
formula | formula '\/' formula | '(' formula ')' + +path := ident ('/' ident)* +ident := [a-zA-Z_][a-zA-Z0-9_]* +``` + +## Example Files + +The `examples/geolog/` directory contains working examples: + +| File | Description | +|------|-------------| +| `graph.geolog` | Simple directed graph theory with vertices and edges | +| `preorder.geolog` | Preorder (reflexive, transitive relation) with discrete/chain instances | +| `transitive_closure.geolog` | **Demonstrates chase algorithm** - computes reachability | +| `monoid.geolog` | Algebraic monoid theory with associativity axiom | +| `petri_net.geolog` | Petri net formalization with places, transitions, marking | +| `petri_net_showcase.geolog` | **Full showcase** - parameterized theories, nested instances, cross-references | +| `todo_list.geolog` | Task management example with dependencies | +| `solver_demo.geolog` | Solver demonstration with reachability queries | +| `relalg_simple.geolog` | Simple RelAlgIR query plan examples | + +### Running Examples + +```bash +# Start REPL with an example +cargo run -- examples/geolog/graph.geolog + +# Or load interactively +cargo run +:source examples/geolog/transitive_closure.geolog +:inspect Chain +:chase Chain # Computes transitive closure! 
+``` diff --git a/examples/elaborate.rs b/examples/elaborate.rs new file mode 100644 index 0000000..58e85a5 --- /dev/null +++ b/examples/elaborate.rs @@ -0,0 +1,168 @@ +use geolog::universe::Universe; +use geolog::{ + elaborate::{ElaborationContext, Env, elaborate_instance_ctx, elaborate_theory}, + parse, + repl::InstanceEntry, +}; +use std::collections::HashMap; +use std::rc::Rc; + +fn main() { + let input = r#" +namespace VanillaPetriNets; + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} +"#; + + println!("=== PARSING ==="); + let file = match parse(input) { + Ok(f) => f, + Err(e) => { + eprintln!("Parse error: {}", e); + std::process::exit(1); + } + }; + println!("Parsed {} declarations\n", file.declarations.len()); + + println!("=== ELABORATING ==="); + let mut env = Env::new(); + let mut universe = Universe::new(); + + for decl in &file.declarations { + match &decl.node { + geolog::Declaration::Namespace(name) => { + println!("Skipping namespace: {}", name); + } + geolog::Declaration::Theory(t) => { + print!("Elaborating theory {}... 
", t.name); + match elaborate_theory(&mut env, t) { + Ok(elab) => { + println!("OK!"); + println!( + " Params: {:?}", + elab.params.iter().map(|p| &p.name).collect::>() + ); + println!(" Sorts: {:?}", elab.theory.signature.sorts); + println!( + " Functions: {:?}", + elab.theory + .signature + .functions + .iter() + .map(|f| &f.name) + .collect::>() + ); + println!(" Axioms: {}", elab.theory.axioms.len()); + for (i, ax) in elab.theory.axioms.iter().enumerate() { + println!( + " [{i}] {} vars, premise -> conclusion", + ax.context.vars.len() + ); + } + println!(); + + // Add to environment for dependent theories + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + Err(e) => { + println!("FAILED: {}", e); + } + } + } + geolog::Declaration::Instance(i) => { + // Extract theory name from the type expression + let theory_name = i.theory.as_single_path() + .and_then(|p| p.segments.first().cloned()) + .unwrap_or_else(|| "?".to_string()); + print!("Elaborating instance {}... ", i.name); + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + match elaborate_instance_ctx(&mut ctx, i) { + Ok(result) => { + let structure = &result.structure; + println!("OK!"); + println!(" Theory: {}", theory_name); + println!(" Elements: {} total", structure.len()); + for sort_id in 0..structure.carriers.len() { + println!( + " Sort {}: {} elements", + sort_id, + structure.carrier_size(sort_id) + ); + } + println!(" Functions defined:"); + for (fid, func_map) in structure.functions.iter().enumerate() { + println!(" Func {}: {} mappings", fid, func_map.len()); + } + println!(); + } + Err(e) => { + println!("FAILED: {}", e); + } + } + } + geolog::Declaration::Query(_) => { + println!("Skipping query (not implemented yet)"); + } + } + } + + println!("=== SUMMARY ==="); + println!("Elaborated {} theories", env.theories.len()); +} diff --git 
a/examples/full_petri.rs b/examples/full_petri.rs new file mode 100644 index 0000000..4cb1329 --- /dev/null +++ b/examples/full_petri.rs @@ -0,0 +1,132 @@ +use geolog::parse; + +fn main() { + let input = r#" +namespace VanillaPetriNets; + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + ax1 : forall w : W. |- w W/src .arc N/out/src = w W/src .firing F/of; + ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; + ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]); +} + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. 
|- y bwd fwd = y; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} + +// Reachability problem: can we get from A to B? +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + t token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/B; + }; +} + +query findTrace { + ? : ExampleNet Trace instance; +} +"#; + + match parse(input) { + Ok(file) => { + println!("Parsed successfully!"); + println!("Declarations: {}", file.declarations.len()); + for decl in &file.declarations { + match &decl.node { + geolog::Declaration::Namespace(n) => println!(" - namespace {}", n), + geolog::Declaration::Theory(t) => { + println!(" - theory {} ({} items)", t.name, t.body.len()) + } + geolog::Declaration::Instance(i) => { + println!(" - instance {} ({} items)", i.name, i.body.len()) + } + geolog::Declaration::Query(q) => println!(" - query {}", q.name), + } + } + } + Err(e) => { + eprintln!("Parse error: {}", e); + std::process::exit(1); + } + } +} diff --git a/examples/geolog/category.geolog b/examples/geolog/category.geolog new file mode 100644 index 0000000..f480a1f --- /dev/null +++ b/examples/geolog/category.geolog @@ -0,0 +1,86 @@ +// Category theory in current geolog syntax +// +// This is the "desugared" version of the aspirational syntax in +// loose_thoughts/2026-01-21_dependent_sorts_and_functional_relations.md + +theory Category { + ob : Sort; + mor : Sort; + + // Morphism source and target + src : mor -> ob; + tgt : mor -> ob; + + // 
Composition: comp(f, g, h) means "h = f ; g" (f then g) + // Domain constraint: f.tgt = g.src + comp : [f: mor, g: mor, h: mor] -> Prop; + + // Identity: id(a, f) means "f is the identity on a" + id : [a: ob, f: mor] -> Prop; + + // === Axioms === + + // Identity morphisms have matching source and target + ax/id_src : forall x : ob, i : mor. [a: x, f: i] id |- i src = x; + ax/id_tgt : forall x : ob, i : mor. [a: x, f: i] id |- i tgt = x; + + // Composition domain constraint + ax/comp_dom : forall p : mor, q : mor, r : mor. + [f: p, g: q, h: r] comp |- p tgt = q src; + + // Composition source/target + ax/comp_src : forall p : mor, q : mor, r : mor. + [f: p, g: q, h: r] comp |- r src = p src; + ax/comp_tgt : forall p : mor, q : mor, r : mor. + [f: p, g: q, h: r] comp |- r tgt = q tgt; + + // Existence of identities (one per object) + ax/id_exists : forall x : ob. |- exists i : mor. [a: x, f: i] id; + + // Existence of composites (when composable) + ax/comp_exists : forall p : mor, q : mor. + p tgt = q src |- exists r : mor. [f: p, g: q, h: r] comp; + + // Left unit: id_a ; f = f + ax/unit_left : forall x : ob, i : mor, p : mor, r : mor. + [a: x, f: i] id, p src = x, [f: i, g: p, h: r] comp |- r = p; + + // Right unit: f ; id_b = f + ax/unit_right : forall y : ob, i : mor, p : mor, r : mor. + [a: y, f: i] id, p tgt = y, [f: p, g: i, h: r] comp |- r = p; + + // Associativity: (f ; g) ; h = f ; (g ; h) + ax/assoc : forall p : mor, q : mor, r : mor, pq : mor, qr : mor, pqr1 : mor, pqr2 : mor. + [f: p, g: q, h: pq] comp, [f: pq, g: r, h: pqr1] comp, + [f: q, g: r, h: qr] comp, [f: p, g: qr, h: pqr2] comp + |- pqr1 = pqr2; + + // Uniqueness of composition (functional) + ax/comp_unique : forall p : mor, q : mor, r1 : mor, r2 : mor. + [f: p, g: q, h: r1] comp, [f: p, g: q, h: r2] comp |- r1 = r2; + + // Uniqueness of identity (one per object) + ax/id_unique : forall x : ob, i1 : mor, i2 : mor. 
+ [a: x, f: i1] id, [a: x, f: i2] id |- i1 = i2; +} + +// The "walking arrow" category: A --f--> B +// +// Now we can declare just objects and non-identity morphisms! +// The chase derives: +// - Identity morphisms for each object (via ax/id_exists) +// - Composition facts (via ax/comp_exists) +// - Source/target for compositions (via ax/comp_src, ax/comp_tgt) +// +// The equality saturation (via congruence closure) collapses: +// - id;id;id;... = id (via ax/unit_left and ax/unit_right) +// - Duplicate compositions (via ax/comp_unique) +// Without CC, the chase would loop forever creating id;id, id;id;id, ... +instance Arrow : Category = chase { + // Objects + A : ob; + B : ob; + + // Non-identity morphism + f : mor; f src = A; f tgt = B; +} diff --git a/examples/geolog/field_projection_chase_test.geolog b/examples/geolog/field_projection_chase_test.geolog new file mode 100644 index 0000000..a007482 --- /dev/null +++ b/examples/geolog/field_projection_chase_test.geolog @@ -0,0 +1,27 @@ +// Test: Field projection in chase + +theory FieldProjectionChaseTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Marker sort for elements whose x field matches a given a + XMatches : Sort; + XMatches/r : XMatches -> R; + XMatches/a : XMatches -> A; + + // Axiom: if r's x field equals a, create an XMatches + ax1 : forall r : R, a : A, b : B. r R/data = [x: a, y: b] |- exists m : XMatches. 
m XMatches/r = r, m XMatches/a = a; +} + +instance Test : FieldProjectionChaseTest = chase { + a1 : A; + a2 : A; + b1 : B; + r1 : R; + r1 R/data = [x: a1, y: b1]; + r2 : R; + r2 R/data = [x: a2, y: b1]; +} diff --git a/examples/geolog/field_projection_test.geolog b/examples/geolog/field_projection_test.geolog new file mode 100644 index 0000000..deb7376 --- /dev/null +++ b/examples/geolog/field_projection_test.geolog @@ -0,0 +1,12 @@ +// Test: Field projection syntax + +theory FieldProjectionTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Axiom using field projection: r R/data .x + ax1 : forall r : R, a : A. r R/data .x = a |- true; +} diff --git a/examples/geolog/graph.geolog b/examples/geolog/graph.geolog new file mode 100644 index 0000000..69ba7f5 --- /dev/null +++ b/examples/geolog/graph.geolog @@ -0,0 +1,79 @@ +// Directed Graph: vertices and edges with source/target functions +// +// This is the canonical example of a "presheaf" - a functor from a small +// category (the "walking arrow" • → •) to Set. 
+ +theory Graph { + V : Sort; // Vertices + E : Sort; // Edges + + src : E -> V; // Source of an edge + tgt : E -> V; // Target of an edge +} + +// A simple triangle graph: A → B → C → A +instance Triangle : Graph = { + // Vertices + A : V; + B : V; + C : V; + + // Edges + ab : E; + bc : E; + ca : E; + + // Edge endpoints + ab src = A; + ab tgt = B; + bc src = B; + bc tgt = C; + ca src = C; + ca tgt = A; +} + +// A self-loop: one vertex with an edge to itself +instance Loop : Graph = { + v : V; + e : E; + e src = v; + e tgt = v; +} + +// The "walking arrow": two vertices, one edge +instance Arrow : Graph = { + s : V; + t : V; + f : E; + f src = s; + f tgt = t; +} + +// A more complex graph: diamond shape with two paths from top to bottom +// +// top +// / \ +// left right +// \ / +// bottom +// +instance Diamond : Graph = { + top : V; + left : V; + right : V; + bottom : V; + + top_left : E; + top_right : E; + left_bottom : E; + right_bottom : E; + + top_left src = top; + top_left tgt = left; + top_right src = top; + top_right tgt = right; + left_bottom src = left; + left_bottom tgt = bottom; + right_bottom src = right; + right_bottom tgt = bottom; +} diff --git a/examples/geolog/iso_instance_test.geolog b/examples/geolog/iso_instance_test.geolog new file mode 100644 index 0000000..68653c8 --- /dev/null +++ b/examples/geolog/iso_instance_test.geolog @@ -0,0 +1,29 @@ +// Multi-parameter theory instantiation test + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +theory A { a : Sort; } +theory B { b : Sort; } + +instance As : A = { + a1 : a; + a2 : a; +} + +instance Bs : B = { + b1 : b; + b2 : b; +} + +// Can we create an Iso instance with sort parameters? 
+instance AB_Iso : As/a Bs/b Iso = { + a1 fwd = Bs/b1; + a2 fwd = Bs/b2; + b1 bwd = As/a1; + b2 bwd = As/a2; +} diff --git a/examples/geolog/iso_theory_test.geolog b/examples/geolog/iso_theory_test.geolog new file mode 100644 index 0000000..d17ad21 --- /dev/null +++ b/examples/geolog/iso_theory_test.geolog @@ -0,0 +1,9 @@ +// Multi-parameter theory test (Iso from vision) + +// First just try sorts as parameters +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + // Axioms would need chained function application... + // fb : forall x : X. |- x fwd bwd = x; +} diff --git a/examples/geolog/monoid.geolog b/examples/geolog/monoid.geolog new file mode 100644 index 0000000..596dd37 --- /dev/null +++ b/examples/geolog/monoid.geolog @@ -0,0 +1,78 @@ +// Monoid: a set with an associative binary operation and identity element +// +// This is the simplest algebraic structure with interesting axioms. +// Note: geolog uses postfix function application. + +theory Monoid { + M : Sort; + + // Binary operation: M × M → M + mul : [x: M, y: M] -> M; + + // Identity element: we use a unary function from M to M that + // "picks out" the identity (any x maps to e) + // A cleaner approach would use Unit → M but that needs product support. + id : M -> M; + + // Left identity: id(x) * y = y (id(x) is always e) + ax/left_id : forall x : M, y : M. + |- [x: x id, y: y] mul = y; + + // Right identity: x * id(y) = x + ax/right_id : forall x : M, y : M. + |- [x: x, y: y id] mul = x; + + // Associativity: (x * y) * z = x * (y * z) + ax/assoc : forall x : M, y : M, z : M. + |- [x: [x: x, y: y] mul, y: z] mul = [x: x, y: [x: y, y: z] mul] mul; + + // id is constant: id(x) = id(y) for all x, y + ax/id_const : forall x : M, y : M. 
+ |- x id = y id; +} + +// Trivial monoid: single element, e * e = e +instance Trivial : Monoid = { + e : M; + + // Multiplication table: e * e = e + // Using positional syntax: [a, b] maps to [x: a, y: b] + [e, e] mul = e; + + // Identity: e is the identity element + e id = e; +} + +// Boolean "And" monoid: {T, F} with T as identity +// T and T = T, T and F = F, F and T = F, F and F = F +instance BoolAnd : Monoid = { + T : M; + F : M; + + // Identity: T is the identity element + T id = T; + F id = T; + + // Multiplication table for "and": + [x: T, y: T] mul = T; + [x: T, y: F] mul = F; + [x: F, y: T] mul = F; + [x: F, y: F] mul = F; +} + +// Boolean "Or" monoid: {T, F} with F as identity +// T or T = T, T or F = T, F or T = T, F or F = F +instance BoolOr : Monoid = { + T : M; + F : M; + + // Identity: F is the identity element + T id = F; + F id = F; + + // Multiplication table for "or": + [x: T, y: T] mul = T; + [x: T, y: F] mul = T; + [x: F, y: T] mul = T; + [x: F, y: F] mul = F; +} diff --git a/examples/geolog/nested_instance_test.geolog b/examples/geolog/nested_instance_test.geolog new file mode 100644 index 0000000..2182887 --- /dev/null +++ b/examples/geolog/nested_instance_test.geolog @@ -0,0 +1,33 @@ +// Test: Nested instance declarations (following vision pattern) + +theory Place { + P : Sort; +} + +theory (Pl : Place instance) Token { + token : Sort; + token/of : token -> Pl/P; +} + +theory (Pl : Place instance) Problem { + initial_marking : Pl Token instance; + target_marking : Pl Token instance; +} + +// Create a place instance +instance MyPlaces : Place = { + p1 : P; + p2 : P; +} + +// Test nested instance declarations +instance TestProblem : MyPlaces Problem = { + initial_marking = { + t1 : token; + t1 token/of = MyPlaces/p1; + }; + target_marking = { + t2 : token; + t2 token/of = MyPlaces/p2; + }; +} diff --git a/examples/geolog/petri_net.geolog b/examples/geolog/petri_net.geolog new file mode 100644 index 0000000..8e4e235 --- /dev/null +++ 
b/examples/geolog/petri_net.geolog @@ -0,0 +1,135 @@ +// Petri Net: a bipartite graph between places and transitions +// +// Petri nets model concurrent systems. Places hold tokens, transitions +// fire when their input places have tokens, consuming inputs and +// producing outputs. +// +// This encoding uses explicit "arc" sorts for input/output connections, +// which is more faithful to the categorical semantics (a span). + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + In : Sort; // Input arcs (from place to transition) + Out : Sort; // Output arcs (from transition to place) + + // Input arc endpoints + in/place : In -> P; + in/trans : In -> T; + + // Output arc endpoints + out/trans : Out -> T; + out/place : Out -> P; +} + +// A simple producer-consumer net: +// +// (ready) --[produce]--> (buffer) --[consume]--> (done) +// +instance ProducerConsumer : PetriNet = { + // Places + ready : P; + buffer : P; + done : P; + + // Transitions + produce : T; + consume : T; + + // Input arcs + i1 : In; + i1 in/place = ready; + i1 in/trans = produce; + + i2 : In; + i2 in/place = buffer; + i2 in/trans = consume; + + // Output arcs + o1 : Out; + o1 out/trans = produce; + o1 out/place = buffer; + + o2 : Out; + o2 out/trans = consume; + o2 out/place = done; +} + +// Mutual exclusion: two processes competing for a shared resource +// +// (idle1) --[enter1]--> (crit1) --[exit1]--> (idle1) +// ^ | +// | (mutex) | +// | v +// (idle2) --[enter2]--> (crit2) --[exit2]--> (idle2) +// +instance MutualExclusion : PetriNet = { + // Places for process 1 + idle1 : P; + crit1 : P; + + // Places for process 2 + idle2 : P; + crit2 : P; + + // Shared mutex token + mutex : P; + + // Transitions + enter1 : T; + exit1 : T; + enter2 : T; + exit2 : T; + + // Process 1 enters: needs idle1 AND mutex + i_enter1_idle : In; + i_enter1_idle in/place = idle1; + i_enter1_idle in/trans = enter1; + + i_enter1_mutex : In; + i_enter1_mutex in/place = mutex; + i_enter1_mutex in/trans = 
enter1; + + o_enter1 : Out; + o_enter1 out/trans = enter1; + o_enter1 out/place = crit1; + + // Process 1 exits: releases mutex + i_exit1 : In; + i_exit1 in/place = crit1; + i_exit1 in/trans = exit1; + + o_exit1_idle : Out; + o_exit1_idle out/trans = exit1; + o_exit1_idle out/place = idle1; + + o_exit1_mutex : Out; + o_exit1_mutex out/trans = exit1; + o_exit1_mutex out/place = mutex; + + // Process 2 enters: needs idle2 AND mutex + i_enter2_idle : In; + i_enter2_idle in/place = idle2; + i_enter2_idle in/trans = enter2; + + i_enter2_mutex : In; + i_enter2_mutex in/place = mutex; + i_enter2_mutex in/trans = enter2; + + o_enter2 : Out; + o_enter2 out/trans = enter2; + o_enter2 out/place = crit2; + + // Process 2 exits: releases mutex + i_exit2 : In; + i_exit2 in/place = crit2; + i_exit2 in/trans = exit2; + + o_exit2_idle : Out; + o_exit2_idle out/trans = exit2; + o_exit2_idle out/place = idle2; + + o_exit2_mutex : Out; + o_exit2_mutex out/trans = exit2; + o_exit2_mutex out/place = mutex; +} diff --git a/examples/geolog/petri_net_full.geolog b/examples/geolog/petri_net_full.geolog new file mode 100644 index 0000000..d4e3fe7 --- /dev/null +++ b/examples/geolog/petri_net_full.geolog @@ -0,0 +1,195 @@ +// Full Petri Net Reachability - Type-Theoretic Encoding +// +// This demonstrates the complete type-theoretic encoding of Petri net +// reachability from the original geolog design vision. 
+// +// Original design: loose_thoughts/2025-12-12_12:10_VanillaPetriNetRechability.md +// +// Key concepts: +// - PetriNet: places, transitions, input/output arcs (with proper arc semantics) +// - Marking: tokens in a net (parameterized by net) +// - ReachabilityProblem: initial and target markings (nested instances) +// - Trace: sequence of firings with wires connecting arcs +// - Iso: isomorphism between two sorts (used for bijections) +// - Solution: a trace with isomorphisms to markings +// +// This encoding is more type-theoretically precise than the simple +// PlaceReachability: it tracks individual tokens and arc multiplicities, +// enabling correct handling of "multi-token" transitions. + +// ============================================================ +// THEORY: PetriNet +// Basic structure: places, transitions, and arcs +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + // Each arc knows which place/transition it connects + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking (parameterized by N : PetriNet) +// A marking assigns tokens to places +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem (parameterized by N : PetriNet) +// Defines initial and target markings as nested instances +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking 
instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace (parameterized by N : PetriNet) +// A trace is a sequence of transition firings with "wires" +// connecting input and output arcs +// ============================================================ + +theory (N : PetriNet instance) Trace { + // Firings + F : Sort; + F/of : F -> N/T; + + // Wires connect output of one firing to input of another + W : Sort; + W/src : W -> [firing : F, arc : N/out]; // Wire source (firing, output arc) + W/tgt : W -> [firing : F, arc : N/in]; // Wire target (firing, input arc) + + // Wire coherence: output arc must belong to source firing's transition + ax1 : forall w : W. |- w W/src .arc N/out/src = w W/src .firing F/of; + // Wire coherence: input arc must belong to target firing's transition + ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; + + // Wire uniqueness: each (firing, out-arc) pair has at most one wire + ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + // Wire uniqueness: each (firing, in-arc) pair has at most one wire + ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; + + // Terminals: for initial marking tokens (input) and final marking tokens (output) + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Every output arc of every firing must be wired OR be an output terminal + ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + + // Every input arc of every firing must be wired OR be an input terminal + ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- + (exists w : W. 
w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]); +} + +// ============================================================ +// THEORY: Iso (parameterized by two sorts) +// An isomorphism between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution (parameterized by N and RP) +// A solution to a reachability problem +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + // The witnessing trace + trace : N Trace instance; + + // Bijection between input terminals and initial marking tokens + initial_marking_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + + // Bijection between output terminals and target marking tokens + target_marking_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; + + // Initial marking commutes: token placement matches terminal placement + initial_marking_P_comm : forall i : trace/input_terminal. + |- i trace/input_terminal/of = i initial_marking_iso/fwd RP/initial_marking/token/of; + + // Target marking commutes: token placement matches terminal placement + target_marking_P_comm : forall o : trace/output_terminal. 
+ |- o trace/output_terminal/of = o target_marking_iso/fwd RP/target_marking/token/of; +} + +// ============================================================ +// INSTANCE: ExampleNet - a small Petri net +// +// (A) --[ab]--> (B) --[bc]--> (C) +// ^ | +// +---[ba]------+ +// ============================================================ + +instance ExampleNet : PetriNet = { + // Places + A : P; + B : P; + C : P; + + // Transitions + ab : T; + ba : T; + bc : T; + + // A -> B (via ab) + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + + // B -> A (via ba) + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + + // B -> C (via bc) + bc_in : in; + bc_in in/src = B; + bc_in in/tgt = bc; + bc_out : out; + bc_out out/src = bc; + bc_out out/tgt = C; +} + +// ============================================================ +// Example queries (once query solving is implemented): +// +// query can_reach_B_from_A { +// ? : ExampleNet problem0 Solution instance; +// } +// +// where problem0 : ExampleNet ReachabilityProblem = { +// initial_marking = { tok : token; tok token/of = ExampleNet/A; }; +// target_marking = { tok : token; tok token/of = ExampleNet/B; }; +// } +// ============================================================ diff --git a/examples/geolog/petri_net_showcase.geolog b/examples/geolog/petri_net_showcase.geolog new file mode 100644 index 0000000..ece5de2 --- /dev/null +++ b/examples/geolog/petri_net_showcase.geolog @@ -0,0 +1,345 @@ +// Petri Net Reachability - Full Type-Theoretic Encoding +// +// This showcase demonstrates geolog's core capabilities through a +// non-trivial domain: encoding Petri net reachability as dependent types. 
+// +// A solution to a reachability problem is NOT a yes/no boolean but a +// CONSTRUCTIVE WITNESS: a diagrammatic proof that tokens can flow from +// initial to target markings via a sequence of transition firings. +// +// Key concepts demonstrated: +// - Parameterized theories (Marking depends on PetriNet instance) +// - Nested instance types (ReachabilityProblem contains Marking instances) +// - Sort-parameterized theories (Iso takes two sorts as parameters) +// - Cross-instance references (solution's trace elements reference problem's tokens) +// +// Original design: loose_thoughts/2025-12-12_12:10_VanillaPetriNetRechability.md + +// ============================================================ +// THEORY: PetriNet +// Places, transitions, and arcs with proper arc semantics +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking (parameterized by N : PetriNet) +// A marking assigns tokens to places +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem (parameterized by N : PetriNet) +// Initial and target markings as nested instances +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// 
THEORY: Trace (parameterized by N : PetriNet) +// A trace records transition firings and token flow via wires +// ============================================================ +// +// A trace is a diagrammatic proof of reachability: +// - Firings represent transition occurrences +// - Wires connect output arcs of firings to input arcs of other firings +// - Terminals connect to the initial/target markings +// +// The completeness axiom (ax/input_complete) ensures every input arc +// of every firing is accounted for - either wired from another firing +// or fed by an input terminal. + +theory (N : PetriNet instance) Trace { + // Firings + F : Sort; + F/of : F -> N/T; + + // Wires connect output arcs of firings to input arcs of other firings + W : Sort; + W/src_firing : W -> F; + W/src_arc : W -> N/out; + W/tgt_firing : W -> F; + W/tgt_arc : W -> N/in; + + // Wire coherence: source arc must belong to source firing's transition + ax/wire_src_coherent : forall w : W. + |- w W/src_arc N/out/src = w W/src_firing F/of; + + // Wire coherence: target arc must belong to target firing's transition + ax/wire_tgt_coherent : forall w : W. + |- w W/tgt_arc N/in/tgt = w W/tgt_firing F/of; + + // Wire place coherence: wire connects matching places + ax/wire_place_coherent : forall w : W. + |- w W/src_arc N/out/tgt = w W/tgt_arc N/in/src; + + // Terminals + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + + // Terminals connect to specific firings and arcs + input_terminal/tgt_firing : input_terminal -> F; + input_terminal/tgt_arc : input_terminal -> N/in; + output_terminal/src_firing : output_terminal -> F; + output_terminal/src_arc : output_terminal -> N/out; + + // Terminal coherence axioms + ax/input_terminal_coherent : forall i : input_terminal. + |- i input_terminal/tgt_arc N/in/tgt = i input_terminal/tgt_firing F/of; + + ax/output_terminal_coherent : forall o : output_terminal. 
+ |- o output_terminal/src_arc N/out/src = o output_terminal/src_firing F/of; + + // Terminal place coherence + ax/input_terminal_place : forall i : input_terminal. + |- i input_terminal/of = i input_terminal/tgt_arc N/in/src; + + ax/output_terminal_place : forall o : output_terminal. + |- o output_terminal/of = o output_terminal/src_arc N/out/tgt; + + // COMPLETENESS: Every arc of every firing must be accounted for. + + // Input completeness: every input arc must be fed by a wire or input terminal + ax/input_complete : forall f : F, arc : N/in. + arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt_firing = f, w W/tgt_arc = arc) \/ + (exists i : input_terminal. i input_terminal/tgt_firing = f, i input_terminal/tgt_arc = arc); + + // Output completeness: every output arc must be captured by a wire or output terminal + ax/output_complete : forall f : F, arc : N/out. + arc N/out/src = f F/of |- + (exists w : W. w W/src_firing = f, w W/src_arc = arc) \/ + (exists o : output_terminal. o output_terminal/src_firing = f, o output_terminal/src_arc = arc); +} + +// ============================================================ +// THEORY: Iso (parameterized by two sorts) +// Isomorphism (bijection) between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + + // Roundtrip axioms ensure this is a true bijection + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. 
|- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution (parameterized by N and RP) +// A constructive witness that target is reachable from initial +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + + // Bijection: input terminals <-> initial marking tokens + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + + // Bijection: output terminals <-> target marking tokens + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; + + // Commutativity axioms (currently unchecked): + // ax/init_comm : forall i : trace/input_terminal. + // |- i trace/input_terminal/of = i initial_iso/fwd RP/initial_marking/token/of; + // ax/target_comm : forall o : trace/output_terminal. + // |- o trace/output_terminal/of = o target_iso/fwd RP/target_marking/token/of; +} + +// ============================================================ +// INSTANCE: ExampleNet +// +// A Petri net with places A, B, C and transitions: +// ab: consumes 1 token from A, produces 1 token in B +// ba: consumes 1 token from B, produces 1 token in A +// abc: consumes 1 token from A AND 1 from B, produces 1 token in C +// +// +---[ba]----+ +// v | +// (A) --[ab]->(B) --+ +// | | +// +----[abc]-------+--> (C) +// +// The abc transition is interesting: it requires BOTH an A-token +// and a B-token to fire, producing a C-token. +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; abc : T; + + // A -> B (via ab) + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + // B -> A (via ba) + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + // A + B -> C (via abc) - note: two input arcs! 
+ abc_in1 : in; abc_in1 in/src = A; abc_in1 in/tgt = abc; + abc_in2 : in; abc_in2 in/src = B; abc_in2 in/tgt = abc; + abc_out : out; abc_out out/src = abc; abc_out out/tgt = C; +} + +// ============================================================ +// PROBLEM 0: Can we reach B from A with one token? +// Initial: 1 token in A +// Target: 1 token in B +// ============================================================ + +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// ============================================================ +// SOLUTION 0: Yes! Fire transition 'ab' once. +// +// This Solution instance is a CONSTRUCTIVE PROOF: +// - The trace contains one firing (f1) of transition 'ab' +// - The input terminal feeds the A-token into f1's input arc +// - The output terminal captures f1's B-token output +// - The isomorphisms prove the token counts match exactly +// ============================================================ + +instance solution0 : ExampleNet problem0 Solution = { + trace = { + // One firing of transition 'ab' + f1 : F; + f1 F/of = ExampleNet/ab; + + // Input terminal: feeds the initial A-token into f1 + it : input_terminal; + it input_terminal/of = ExampleNet/A; + it input_terminal/tgt_firing = f1; + it input_terminal/tgt_arc = ExampleNet/ab_in; + + // Output terminal: captures f1's B-token output + ot : output_terminal; + ot output_terminal/of = ExampleNet/B; + ot output_terminal/src_firing = f1; + ot output_terminal/src_arc = ExampleNet/ab_out; + }; + + initial_iso = { + trace/it fwd = problem0/initial_marking/tok; + problem0/initial_marking/tok bwd = trace/it; + }; + + target_iso = { + trace/ot fwd = problem0/target_marking/tok; + problem0/target_marking/tok bwd = trace/ot; + }; +} + +// ============================================================ +// PROBLEM 2: Can we reach C from two 
A-tokens? +// Initial: 2 tokens in A +// Target: 1 token in C +// +// This is interesting because the only path to C is via 'abc', +// which requires tokens in BOTH A and B simultaneously. +// ============================================================ + +instance problem2 : ExampleNet ReachabilityProblem = { + initial_marking = { + t1 : token; t1 token/of = ExampleNet/A; + t2 : token; t2 token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/C; + }; +} + +// ============================================================ +// SOLUTION 2: Yes! Fire 'ab' then 'abc'. +// +// Token flow diagram: +// +// [it1]--A-->[f1: ab]--B--wire-->[f2: abc]--C-->[ot] +// [it2]--A-----------------^ +// +// Step 1: Fire 'ab' to move one token A -> B +// - it1 feeds A-token into f1 via ab_in +// - f1 produces B-token via ab_out +// Step 2: Fire 'abc' consuming one A-token and one B-token +// - it2 feeds A-token into f2 via abc_in1 +// - Wire connects f1's ab_out to f2's abc_in2 (the B-input) +// - f2 produces C-token via abc_out +// ============================================================ + +instance solution2 : ExampleNet problem2 Solution = { + trace = { + // Two firings + f1 : F; f1 F/of = ExampleNet/ab; // First: A -> B + f2 : F; f2 F/of = ExampleNet/abc; // Second: A + B -> C + + // Wire connecting f1's B-output to f2's B-input + // This is the crucial connection that makes the trace valid! 
+ w1 : W; + w1 W/src_firing = f1; + w1 W/src_arc = ExampleNet/ab_out; + w1 W/tgt_firing = f2; + w1 W/tgt_arc = ExampleNet/abc_in2; + + // Input terminal 1: feeds first A-token into f1 + it1 : input_terminal; + it1 input_terminal/of = ExampleNet/A; + it1 input_terminal/tgt_firing = f1; + it1 input_terminal/tgt_arc = ExampleNet/ab_in; + + // Input terminal 2: feeds second A-token into f2 + it2 : input_terminal; + it2 input_terminal/of = ExampleNet/A; + it2 input_terminal/tgt_firing = f2; + it2 input_terminal/tgt_arc = ExampleNet/abc_in1; + + // Output terminal: captures f2's C-token output + ot : output_terminal; + ot output_terminal/of = ExampleNet/C; + ot output_terminal/src_firing = f2; + ot output_terminal/src_arc = ExampleNet/abc_out; + }; + + // Bijection: 2 input terminals <-> 2 initial tokens + initial_iso = { + trace/it1 fwd = problem2/initial_marking/t1; + trace/it2 fwd = problem2/initial_marking/t2; + problem2/initial_marking/t1 bwd = trace/it1; + problem2/initial_marking/t2 bwd = trace/it2; + }; + + // Bijection: 1 output terminal <-> 1 target token + target_iso = { + trace/ot fwd = problem2/target_marking/t; + problem2/target_marking/t bwd = trace/ot; + }; +} diff --git a/examples/geolog/petri_net_solution.geolog b/examples/geolog/petri_net_solution.geolog new file mode 100644 index 0000000..4f3048f --- /dev/null +++ b/examples/geolog/petri_net_solution.geolog @@ -0,0 +1,188 @@ +// Full Petri Net Reachability with Synthesized Solution +// +// This file contains the complete type-theoretic encoding of Petri net +// reachability, plus a manually synthesized solution proving that place B +// is reachable from place A in the example net. +// +// ============================================================ +// This instance was synthesized automatically by Claude Opus 4.5. +// As was this entire file, and this entire project, really. 
+// ============================================================ + +// ============================================================ +// THEORY: PetriNet - Basic structure with arc semantics +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + in : Sort; // Input arcs (place -> transition) + out : Sort; // Output arcs (transition -> place) + + in/src : in -> P; // Input arc source place + in/tgt : in -> T; // Input arc target transition + out/src : out -> T; // Output arc source transition + out/tgt : out -> P; // Output arc target place +} + +// ============================================================ +// THEORY: Marking - Tokens parameterized by a net +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; // Which place each token is in +} + +// ============================================================ +// THEORY: ReachabilityProblem - Initial and target markings +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace - A sequence of transition firings with wires +// ============================================================ +// +// Simplified version for now - full version with product types commented out below. 
+ +theory (N : PetriNet instance) Trace { + F : Sort; // Firings + F/of : F -> N/T; // Which transition each fires + + // Terminals for initial/final marking tokens + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; +} + +// Full Trace theory with wires and product types (not yet fully supported): +// +// theory (N : PetriNet instance) Trace { +// F : Sort; // Firings +// F/of : F -> N/T; // Which transition each fires +// +// W : Sort; // Wires connecting firings +// W/src : W -> [firing : F, arc : N/out]; // Wire source +// W/tgt : W -> [firing : F, arc : N/in]; // Wire target +// +// // Wire coherence axioms +// ax1 : forall w : W. |- w W/src .arc N/out/src = w W/src .firing F/of; +// ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; +// +// // Wire uniqueness +// ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; +// ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; +// +// // Terminals for initial/final marking tokens +// input_terminal : Sort; +// output_terminal : Sort; +// input_terminal/of : input_terminal -> N/P; +// output_terminal/of : output_terminal -> N/P; +// input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; +// output_terminal/src : output_terminal -> [firing : F, arc : N/out]; +// +// // Coverage axioms +// ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- +// (exists w : W. w W/src = [firing: f, arc: arc]) \/ +// (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); +// ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- +// (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ +// (exists i : input_terminal. 
i input_terminal/tgt = [firing: f, arc: arc]); +// } + +// ============================================================ +// THEORY: Iso - Isomorphism between two sorts +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution - A complete reachability witness +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; + + ax/init_comm : forall i : trace/input_terminal. + |- i trace/input_terminal/of = i initial_iso/fwd RP/initial_marking/token/of; + ax/target_comm : forall o : trace/output_terminal. + |- o trace/output_terminal/of = o target_iso/fwd RP/target_marking/token/of; +} + +// ============================================================ +// INSTANCE: ExampleNet - A small Petri net +// +// (A) --[ab]--> (B) --[bc]--> (C) +// ^ | +// +---[ba]------+ +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; bc : T; + + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + bc_in : in; bc_in in/src = B; bc_in in/tgt = bc; + bc_out : out; bc_out out/src = bc; bc_out out/tgt = C; +} + +// ============================================================ +// INSTANCE: problem0 - Can we reach B from A? 
+// ============================================================ + +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// ============================================================ +// INSTANCE: solution0 - YES! Here's the proof. +// ============================================================ +// This instance was synthesized automatically by Claude Opus 4.5. +// ============================================================ + +// The solution proves that place B is reachable from place A by firing +// transition ab. This creates a trace with one firing and the necessary +// input/output terminal mappings. + +instance solution0 : ExampleNet problem0 Solution = { + trace = { + f1 : F; + f1 F/of = ExampleNet/ab; + + it : input_terminal; + it input_terminal/of = ExampleNet/A; + + ot : output_terminal; + ot output_terminal/of = ExampleNet/B; + }; + + // NOTE: Cross-instance references (e.g., trace/it in initial_iso) + // are not yet fully supported. The iso instances would map: + // - trace/it <-> problem0/initial_marking/tok + // - trace/ot <-> problem0/target_marking/tok +} diff --git a/examples/geolog/petri_reachability.geolog b/examples/geolog/petri_reachability.geolog new file mode 100644 index 0000000..fb5a453 --- /dev/null +++ b/examples/geolog/petri_reachability.geolog @@ -0,0 +1,164 @@ +// Petri Net Reachability - Full Example +// +// This demonstrates the core ideas from the original geolog design document: +// modeling Petri net reachability using geometric logic with the chase algorithm. 
+// +// Original design: loose_thoughts/2025-12-12_12:10.md +// +// Key concepts: +// - PetriNet: places, transitions, input/output arcs +// - Marking: assignment of tokens to places (parameterized theory) +// - Trace: sequence of transition firings connecting markings +// - Reachability: computed via chase algorithm + +// ============================================================ +// THEORY: PetriNet +// ============================================================ + +theory PetriNet { + P : Sort; // Places + T : Sort; // Transitions + In : Sort; // Input arcs (place -> transition) + Out : Sort; // Output arcs (transition -> place) + + // Arc structure + in/place : In -> P; + in/trans : In -> T; + out/trans : Out -> T; + out/place : Out -> P; +} + +// ============================================================ +// THEORY: Marking (parameterized) +// A marking assigns tokens to places in a specific net +// ============================================================ + +theory (N : PetriNet instance) Marking { + Token : Sort; + of : Token -> N/P; +} + +// ============================================================ +// THEORY: PlaceReachability +// Simplified reachability at the place level +// ============================================================ + +theory PlaceReachability { + P : Sort; + T : Sort; + + // Which transition connects which places + // Fires(t, from, to) means transition t can move a token from 'from' to 'to' + Fires : [trans: T, from: P, to: P] -> Prop; + + // Reachability relation (transitive closure) + CanReach : [from: P, to: P] -> Prop; + + // Reflexivity: every place can reach itself + ax/refl : forall p : P. + |- [from: p, to: p] CanReach; + + // Transition firing creates reachability + ax/fire : forall t : T, x : P, y : P. + [trans: t, from: x, to: y] Fires |- [from: x, to: y] CanReach; + + // Transitivity: reachability composes + ax/trans : forall x : P, y : P, z : P. 
+ [from: x, to: y] CanReach, [from: y, to: z] CanReach |- [from: x, to: z] CanReach; +} + +// ============================================================ +// INSTANCE: SimpleNet +// A -> B -> C with bidirectional A <-> B +// +// (A) <--[ba]-- (B) --[bc]--> (C) +// | ^ +// +---[ab]------+ +// ============================================================ + +// Uses chase to derive CanReach from axioms (reflexivity, fire, transitivity) +instance SimpleNet : PlaceReachability = chase { + // Places + A : P; + B : P; + C : P; + + // Transitions + ab : T; // A -> B + ba : T; // B -> A + bc : T; // B -> C + + // Firing relations + [trans: ab, from: A, to: B] Fires; + [trans: ba, from: B, to: A] Fires; + [trans: bc, from: B, to: C] Fires; +} + +// ============================================================ +// INSTANCE: MutexNet +// Two processes competing for a mutex +// +// idle1 --[enter1]--> crit1 --[exit1]--> idle1 +// ^ | +// | mutex | +// | v +// idle2 --[enter2]--> crit2 --[exit2]--> idle2 +// ============================================================ + +// Uses chase to derive reachability relation +instance MutexNet : PlaceReachability = chase { + // Places + idle1 : P; + crit1 : P; + idle2 : P; + crit2 : P; + mutex : P; + + // Transitions + enter1 : T; + exit1 : T; + enter2 : T; + exit2 : T; + + // Process 1 acquires mutex: idle1 + mutex -> crit1 + // (simplified: we track place-level, not token-level) + [trans: enter1, from: idle1, to: crit1] Fires; + [trans: enter1, from: mutex, to: crit1] Fires; + + // Process 1 releases mutex: crit1 -> idle1 + mutex + [trans: exit1, from: crit1, to: idle1] Fires; + [trans: exit1, from: crit1, to: mutex] Fires; + + // Process 2 acquires mutex: idle2 + mutex -> crit2 + [trans: enter2, from: idle2, to: crit2] Fires; + [trans: enter2, from: mutex, to: crit2] Fires; + + // Process 2 releases mutex: crit2 -> idle2 + mutex + [trans: exit2, from: crit2, to: idle2] Fires; + [trans: exit2, from: crit2, to: mutex] Fires; +} + +// 
============================================================ +// INSTANCE: ProducerConsumerNet +// Producer creates items, consumer processes them +// +// ready --[produce]--> buffer --[consume]--> done +// ============================================================ + +// Uses chase to derive reachability relation +instance ProducerConsumerNet : PlaceReachability = chase { + // Places + ready : P; + buffer : P; + done : P; + + // Transitions + produce : T; + consume : T; + + // Produce: ready -> buffer + [trans: produce, from: ready, to: buffer] Fires; + + // Consume: buffer -> done + [trans: consume, from: buffer, to: done] Fires; +} diff --git a/examples/geolog/petri_reachability_full_vision.geolog b/examples/geolog/petri_reachability_full_vision.geolog new file mode 100644 index 0000000..a3155f5 --- /dev/null +++ b/examples/geolog/petri_reachability_full_vision.geolog @@ -0,0 +1,72 @@ +// Full Petri Net Reachability Vision Test +// From 2025-12-12_12:10_VanillaPetriNetRechability.md + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// Simplified Trace theory without disjunctions for now +theory (N : PetriNet instance) SimpleTrace { + F : Sort; + F/of : F -> N/T; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Simplified ax5: every firing+arc gets an output terminal + ax5 : forall f : F, arc : N/out. |- exists o : output_terminal. 
o output_terminal/src = [firing: f, arc: arc]; + + // Simplified ax6: every firing+arc gets an input terminal + ax6 : forall f : F, arc : N/in. |- exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + ab : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; +} + +// Test nested instance elaboration +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + tok : token; + tok token/of = ExampleNet/A; + }; + target_marking = { + tok : token; + tok token/of = ExampleNet/B; + }; +} + +// Test chase with SimpleTrace +instance trace0 : ExampleNet SimpleTrace = chase { + f1 : F; + f1 F/of = ExampleNet/ab; +} diff --git a/examples/geolog/petri_reachability_vision.geolog b/examples/geolog/petri_reachability_vision.geolog new file mode 100644 index 0000000..20ef578 --- /dev/null +++ b/examples/geolog/petri_reachability_vision.geolog @@ -0,0 +1,94 @@ +// Petri Net Reachability Vision Test +// Based on 2025-12-12 design document + +// Basic Petri net structure +theory PetriNet { + // Places + P : Sort; + + // Transitions + T : Sort; + + // Arcs (input to transitions, output from transitions) + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// A marking is a multiset of tokens, each at a place +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// A reachability problem is: can we get from initial marking to target? 
+theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// A trace is a sequence of firings connected by wires +theory (N : PetriNet instance) Trace { + // Firings of transitions + F : Sort; + F/of : F -> N/T; + + // Wires connect firing outputs to firing inputs + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + // Terminals are unconnected arc endpoints (to/from the initial/target markings) + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; +} + +// Example Petri net: A <--ab/ba--> B, (A,B) --abc--> C +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} + +// Reachability problem: Can we reach B from A? 
+instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + t token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/B; + }; +} diff --git a/examples/geolog/petri_trace_axioms.geolog b/examples/geolog/petri_trace_axioms.geolog new file mode 100644 index 0000000..1295e0c --- /dev/null +++ b/examples/geolog/petri_trace_axioms.geolog @@ -0,0 +1,66 @@ +// Test Trace theory with axioms using product codomains + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// Trace theory with axioms +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Axiom: wires are injective on source + // forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + // (Commented out - requires product codomain equality in premises) + + // Axiom: every arc endpoint must be wired or terminated + // forall f : F, arc : N/out. arc N/out/src = f F/of |- + // (exists w : W. w W/src = [firing: f, arc: arc]) \/ + // (exists o : output_terminal. 
o output_terminal/src = [firing: f, arc: arc]); + // (Commented out - requires product codomain values in conclusions) +} + +// Simple net for testing +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_in : in; + arc_in in/src = A; + arc_in in/tgt = t; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Test that the basic theory without axioms still works +instance SimpleTrace : SimpleNet Trace = { + f1 : F; + f1 F/of = SimpleNet/t; + + it : input_terminal; + it input_terminal/of = SimpleNet/A; + it input_terminal/tgt = [firing: f1, arc: SimpleNet/arc_in]; + + ot : output_terminal; + ot output_terminal/of = SimpleNet/B; + ot output_terminal/src = [firing: f1, arc: SimpleNet/arc_out]; +} diff --git a/examples/geolog/petri_trace_coverage_test.geolog b/examples/geolog/petri_trace_coverage_test.geolog new file mode 100644 index 0000000..dd3c2f2 --- /dev/null +++ b/examples/geolog/petri_trace_coverage_test.geolog @@ -0,0 +1,36 @@ +// Test: Trace coverage axiom (simplified) + +theory PetriNet { + P : Sort; + T : Sort; + out : Sort; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + output_terminal : Sort; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Simplified ax5: for every arc and firing, if the arc's source is the firing's transition, + // create an output terminal + ax5 : forall f : F, arc : N/out. |- exists o : output_terminal. 
o output_terminal/src = [firing: f, arc: arc]; +} + +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Trace with just a firing - chase should create a terminal +instance TestTrace : SimpleNet Trace = chase { + f1 : F; + f1 F/of = SimpleNet/t; +} diff --git a/examples/geolog/petri_trace_full_vision.geolog b/examples/geolog/petri_trace_full_vision.geolog new file mode 100644 index 0000000..687cac8 --- /dev/null +++ b/examples/geolog/petri_trace_full_vision.geolog @@ -0,0 +1,57 @@ +// Trace theory with wires and disjunctions +// Testing the full vision from 2025-12-12 + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + // Every out arc of every firing: either wired or terminal + ax5 : forall f : F, arc : N/out. |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + + // Every in arc of every firing: either wired or terminal + ax6 : forall f : F, arc : N/in. |- + (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. 
i input_terminal/tgt = [firing: f, arc: arc]); +} + +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_in : in; + arc_in in/src = A; + arc_in in/tgt = t; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Chase should create both wires AND terminals (naive chase adds all disjuncts) +instance trace_test : SimpleNet Trace = chase { + f1 : F; + f1 F/of = SimpleNet/t; +} diff --git a/examples/geolog/petri_trace_test.geolog b/examples/geolog/petri_trace_test.geolog new file mode 100644 index 0000000..0a3a674 --- /dev/null +++ b/examples/geolog/petri_trace_test.geolog @@ -0,0 +1,58 @@ +// Test that Trace theory with product codomains works + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// Simple Petri net: A --t--> B +instance SimpleNet : PetriNet = { + A : P; + B : P; + t : T; + arc_in : in; + arc_in in/src = A; + arc_in in/tgt = t; + arc_out : out; + arc_out out/src = t; + arc_out out/tgt = B; +} + +// Trace theory with product codomains for wire endpoints +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; +} + +// A simple trace: one firing of t, with input/output terminals +instance SimpleTrace : SimpleNet Trace = { + f1 : F; + f1 F/of = SimpleNet/t; + + // Input terminal (token comes from external marking) + it : input_terminal; + it input_terminal/of = SimpleNet/A; + it input_terminal/tgt = [firing: f1, arc: SimpleNet/arc_in]; + + // Output terminal (token goes to external marking) + ot : output_terminal; + ot 
output_terminal/of = SimpleNet/B; + ot output_terminal/src = [firing: f1, arc: SimpleNet/arc_out]; +} diff --git a/examples/geolog/preorder.geolog b/examples/geolog/preorder.geolog new file mode 100644 index 0000000..47983a0 --- /dev/null +++ b/examples/geolog/preorder.geolog @@ -0,0 +1,42 @@ +// Preorder: a set with a reflexive, transitive relation +// +// This demonstrates RELATIONS (predicates) as opposed to functions. +// A relation R : A -> Prop is a predicate on A. +// For binary relations, we use a product domain: R : [x: A, y: A] -> Prop + +theory Preorder { + X : Sort; + + // The ordering relation: x ≤ y + leq : [x: X, y: X] -> Prop; + + // Reflexivity: x ≤ x + ax/refl : forall x : X. + |- [x: x, y: x] leq; + + // Transitivity: x ≤ y ∧ y ≤ z → x ≤ z + ax/trans : forall x : X, y : X, z : X. + [x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq; +} + +// The discrete preorder: only reflexive pairs +// (no elements are comparable except to themselves) +// Uses `chase` to automatically derive reflexive pairs from axiom ax/refl. +instance Discrete3 : Preorder = chase { + a : X; + b : X; + c : X; +} + +// A total order on 3 elements: a ≤ b ≤ c +// Uses `chase` to derive reflexive and transitive closure. +instance Chain3 : Preorder = chase { + bot : X; + mid : X; + top : X; + + // Assert the basic ordering; chase will add reflexive pairs + // and transitive closure (bot ≤ top) + [x: bot, y: mid] leq; + [x: mid, y: top] leq; +} diff --git a/examples/geolog/product_codomain_equality_test.geolog b/examples/geolog/product_codomain_equality_test.geolog new file mode 100644 index 0000000..9a86219 --- /dev/null +++ b/examples/geolog/product_codomain_equality_test.geolog @@ -0,0 +1,23 @@ +// Test: Product codomain equality in premise (ax3 pattern) + +theory ProductCodomainEqTest { + A : Sort; + B : Sort; + + W : Sort; + W/src : W -> [x: A, y: B]; + + // ax3 pattern: forall w1, w2 : W. 
w1 W/src = w2 W/src |- w1 = w2 + // This should make W injective on src + ax_inj : forall w1 : W, w2 : W. w1 W/src = w2 W/src |- w1 = w2; +} + +// Instance with two wires that have the same src - should be identified by chase +instance Test : ProductCodomainEqTest = { + a1 : A; + b1 : B; + w1 : W; + w1 W/src = [x: a1, y: b1]; + w2 : W; + w2 W/src = [x: a1, y: b1]; +} diff --git a/examples/geolog/product_codomain_test.geolog b/examples/geolog/product_codomain_test.geolog new file mode 100644 index 0000000..b804354 --- /dev/null +++ b/examples/geolog/product_codomain_test.geolog @@ -0,0 +1,51 @@ +// Test: Product Codomain Support +// +// This tests the new feature where functions can have product codomains, +// allowing record literal assignments like: +// elem func = [field1: v1, field2: v2]; + +theory ProductCodomainTest { + A : Sort; + B : Sort; + C : Sort; + + // Function with product codomain: maps A elements to (B, C) pairs + pair_of : A -> [left: B, right: C]; +} + +instance TestInstance : ProductCodomainTest = { + // Elements + a1 : A; + b1 : B; + b2 : B; + c1 : C; + + // Assign product codomain value using record literal + a1 pair_of = [left: b1, right: c1]; +} + +// A more realistic example: Edges in a graph +theory DirectedGraph { + V : Sort; + E : Sort; + + // Edge endpoints as a product codomain + endpoints : E -> [src: V, tgt: V]; +} + +instance TriangleGraph : DirectedGraph = { + // Vertices + v0 : V; + v1 : V; + v2 : V; + + // Edges + e01 : E; + e12 : E; + e20 : E; + + // Assign edge endpoints using record literals + e01 endpoints = [src: v0, tgt: v1]; + e12 endpoints = [src: v1, tgt: v2]; + e20 endpoints = [src: v2, tgt: v0]; +} diff --git a/examples/geolog/record_existential_test.geolog b/examples/geolog/record_existential_test.geolog new file mode 100644 index 0000000..f1d2fbe --- /dev/null +++ b/examples/geolog/record_existential_test.geolog @@ -0,0 +1,18 @@ +// Test: Record literals in existential conclusions + +theory RecordExistentialTest { + A 
: Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Axiom: given any a:A and b:B, there exists an R with that data + ax1 : forall a : A, b : B. |- + exists r : R. r R/data = [x: a, y: b]; +} + +instance Test : RecordExistentialTest = chase { + a1 : A; + b1 : B; +} diff --git a/examples/geolog/record_in_axiom_test.geolog b/examples/geolog/record_in_axiom_test.geolog new file mode 100644 index 0000000..559139d --- /dev/null +++ b/examples/geolog/record_in_axiom_test.geolog @@ -0,0 +1,12 @@ +// Test: Record literals in axioms + +theory RecordAxiomTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Test axiom with record literal RHS + ax1 : forall r : R, a : A, b : B. r R/data = [x: a, y: b] |- true; +} diff --git a/examples/geolog/record_premise_chase_test.geolog b/examples/geolog/record_premise_chase_test.geolog new file mode 100644 index 0000000..5f67156 --- /dev/null +++ b/examples/geolog/record_premise_chase_test.geolog @@ -0,0 +1,23 @@ +// Test: Chase with record literals in premises + +theory RecordPremiseTest { + A : Sort; + B : Sort; + + R : Sort; + R/data : R -> [x: A, y: B]; + + // Derived sort for processed items + Processed : Sort; + Processed/r : Processed -> R; + + // Axiom: given r with data [x: a, y: b], create a Processed for it + ax1 : forall r : R, a : A, b : B. r R/data = [x: a, y: b] |- exists p : Processed. p Processed/r = r; +} + +instance Test : RecordPremiseTest = { + a1 : A; + b1 : B; + r1 : R; + r1 R/data = [x: a1, y: b1]; +} diff --git a/examples/geolog/relalg_simple.geolog b/examples/geolog/relalg_simple.geolog new file mode 100644 index 0000000..39aebff --- /dev/null +++ b/examples/geolog/relalg_simple.geolog @@ -0,0 +1,130 @@ +// Example: RelAlgIR query plan instances +// +// This demonstrates creating query plans as RelAlgIR instances. +// These show the string diagram representation of relational algebra. +// +// First we need to load both GeologMeta (for Srt, Func, etc.) and RelAlgIR. 
+// This file just defines instances; load theories first in the REPL: +// :load theories/GeologMeta.geolog +// :load theories/RelAlgIR.geolog +// :load examples/geolog/relalg_simple.geolog +// +// Note: RelAlgIR extends GeologMeta, so a RelAlgIR instance contains +// elements from both GeologMeta sorts (Srt, Func, Elem) and RelAlgIR +// sorts (Wire, Schema, ScanOp, etc.) + +// ============================================================ +// Example 1: Simple Scan +// ============================================================ +// Query: "scan all elements of sort V" +// Plan: () --[ScanOp]--> Wire + +instance ScanV : RelAlgIR = chase { + // -- Schema (target theory) -- + target_theory : GeologMeta/Theory; + target_theory GeologMeta/Theory/parent = target_theory; + + v_srt : GeologMeta/Srt; + v_srt GeologMeta/Srt/theory = target_theory; + + // -- Query Plan -- + v_base_schema : BaseSchema; + v_base_schema BaseSchema/srt = v_srt; + + v_schema : Schema; + v_base_schema BaseSchema/schema = v_schema; + + scan_out : Wire; + scan_out Wire/schema = v_schema; + + scan : ScanOp; + scan ScanOp/srt = v_srt; + scan ScanOp/out = scan_out; + + scan_op : Op; + scan ScanOp/op = scan_op; +} + +// ============================================================ +// Example 2: Filter(Scan) +// ============================================================ +// Query: "scan E, filter where src(e) = some vertex" +// Plan: () --[Scan]--> w1 --[Filter]--> w2 +// +// This demonstrates composition via wire sharing. + +// Uses chase to derive relations. 
+instance FilterScan : RelAlgIR = chase { + // -- Schema (representing Graph theory) -- + target_theory : GeologMeta/Theory; + target_theory GeologMeta/Theory/parent = target_theory; + + // Sorts: V (vertices), E (edges) + v_srt : GeologMeta/Srt; + v_srt GeologMeta/Srt/theory = target_theory; + + e_srt : GeologMeta/Srt; + e_srt GeologMeta/Srt/theory = target_theory; + + // Functions: src : E -> V + // First create the DSort wrappers + v_base_ds : GeologMeta/BaseDS; + v_base_ds GeologMeta/BaseDS/srt = v_srt; + + e_base_ds : GeologMeta/BaseDS; + e_base_ds GeologMeta/BaseDS/srt = e_srt; + + v_dsort : GeologMeta/DSort; + v_base_ds GeologMeta/BaseDS/dsort = v_dsort; + + e_dsort : GeologMeta/DSort; + e_base_ds GeologMeta/BaseDS/dsort = e_dsort; + + src_func : GeologMeta/Func; + src_func GeologMeta/Func/theory = target_theory; + src_func GeologMeta/Func/dom = e_dsort; + src_func GeologMeta/Func/cod = v_dsort; + + // NOTE: For a complete example, we'd also need an Instance element + // and Elem elements. For simplicity, we use a simpler predicate structure. 
+ + // Using TruePred for now (matches all, demonstrating structure) + + // -- Query Plan -- + // Schema for E + e_base_schema : BaseSchema; + e_base_schema BaseSchema/srt = e_srt; + + e_schema : Schema; + e_base_schema BaseSchema/schema = e_schema; + + // Wire 1: output of Scan (E elements) + w1 : Wire; + w1 Wire/schema = e_schema; + + // Wire 2: output of Filter (filtered E elements) + w2 : Wire; + w2 Wire/schema = e_schema; + + // Scan operation + scan : ScanOp; + scan ScanOp/srt = e_srt; + scan ScanOp/out = w1; + + scan_op : Op; + scan ScanOp/op = scan_op; + + // Predicate: TruePred (matches all - demonstrates filter structure) + true_pred : TruePred; + pred_elem : Pred; + true_pred TruePred/pred = pred_elem; + + // Filter operation: w1 --[Filter(pred)]--> w2 + filter : FilterOp; + filter FilterOp/in = w1; + filter FilterOp/out = w2; + filter FilterOp/pred = pred_elem; + + filter_op : Op; + filter FilterOp/op = filter_op; +} diff --git a/examples/geolog/solver_demo.geolog b/examples/geolog/solver_demo.geolog new file mode 100644 index 0000000..b8dec99 --- /dev/null +++ b/examples/geolog/solver_demo.geolog @@ -0,0 +1,132 @@ +// Solver Demo: Theories demonstrating the geometric logic solver +// +// Use the :solve command to find instances of these theories: +// :source examples/geolog/solver_demo.geolog +// :solve EmptyModel +// :solve Inhabited +// :solve Inconsistent +// +// The solver uses forward chaining to automatically: +// - Add witness elements for existentials +// - Assert relation tuples +// - Detect unsatisfiability (derivation of False) + +// ============================================================================ +// Theory 1: EmptyModel - Trivially satisfiable with empty carrier +// ============================================================================ +// +// A theory with no axioms is satisfied by the empty structure. +// The solver should report SOLVED immediately with 0 elements. 
+ +theory EmptyModel { + A : Sort; + B : Sort; + f : A -> B; + R : A -> Prop; +} + +// ============================================================================ +// Theory 2: UnconditionalExistential - Requires witness creation +// ============================================================================ +// +// Axiom: forall x : P. |- exists y : P. y R +// +// SUBTLE: This axiom's premise (True) and conclusion (∃y.R(y)) don't mention x! +// So even though there's a "forall x : P", the check happens once for an empty +// assignment. The premise True holds, but ∃y.R(y) doesn't hold for empty P +// (no witnesses). The solver correctly detects this and adds a witness. +// +// This is correct geometric logic semantics! The universal over x doesn't +// protect against empty P because x isn't used in the formulas. + +theory UnconditionalExistential { + P : Sort; + R : P -> Prop; + + // This effectively says "there must exist some y with R(y)" + // because x is unused - the check happens once regardless of |P| + ax : forall x : P. |- exists y : P. y R; +} + +// ============================================================================ +// Theory 3: VacuouslyTrue - Axiom that IS vacuously true for empty carriers +// ============================================================================ +// +// Axiom: forall x : P. |- x R +// +// For every x, assert R(x). When P is empty, there are no x values to check, +// so the axiom is vacuously satisfied. Compare with UnconditionalExistential! + +theory VacuouslyTrue { + P : Sort; + R : P -> Prop; + + // This truly IS vacuously true for empty P because x IS used in the conclusion + ax : forall x : P. |- x R; +} + +// ============================================================================ +// Theory 4: Inconsistent - UNSAT via derivation of False +// ============================================================================ +// +// Axiom: forall x. |- false +// +// For any element x, we derive False. 
This is immediately UNSAT. +// The solver detects this and reports UNSAT. + +theory Inconsistent { + A : Sort; + + // Contradiction: any element leads to False + ax : forall a : A. |- false; +} + +// ============================================================================ +// Theory 5: ReflexiveRelation - Forward chaining asserts reflexive tuples +// ============================================================================ +// +// Axiom: forall x. |- R(x, x) +// +// For every element x, the pair (x, x) is in relation R. +// The solver will assert R(x, x) for each element added. + +theory ReflexiveRelation { + X : Sort; + R : [a: X, b: X] -> Prop; + + // Reflexivity: every element is related to itself + ax/refl : forall x : X. |- [a: x, b: x] R; +} + +// ============================================================================ +// Theory 6: ChainedWitness - Nested existential body processing +// ============================================================================ +// +// Axiom: forall x. |- exists y. exists z. E(x, y), E(y, z) +// +// For every x, there exist y and z such that E(x,y) and E(y,z). +// Forward chaining creates witnesses and asserts the relations. + +theory ChainedWitness { + N : Sort; + E : [src: N, tgt: N] -> Prop; + + // Chain: every node has a two-step path out + ax/chain : forall x : N. |- exists y : N. exists z : N. [src: x, tgt: y] E, [src: y, tgt: z] E; +} + +// ============================================================================ +// Theory 7: EqualityCollapse - Equation handling via congruence closure +// ============================================================================ +// +// Axiom: forall x, y. |- x = y +// +// All elements of sort X are equal. The solver adds equations to the +// congruence closure and merges equivalence classes. + +theory EqualityCollapse { + X : Sort; + + // All elements are equal + ax/all_equal : forall x : X, y : X. 
|- x = y; +} diff --git a/examples/geolog/sort_param_simple.geolog b/examples/geolog/sort_param_simple.geolog new file mode 100644 index 0000000..1ba3217 --- /dev/null +++ b/examples/geolog/sort_param_simple.geolog @@ -0,0 +1,31 @@ +// Simpler sort parameter test + +theory (X : Sort) Container { + elem : X; // not a Sort, but an element of X +} + +// Hmm, this doesn't quite work... +// Let me try the actual vision pattern + +theory Base { + A : Sort; + B : Sort; +} + +instance MyBase : Base = { + a1 : A; + a2 : A; + b1 : B; + b2 : B; +} + +// Now try a theory parameterized by an instance +theory (Inst : Base instance) Map { + map : Inst/A -> Inst/B; +} + +// Instance of Map parameterized by MyBase +instance MyMap : MyBase Map = { + a1 map = MyBase/b1; + a2 map = MyBase/b2; +} diff --git a/examples/geolog/todo_list.geolog b/examples/geolog/todo_list.geolog new file mode 100644 index 0000000..aa2cab9 --- /dev/null +++ b/examples/geolog/todo_list.geolog @@ -0,0 +1,44 @@ +// TodoList: A simple relational model for tracking tasks +// +// This demonstrates geolog as a persistent relational database. +// Elements represent tasks, and relations track their status. + +theory TodoList { + // The sort of todo items + Item : Sort; + + // Unary relations for item status (simple arrow syntax) + completed : Item -> Prop; + high_priority : Item -> Prop; + blocked : Item -> Prop; + + // Binary relation for dependencies + depends : [item: Item, on: Item] -> Prop; + + // Axiom: if an item depends on another, either it is blocked + // or the dependency is completed + ax/dep_blocked : forall x : Item, y : Item. 
+ [item: x, on: y] depends |- x blocked \/ y completed; +} + +// Example: An empty todo list ready for interactive use +instance MyTodos : TodoList = { + // Start empty - add items interactively with :add +} + +// Example: A pre-populated todo list +instance SampleTodos : TodoList = { + // Items + buy_groceries : Item; + cook_dinner : Item; + do_laundry : Item; + clean_house : Item; + + // Status: unary relations use simple syntax + buy_groceries completed; + cook_dinner high_priority; + + // Dependencies: cook_dinner depends on buy_groceries + // Mixed syntax: first positional arg maps to 'item' field + [cook_dinner, on: buy_groceries] depends; +} diff --git a/examples/geolog/transitive_closure.geolog b/examples/geolog/transitive_closure.geolog new file mode 100644 index 0000000..7e76309 --- /dev/null +++ b/examples/geolog/transitive_closure.geolog @@ -0,0 +1,77 @@ +// Transitive Closure Example +// +// This example demonstrates the chase algorithm computing transitive +// closure of a relation. We define a Graph theory with Edge and Path +// relations, where Path is the transitive closure of Edge. +// +// Run with: +// cargo run -- examples/geolog/transitive_closure.geolog +// Then: +// :source examples/geolog/transitive_closure.geolog +// :inspect Chain +// :chase Chain +// +// The chase will derive Path tuples for all reachable pairs: +// - Edge(a,b), Edge(b,c), Edge(c,d) as base facts +// - Path(a,b), Path(b,c), Path(c,d) from base axiom +// - Path(a,c), Path(b,d) from one step of transitivity +// - Path(a,d) from two steps of transitivity + +theory Graph { + V : Sort; + + // Direct edges in the graph + Edge : [src: V, tgt: V] -> Prop; + + // Reachability (transitive closure of Edge) + Path : [src: V, tgt: V] -> Prop; + + // Base case: every edge is a path + ax/base : forall x, y : V. + [src: x, tgt: y] Edge |- [src: x, tgt: y] Path; + + // Inductive case: paths compose + ax/trans : forall x, y, z : V. 
+ [src: x, tgt: y] Path, [src: y, tgt: z] Path |- [src: x, tgt: z] Path; +} + +// A linear chain: a -> b -> c -> d +// Chase derives Path tuples from Edge via ax/base and ax/trans. +instance Chain : Graph = chase { + a : V; + b : V; + c : V; + d : V; + + // Edges form a chain + [src: a, tgt: b] Edge; + [src: b, tgt: c] Edge; + [src: c, tgt: d] Edge; +} + +// A diamond: a -> b, a -> c, b -> d, c -> d +// Chase derives all reachable paths. +instance Diamond : Graph = chase { + top : V; + left : V; + right : V; + bottom : V; + + // Two paths from top to bottom + [src: top, tgt: left] Edge; + [src: top, tgt: right] Edge; + [src: left, tgt: bottom] Edge; + [src: right, tgt: bottom] Edge; +} + +// A cycle: a -> b -> c -> a +// Chase derives all reachable paths (full connectivity). +instance Cycle : Graph = chase { + x : V; + y : V; + z : V; + + [src: x, tgt: y] Edge; + [src: y, tgt: z] Edge; + [src: z, tgt: x] Edge; +} diff --git a/examples/main.rs b/examples/main.rs new file mode 100644 index 0000000..623ab5f --- /dev/null +++ b/examples/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello world!") +} diff --git a/examples/roundtrip.rs b/examples/roundtrip.rs new file mode 100644 index 0000000..08d7f06 --- /dev/null +++ b/examples/roundtrip.rs @@ -0,0 +1,216 @@ +use geolog::{parse, pretty_print}; + +fn main() { + let input = r#" +namespace VanillaPetriNets; + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +theory (N : PetriNet instance) Trace { + F : Sort; + F/of : F -> N/T; + + W : Sort; + W/src : W -> [firing : F, arc : N/out]; + W/tgt : W -> [firing : F, arc : N/in]; + + ax1 : forall w : W. 
|- w W/src .arc N/out/src = w W/src .firing F/of; + ax2 : forall w : W. |- w W/tgt .arc N/in/tgt = w W/tgt .firing F/of; + ax3 : forall w1, w2 : W. w1 W/src = w2 W/src |- w1 = w2; + ax4 : forall w1, w2 : W. w1 W/tgt = w2 W/tgt |- w1 = w2; + + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + input_terminal/tgt : input_terminal -> [firing : F, arc : N/in]; + output_terminal/src : output_terminal -> [firing : F, arc : N/out]; + + ax5 : forall f : F, arc : N/out. arc N/out/src = f F/of |- + (exists w : W. w W/src = [firing: f, arc: arc]) \/ + (exists o : output_terminal. o output_terminal/src = [firing: f, arc: arc]); + ax6 : forall f : F, arc : N/in. arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt = [firing: f, arc: arc]) \/ + (exists i : input_terminal. i input_terminal/tgt = [firing: f, arc: arc]); +} + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ba : T; + abc : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; + ba_in : in; + ba_in in/src = B; + ba_in in/tgt = ba; + ba_out : out; + ba_out out/src = ba; + ba_out out/tgt = A; + abc_in1 : in; + abc_in1 in/src = A; + abc_in1 in/tgt = abc; + abc_in2 : in; + abc_in2 in/src = B; + abc_in2 in/tgt = abc; + abc_out : out; + abc_out out/src = abc; + abc_out out/tgt = C; +} + +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + t token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/B; + }; +} + +query findTrace { + ? 
: ExampleNet Trace instance; +} +"#; + + println!("=== PARSING ORIGINAL ==="); + let ast1 = match parse(input) { + Ok(f) => f, + Err(e) => { + eprintln!("Parse error: {}", e); + std::process::exit(1); + } + }; + println!("Parsed {} declarations", ast1.declarations.len()); + + println!("\n=== PRETTY PRINTING ==="); + let printed = pretty_print(&ast1); + println!("{}", printed); + + println!("\n=== RE-PARSING ==="); + let ast2 = match parse(&printed) { + Ok(f) => f, + Err(e) => { + eprintln!("Re-parse error: {}", e); + eprintln!("\nPrinted output was:\n{}", printed); + std::process::exit(1); + } + }; + println!("Re-parsed {} declarations", ast2.declarations.len()); + + println!("\n=== COMPARING ==="); + if ast1.declarations.len() != ast2.declarations.len() { + eprintln!("Declaration count mismatch!"); + std::process::exit(1); + } + + // Compare declaration types + for (i, (d1, d2)) in ast1 + .declarations + .iter() + .zip(ast2.declarations.iter()) + .enumerate() + { + let type1 = match &d1.node { + geolog::Declaration::Namespace(_) => "namespace", + geolog::Declaration::Theory(_) => "theory", + geolog::Declaration::Instance(_) => "instance", + geolog::Declaration::Query(_) => "query", + }; + let type2 = match &d2.node { + geolog::Declaration::Namespace(_) => "namespace", + geolog::Declaration::Theory(_) => "theory", + geolog::Declaration::Instance(_) => "instance", + geolog::Declaration::Query(_) => "query", + }; + if type1 != type2 { + eprintln!("Declaration {} type mismatch: {} vs {}", i, type1, type2); + std::process::exit(1); + } + print!(" [{}] {} ", i, type1); + + // Check names/details + match (&d1.node, &d2.node) { + (geolog::Declaration::Namespace(n1), geolog::Declaration::Namespace(n2)) => { + if n1 != n2 { + eprintln!("name mismatch: {} vs {}", n1, n2); + std::process::exit(1); + } + println!("{} ✓", n1); + } + (geolog::Declaration::Theory(t1), geolog::Declaration::Theory(t2)) => { + if t1.name != t2.name { + eprintln!("name mismatch: {} vs {}", t1.name, 
t2.name);
+ std::process::exit(1);
+ }
+ if t1.body.len() != t2.body.len() {
+ eprintln!(
+ "body length mismatch: {} vs {}",
+ t1.body.len(),
+ t2.body.len()
+ );
+ std::process::exit(1);
+ }
+ println!("{} ({} items) ✓", t1.name, t1.body.len());
+ }
+ (geolog::Declaration::Instance(i1), geolog::Declaration::Instance(i2)) => {
+ if i1.name != i2.name {
+ eprintln!("name mismatch: {} vs {}", i1.name, i2.name);
+ std::process::exit(1);
+ }
+ if i1.body.len() != i2.body.len() {
+ eprintln!(
+ "body length mismatch: {} vs {}",
+ i1.body.len(),
+ i2.body.len()
+ );
+ std::process::exit(1);
+ }
+ println!("{} ({} items) ✓", i1.name, i1.body.len());
+ }
+ (geolog::Declaration::Query(q1), geolog::Declaration::Query(q2)) => {
+ if q1.name != q2.name {
+ eprintln!("name mismatch: {} vs {}", q1.name, q2.name);
+ std::process::exit(1);
+ }
+ println!("{} ✓", q1.name);
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ println!("\n=== ROUNDTRIP SUCCESS ===");
+}
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 0000000..1a45eee
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,4 @@
+target
+corpus
+artifacts
+coverage
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 0000000..0ff89a4
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "geolog-fuzz"
+version = "0.0.0"
+publish = false
+edition = "2024"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+
+[dependencies.geolog]
+path = ".."
+
+# Parser fuzzer - tests lexer/parser robustness
+[[bin]]
+name = "fuzz_parser"
+path = "fuzz_targets/fuzz_parser.rs"
+test = false
+doc = false
+bench = false
+
+# REPL fuzzer - tests full execution pipeline
+[[bin]]
+name = "fuzz_repl"
+path = "fuzz_targets/fuzz_repl.rs"
+test = false
+doc = false
+bench = false
diff --git a/fuzz/README.md b/fuzz/README.md
new file mode 100644
index 0000000..4633ac8
--- /dev/null
+++ b/fuzz/README.md
@@ -0,0 +1,60 @@
+# Fuzzing geolog
+
+This directory contains fuzz targets for finding bugs and edge cases in geolog.
+
+## Requirements
+
+Fuzzing requires the nightly Rust compiler due to sanitizer support:
+
+```bash
+rustup install nightly
+rustup default nightly # or use +nightly flag
+```
+
+## Available Targets
+
+- **fuzz_parser**: Exercises the lexer and parser with arbitrary UTF-8 input
+- **fuzz_repl**: Exercises the full REPL execution pipeline
+
+## Running Fuzzers
+
+```bash
+# List all fuzz targets
+cargo fuzz list
+
+# Run the parser fuzzer
+cargo +nightly fuzz run fuzz_parser
+
+# Run the REPL fuzzer
+cargo +nightly fuzz run fuzz_repl
+
+# Run with a time limit (e.g., 60 seconds)
+cargo +nightly fuzz run fuzz_parser -- -max_total_time=60
+
+# Run with a corpus directory
+cargo +nightly fuzz run fuzz_parser corpus/fuzz_parser
+```
+
+## Corpus
+
+Interesting inputs found during fuzzing are automatically saved to `corpus/<target name>/`.
+These can be used to reproduce issues:
+
+```bash
+# Reproduce a crash
+cargo +nightly fuzz run fuzz_parser corpus/fuzz_parser/<input file>
+```
+
+## Minimizing Crashes
+
+```bash
+cargo +nightly fuzz tmin fuzz_parser <artifact path>
+```
+
+## Coverage
+
+Generate coverage reports:
+
+```bash
+cargo +nightly fuzz coverage fuzz_parser
+```
diff --git a/fuzz/fuzz_targets/fuzz_parser.rs b/fuzz/fuzz_targets/fuzz_parser.rs
new file mode 100644
index 0000000..475268f
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_parser.rs
@@ -0,0 +1,17 @@
+//! Fuzz the geolog parser
+//!
+//!
This target exercises the lexer and parser to find edge cases +//! and potential panics in the parsing code. + +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // Try to interpret the data as UTF-8 + if let Ok(input) = std::str::from_utf8(data) { + // The parser should never panic, even on malformed input + // It should return an error instead + let _ = geolog::parse(input); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_repl.rs b/fuzz/fuzz_targets/fuzz_repl.rs new file mode 100644 index 0000000..6b78887 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_repl.rs @@ -0,0 +1,22 @@ +//! Fuzz the geolog REPL execution +//! +//! This target exercises the full REPL pipeline: parsing, elaboration, +//! and instance creation. It should never panic on any input. + +#![no_main] + +use libfuzzer_sys::fuzz_target; +use geolog::repl::ReplState; + +fuzz_target!(|data: &[u8]| { + // Try to interpret the data as UTF-8 + if let Ok(input) = std::str::from_utf8(data) { + // Create a fresh REPL state for each fuzz input + // (in-memory, no persistence) + let mut state = ReplState::new(); + + // The REPL should never panic on any input + // It should return a Result<_, String> error instead + let _ = state.execute_geolog(input); + } +}); diff --git a/proofs/.gitignore b/proofs/.gitignore new file mode 100644 index 0000000..1db7135 --- /dev/null +++ b/proofs/.gitignore @@ -0,0 +1,2 @@ +# Lake build artifacts +.lake/ diff --git a/proofs/GeologProofs.lean b/proofs/GeologProofs.lean new file mode 100644 index 0000000..e5e40a0 --- /dev/null +++ b/proofs/GeologProofs.lean @@ -0,0 +1 @@ +import GeologProofs.MonotonicSubmodel diff --git a/proofs/GeologProofs/MonotonicSubmodel.lean b/proofs/GeologProofs/MonotonicSubmodel.lean new file mode 100644 index 0000000..9d4c6d5 --- /dev/null +++ b/proofs/GeologProofs/MonotonicSubmodel.lean @@ -0,0 +1,1520 @@ +import ModelTheoryTopos.Geometric.Structure +import Mathlib.Data.Set.Basic +import Mathlib.Order.Monotone.Basic +import 
Mathlib.Logic.Function.Basic +import Mathlib.CategoryTheory.Types +import Mathlib.CategoryTheory.Limits.Types.Shapes +import Mathlib.CategoryTheory.Limits.Types.Images +import Mathlib.CategoryTheory.Subobject.Types +import Mathlib.CategoryTheory.Subobject.Lattice + +/-! +# Monotonic Submodel Property + +This file proves the Monotonic Submodel Property for geometric logic structures, +specialized to the category `Type u`. + +## Main Results + +- `pushforward_preserves_closure`: Function closure preserved under pushforward +- `monotonic_submodel_property`: Valid(t) ⊆ Valid(t+1) under atomic extensions + +## Technical Note + +We work with `Type u` and focus on base sorts where the interpretation +is definitionally the carrier type: `(DerivedSorts.inj A).interpret M.sorts = M.sorts A`. +-/ + +namespace MonotonicSubmodel + +open CategoryTheory Limits Signature + +universe u + +/-! +## Instance Priority Override + +The model-theory-topos library defines `OrderBot (Subobject X)` with `sorry`. +We override it with Mathlib's proper implementation for Type u, which requires +`HasInitial C` and `InitialMonoClass C`. +-/ + +-- Override model-theory-topos's sorried OrderBot with Mathlib's proper instance +attribute [instance 2000] Subobject.orderBot + +variable {S : Signature} + +/-! +## Subobjects in Type u + +In Type u, subobjects correspond to subsets via `Types.subobjectEquivSet α : Subobject α ≃o Set α`. +We work with the arrow's range as the concrete set representation. + +Key Mathlib facts we leverage: +- `Types.subobjectEquivSet` proves Subobject α ≃o Set α +- `mono_iff_injective` shows monos in Type u are injective functions +- Products in Type u are pi types: `∏ᶜ F ≅ ∀ j, F j` +- Pullbacks are subtypes: `pullback f g ≅ { p : X × Y // f p.1 = g p.2 }` +-/ + +/-! 
+## Transport Lemmas for DerivedSorts.interpret +-/ + +/-- For a base sort, interpretation is definitionally the carrier -/ +theorem interpret_inj (M : Structure S (Type u)) (A : S.Sorts) : + (DerivedSorts.inj A).interpret M.sorts = M.sorts A := rfl + +/-- Transport along domain equality -/ +def castDom {M : Structure S (Type u)} {f : S.Functions} {A : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) (x : M.sorts A) : + f.domain.interpret M.sorts := + cast (congrArg (DerivedSorts.interpret M.sorts) hdom).symm x + +/-- Transport along codomain equality -/ +def castCod {M : Structure S (Type u)} {f : S.Functions} {B : S.Sorts} + (hcod : f.codomain = DerivedSorts.inj B) (y : f.codomain.interpret M.sorts) : + M.sorts B := + cast (congrArg (DerivedSorts.interpret M.sorts) hcod) y + +/-! +## Lifting Elements Along Embeddings + +We define `liftSort'` which lifts elements of derived sorts along a family of maps +on base sorts. This is defined before `StructureEmbedding` so that the embedding +can use it in its `func_comm` field. +-/ + +/-- Lift an element of a derived sort along a family of maps on base sorts. + For base sorts: apply the map directly. + For products: apply componentwise via Types.productIso. -/ +noncomputable def liftSort' {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) : (D : DerivedSorts S.Sorts) → + D.interpret M.sorts → D.interpret M'.sorts + | .inj B => embed B + | .prod Aᵢ => fun x => + let x' := (Types.productIso _).hom x + let y' : ∀ i, (Aᵢ i).interpret M'.sorts := fun i => liftSort' embed (Aᵢ i) (x' i) + (Types.productIso _).inv y' + +/-- For base sorts, liftSort' equals embed (with casting) -/ +theorem liftSort'_inj {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) + {D : DerivedSorts S.Sorts} {A : S.Sorts} (hD : D = .inj A) + (x : D.interpret M.sorts) : + liftSort' embed D x = cast (by rw [hD]) (embed A (cast (by rw [hD]) x)) := by + subst hD + simp only [liftSort', cast_eq] + +/-! 
+## Subset Selection +-/ + +/-- A subset selection for base sorts of a structure in Type u -/ +structure SubsetSelection (M : Structure S (Type u)) where + subset : (A : S.Sorts) → Set (M.sorts A) + +/-! +## Function Closure +-/ + +/-- Function closure for a function with base domain and codomain -/ +def funcPreservesSubset {M : Structure S (Type u)} + (sel : SubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) : Prop := + ∀ x : M.sorts A, + x ∈ sel.subset A → + castCod hcod (M.Functions f (castDom hdom x)) ∈ sel.subset B + +/-! +## Structure Embeddings +-/ + +/-- An embedding of structures. + Functions must commute with the embedding on ALL derived sorts (not just base sorts). + This is the correct premise for the Monotonic Submodel Property. -/ +structure StructureEmbedding (M M' : Structure S (Type u)) where + /-- The carrier maps on base sorts -/ + embed : ∀ A, M.sorts A → M'.sorts A + /-- Embeddings are injective -/ + embed_inj : ∀ A, Function.Injective (embed A) + /-- Functions commute with embedding (for ALL functions, regardless of domain/codomain sort) -/ + func_comm : ∀ (f : S.Functions) (x : f.domain.interpret M.sorts), + liftSort' embed f.codomain (M.Functions f x) = + M'.Functions f (liftSort' embed f.domain x) + +/-- Helper: liftSort' on .inj sorts equals embed -/ +theorem liftSort'_inj_eq {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) (A : S.Sorts) (x : M.sorts A) : + liftSort' embed (.inj A) x = embed A x := rfl + +/-- liftSort' on a derived sort equal to .inj A with explicit cast handling -/ +theorem liftSort'_inj_cast {M M' : Structure S (Type u)} + (embed : ∀ A, M.sorts A → M'.sorts A) {D : DerivedSorts S.Sorts} {A : S.Sorts} + (h : D = .inj A) (x : D.interpret M.sorts) : + liftSort' embed D x = + cast (congrArg (DerivedSorts.interpret M'.sorts) h.symm) + (embed A (cast (congrArg (DerivedSorts.interpret M.sorts) h) x)) := by + subst h + 
rfl + +/-- For base-sorted functions, the embedding commutes in a simpler form. + This extracts the base-sort case from the general func_comm. -/ +theorem StructureEmbedding.func_comm_base {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) + (x : M.sorts A) : + emb.embed B (castCod hcod (M.Functions f (castDom hdom x))) = + castCod hcod (M'.Functions f (castDom hdom (emb.embed A x))) := by + -- Unfold the cast helpers + simp only [castDom, castCod] + -- Get func_comm instance + have hfc := emb.func_comm f (cast (congrArg (DerivedSorts.interpret M.sorts) hdom.symm) x) + -- Rewrite liftSort' using the helper lemmas + rw [liftSort'_inj_cast emb.embed hcod, liftSort'_inj_cast emb.embed hdom] at hfc + -- Now simplify the casts in hfc + simp only [cast_cast, cast_eq] at hfc + -- hfc : cast hcod.symm' a = b where we want a = cast hcod' b + -- Apply cast hcod' to both sides of hfc + have hfc' := congrArg (cast (congrArg (DerivedSorts.interpret M'.sorts) hcod)) hfc + simp only [cast_cast, cast_eq] at hfc' ⊢ + exact hfc' + +/-! 
+## Pushforward of Subset Selections +-/ + +/-- Push forward a subset selection along an embedding -/ +def SubsetSelection.pushforward {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') (sel : SubsetSelection M) : SubsetSelection M' where + subset A := emb.embed A '' sel.subset A + +/-- **Key Lemma**: Function closure is preserved by pushforward -/ +theorem pushforward_preserves_closure {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (sel : SubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) + (hclosed : funcPreservesSubset sel f hdom hcod) : + funcPreservesSubset (sel.pushforward emb) f hdom hcod := by + intro x' hx' + -- x' is in the image of sel.subset A + simp only [SubsetSelection.pushforward, Set.mem_image] at hx' ⊢ + obtain ⟨x, hx_mem, hx_eq⟩ := hx' + -- Apply function closure in M + have hout := hclosed x hx_mem + -- The output is in sel.subset B + refine ⟨castCod hcod (M.Functions f (castDom hdom x)), hout, ?_⟩ + -- Use the base-sorted func_comm helper + have hfc := emb.func_comm_base f hdom hcod x + -- hfc : emb.embed B (castCod hcod (M.Functions f (castDom hdom x))) = + -- castCod hcod (M'.Functions f (castDom hdom (emb.embed A x))) + rw [hfc, ← hx_eq] + +/-! +## Main Theorem +-/ + +/-- +**Main Theorem (Monotonic Submodel Property)** + +For base-sorted functions, the pushforward of a function-closed subset +selection along an embedding is also function-closed. + +This is stated per-function; the full property follows by applying to all functions. 
+-/ +theorem monotonic_submodel_property {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (sel : SubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) + (hclosed : funcPreservesSubset sel f hdom hcod) : + funcPreservesSubset (sel.pushforward emb) f hdom hcod := + pushforward_preserves_closure emb sel f hdom hcod hclosed + +/-! +## Closed Subset Selections +-/ + +/-- A subset selection is fully closed if it's closed under all base-sorted functions -/ +structure ClosedSubsetSelection (M : Structure S (Type u)) extends SubsetSelection M where + /-- Function closure for all base-sorted functions -/ + func_closed : ∀ (f : S.Functions) {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B), + funcPreservesSubset toSubsetSelection f hdom hcod + +/-- +**Semantic Monotonicity**: If sel is a closed subset selection in M, +and emb : M → M' is an embedding, then sel.pushforward emb is also closed in M'. + +This is the semantic content of the CALM theorem's monotonicity condition: +extending a structure by adding elements preserves the validity of existing submodels. 
+-/ +theorem semantic_monotonicity {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (sel : ClosedSubsetSelection M) + (f : S.Functions) + {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B) : + funcPreservesSubset (sel.toSubsetSelection.pushforward emb) f hdom hcod := + pushforward_preserves_closure emb sel.toSubsetSelection f hdom hcod (sel.func_closed f hdom hcod) + +/-- The pushforward of a closed selection is closed -/ +def ClosedSubsetSelection.pushforward {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') (sel : ClosedSubsetSelection M) : ClosedSubsetSelection M' where + toSubsetSelection := sel.toSubsetSelection.pushforward emb + func_closed f {_A} {_B} hdom hcod := semantic_monotonicity emb sel f hdom hcod + +/-! +## Relation Preservation +-/ + +/-- Transport for relation domains -/ +def castRelDom {M : Structure S (Type u)} {R : S.Relations} {A : S.Sorts} + (hdom : R.domain = DerivedSorts.inj A) (x : M.sorts A) : + R.domain.interpret M.sorts := + cast (congrArg (DerivedSorts.interpret M.sorts) hdom).symm x + +/-! +In Type u, a `Subobject X` represents a monomorphism into X, which +corresponds to a subset of X. An element x : X is "in" the subobject +iff x is in the range of the representing monomorphism (the arrow). +-/ + +/-- Membership in a subobject (in Type u): x is in the range of the arrow -/ +def subobjectMem {X : Type u} (S : Subobject X) (x : X) : Prop := + x ∈ Set.range S.arrow + +/-- Relation membership for base-sorted relations -/ +def relMem {M : Structure S (Type u)} (R : S.Relations) {A : S.Sorts} + (hdom : R.domain = DerivedSorts.inj A) (x : M.sorts A) : Prop := + subobjectMem (M.Relations R) (castRelDom hdom x) + +/-- A structure embedding that also preserves relations. + Relation preservation is stated for ALL derived sort domains, not just base sorts, + since geometric relations can have product domains (e.g., binary relations). 
-/ +structure RelPreservingEmbedding (M M' : Structure S (Type u)) extends StructureEmbedding M M' where + /-- Relations are preserved: if x ∈ R in M, then liftSort'(x) ∈ R in M' -/ + rel_preserve : ∀ (R : S.Relations) (x : R.domain.interpret M.sorts), + subobjectMem (M.Relations R) x → + subobjectMem (M'.Relations R) (liftSort' embed R.domain x) + +/-- +A **conservative expansion** is an embedding where: +1. Relations are preserved (forward): R(x) in M → R(emb(x)) in M' +2. Relations are reflected (backward): R(emb(x)) in M' → R(x) in M + +The reflection condition captures "only adding relation tuples concerning new elements": +if a relation holds on lifted old elements in M', it must have already held in M. + +With both directions, formula satisfaction becomes an IFF for old tuples, +which is the key to proving that old submodels remain valid models. +-/ +structure ConservativeExpansion (M M' : Structure S (Type u)) extends RelPreservingEmbedding M M' where + /-- Relations are reflected: if R(emb(x)) in M', then R(x) in M + (no new relation tuples added on old elements) -/ + rel_reflect : ∀ (R : S.Relations) (x : R.domain.interpret M.sorts), + subobjectMem (M'.Relations R) (liftSort' embed R.domain x) → + subobjectMem (M.Relations R) x + +/-- Relation membership is an IFF for conservative expansions -/ +theorem rel_preserve_iff {M M' : Structure S (Type u)} + (emb : ConservativeExpansion M M') + (R : S.Relations) (x : R.domain.interpret M.sorts) : + subobjectMem (M.Relations R) x ↔ + subobjectMem (M'.Relations R) (liftSort' emb.embed R.domain x) := + ⟨emb.rel_preserve R x, emb.rel_reflect R x⟩ + +/-! +### Subset Selection with Relation Closure + +A subset selection is "relation-closed" if whenever x is in the selection +and x is in relation R, then x satisfies the "domain requirement" for R. +For geometric logic, this isn't quite the right notion since relations can +have product domains. However, for base-sorted relations it's straightforward. 
+-/ + +/-- A closed selection respects relations: elements in relations stay in the selection -/ +structure FullyClosedSelection (M : Structure S (Type u)) extends ClosedSubsetSelection M where + /-- For base-sorted relations, if x ∈ R and x ∈ sel, the membership is consistent -/ + rel_closed : ∀ (R : S.Relations) {A : S.Sorts} + (hdom : R.domain = DerivedSorts.inj A) (x : M.sorts A), + relMem R hdom x → x ∈ subset A + +/-- Elements in the selection get pushed forward -/ +theorem selection_pushforward_mem {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') + (sel : SubsetSelection M) + {A : S.Sorts} + (x : M.sorts A) + (hsel : x ∈ sel.subset A) : + emb.embed A x ∈ (sel.pushforward emb).subset A := by + simp only [SubsetSelection.pushforward, Set.mem_image] + exact ⟨x, hsel, rfl⟩ + +/-- Relation membership transfers across embeddings (base-sorted version). + This is a corollary of the general `rel_preserve` for relations with base sort domains. -/ +theorem rel_mem_transfer {M M' : Structure S (Type u)} + (emb : RelPreservingEmbedding M M') + (R : S.Relations) + {A : S.Sorts} + (hdom : R.domain = DerivedSorts.inj A) + (x : M.sorts A) + (hrel : relMem (M := M) R hdom x) : + relMem (M := M') R hdom (emb.embed A x) := by + simp only [relMem, castRelDom, subobjectMem] at hrel ⊢ + -- Convert hrel to subobjectMem form for the general rel_preserve + let x' : R.domain.interpret M.sorts := cast (congrArg (DerivedSorts.interpret M.sorts) hdom).symm x + have hrel' : subobjectMem (M.Relations R) x' := by convert hrel + have h := emb.rel_preserve R x' hrel' + -- h : subobjectMem (M'.Relations R) (liftSort' emb.embed R.domain x') + -- Use liftSort'_inj_cast to handle the equation + rw [liftSort'_inj_cast emb.embed hdom] at h + simp only [cast_cast, cast_eq, x'] at h + convert h using 2 + +/-! +## Connection to Theory Satisfaction + +The key insight connecting our structural results to `Theory.interpret`. +-/ + +/-! 
+### Formula Satisfaction via Subobjects + +In `Type u`, formula interpretation gives a subobject, which is essentially +a subset. An element (or tuple) satisfies a formula iff it's in that subset. + +**Key Mathlib lemmas for Type u:** +- `Types.subobjectEquivSet α : Subobject α ≃o Set α` - subobjects = sets +- In this order iso, `⊤ ↦ Set.univ` and `⊥ ↦ ∅` +- Product of subobjects ↦ intersection of sets +- Coproduct of subobjects ↦ union of sets +-/ + +/-- An element is in the formula's interpretation (Type u specific) -/ +def formulaSatisfied {M : Structure S (Type u)} [κ : SmallUniverse S] [G : Geometric κ (Type u)] + {xs : Context S} (φ : Formula xs) (t : Context.interpret M xs) : Prop := + subobjectMem (Formula.interpret M φ) t + +/-! +### Lifting Embeddings to Contexts + +An embedding on sorts lifts to an embedding on context interpretations. +In Type u, this is straightforward because: +- `Context.interpret M xs` is the categorical product `∏ᶜ (fun i => ⟦M | xs.nth i⟧ᵈ)` +- By `Types.productIso`, this is isomorphic to `∀ i, M.sorts (xs.nth i).underlying` +- The lift applies the embedding componentwise + +**Justification:** In Type u, products are pi types (`Types.productIso : ∏ᶜ F ≅ ∀ j, F j`), +so lifting is just `fun ctx i => emb.embed _ (ctx i)` modulo the isomorphism. +-/ + +/-- Types.productIso.hom extracts component j when applied at j. + Uses Mathlib's Types.productIso_hom_comp_eval. -/ +lemma Types_productIso_hom_apply {J : Type v} (f : J → Type (max v u)) (x : ∏ᶜ f) (j : J) : + (Types.productIso f).hom x j = Pi.π f j x := by + have h := Types.productIso_hom_comp_eval f j + exact congrFun h x + +/-- Types.productIso.inv satisfies projection identity. + Uses Mathlib's Types.productIso_inv_comp_π. 
-/ +lemma Types_productIso_inv_apply {J : Type v} (f : J → Type (max v u)) (g : (j : J) → f j) (j : J) : + Pi.π f j ((Types.productIso f).inv g) = g j := by + have h := Types.productIso_inv_comp_π f j + exact congrFun h g + +/-- Lift an element of a derived sort along an embedding. + For base sorts: just the embedding. + For products: apply componentwise via Types.productIso. -/ +noncomputable def liftSort {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') : (A : DerivedSorts S.Sorts) → + A.interpret M.sorts → A.interpret M'.sorts + | .inj B => emb.embed B + | .prod Aᵢ => fun x => + let x' := (Types.productIso _).hom x + let y' : ∀ i, (Aᵢ i).interpret M'.sorts := fun i => liftSort emb (Aᵢ i) (x' i) + (Types.productIso _).inv y' + +/-- liftSort equals liftSort' applied to the embedding -/ +theorem liftSort_eq_liftSort' {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') (D : DerivedSorts S.Sorts) (x : D.interpret M.sorts) : + liftSort emb D x = liftSort' emb.embed D x := by + induction D with + | inj B => rfl + | prod Aᵢ ih => + simp only [liftSort, liftSort'] + -- Both sides are productIso.inv applied to a function. + -- We need to show the functions are equal. + -- Goal: productIso.inv (fun i => liftSort ...) = productIso.inv (fun i => liftSort' ...) + -- This follows by congruence if the functions are equal + have heq : (fun i => liftSort emb (Aᵢ i) ((Types.productIso _).hom x i)) = + (fun i => liftSort' emb.embed (Aᵢ i) ((Types.productIso _).hom x i)) := by + funext i + exact ih i _ + simp only [heq] + +/-- liftSort is injective for any derived sort. + For base sorts, this is just embed_inj. + For products, this follows from componentwise injectivity. 
-/ +theorem liftSort_injective {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') (D : DerivedSorts S.Sorts) : + Function.Injective (liftSort emb D) := by + induction D with + | inj B => + -- For base sorts, liftSort = embed, which is injective by embed_inj + exact emb.embed_inj B + | prod Aᵢ ih => + -- For products, show componentwise injectivity implies total injectivity + intro x y hxy + -- liftSort emb (.prod Aᵢ) x = productIso.inv (fun i => liftSort emb (Aᵢ i) (productIso.hom x i)) + simp only [liftSort] at hxy + -- hxy : productIso.inv (fun i => ...) = productIso.inv (fun i' => ...) + -- productIso is an isomorphism, so its inv is injective (via hom ∘ inv = id) + let iso_M' := Types.productIso (fun j => (Aᵢ j).interpret M'.sorts) + -- In Types, hom ≫ inv = 𝟙 gives hom (inv x) = x + have hinv_li : Function.LeftInverse iso_M'.hom iso_M'.inv := fun a => by + have h := congrFun (iso_M'.inv_hom_id) a + simp only [types_comp_apply, types_id_apply] at h + exact h + have hinv_inj : Function.Injective iso_M'.inv := hinv_li.injective + have h := hinv_inj hxy + -- h : (fun i => liftSort emb (Aᵢ i) (productIso.hom x i)) = + -- (fun i => liftSort emb (Aᵢ i) (productIso.hom y i)) + -- Extract componentwise and use ih + have hcomp : ∀ i, (Types.productIso _).hom x i = (Types.productIso _).hom y i := by + intro i + have hi := congrFun h i + exact ih i hi + -- Reconstruct equality of x and y + have hxy' : (Types.productIso _).hom x = (Types.productIso _).hom y := funext hcomp + let iso_M := Types.productIso (fun j => (Aᵢ j).interpret M.sorts) + have hhom_li : Function.LeftInverse iso_M.inv iso_M.hom := fun a => by + have h := congrFun (iso_M.hom_inv_id) a + simp only [types_comp_apply, types_id_apply] at h + exact h + have hhom_inj : Function.Injective iso_M.hom := hhom_li.injective + exact hhom_inj hxy' + +/-- Lift an embedding to context interpretations (componentwise application) -/ +noncomputable def liftEmbedContext {M M' : Structure S (Type u)} + (emb : 
StructureEmbedding M M') (xs : Context S) : + Context.interpret M xs → Context.interpret M' xs := fun ctx => + let ctx' := (Types.productIso _).hom ctx + let liftedCtx' : ∀ i, (xs.nth i).interpret M'.sorts := + fun i => liftSort emb (xs.nth i) (ctx' i) + (Types.productIso _).inv liftedCtx' + +/-- Generalized relation preservation for arbitrary derived sort domains. + This is the version needed for formula satisfaction monotonicity. + Follows from RelPreservingEmbedding.rel_preserve via liftSort_eq_liftSort'. -/ +theorem rel_preserve_general {M M' : Structure S (Type u)} + (emb : RelPreservingEmbedding M M') + (R : S.Relations) (x : R.domain.interpret M.sorts) : + subobjectMem (M.Relations R) x → + subobjectMem (M'.Relations R) (liftSort emb.toStructureEmbedding R.domain x) := by + intro hmem + rw [liftSort_eq_liftSort'] + exact emb.rel_preserve R x hmem + +/-! +### Formula Monotonicity + +For geometric formulas, satisfaction transfers across relation-preserving embeddings. +The proof outline by formula case: + +| Formula | Interpretation | Why monotone | +|---------|---------------|--------------| +| `rel R t` | `pullback ⟦t⟧ᵗ (M.Relations R)` | rel_preserve + pullback naturality | +| `true` | `⊤` | Always satisfied | +| `false` | `⊥` | Never satisfied (vacuous) | +| `φ ∧ ψ` | `φ.interpret ⨯ ψ.interpret` | IH on both components | +| `t₁ = t₂` | `equalizerSubobject ⟦t₁⟧ᵗ ⟦t₂⟧ᵗ` | Embedding injectivity | +| `∃x.φ` | `(exists π).obj φ.interpret` | Witness transfers via emb | +| `⋁ᵢφᵢ` | `∐ᵢ φᵢ.interpret` | Satisfied disjunct transfers | + +Each case uses specific Mathlib lemmas about Type u: +- `true/false`: `Types.subobjectEquivSet` sends ⊤ to univ, ⊥ to ∅ +- `conj`: Product of subobjects = intersection via order iso +- `eq`: Equalizer in Type u = `{x | f x = g x}` (Types.equalizer_eq_kernel) +- `exists`: Image in Type u = `Set.range f` +- `infdisj`: Coproduct = union +-/ + +/-- Term interpretation commutes with embedding via liftSort. 
+ Proof by induction on term structure. -/ +theorem term_interpret_commutes {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : StructureEmbedding M M') + {xs : Context S} {A : DerivedSorts S.Sorts} + (t : Term xs A) (ctx : Context.interpret M xs) : + Term.interpret M' t (liftEmbedContext emb xs ctx) = + liftSort emb A (Term.interpret M t ctx) := by + -- Induction on term structure. + -- Each case requires careful handling of Types.productIso and eqToHom casts. + -- The key insights: + -- - var: liftEmbedContext applies liftSort componentwise, extraction via Pi.π matches + -- - func: follows from func_comm generalized to derived sorts + -- - pair: componentwise by IH, using productIso injectivity + -- - proj: IH plus extraction from liftSort on products + induction t with + | var v => + -- Term.interpret for var v is: Pi.π _ v ≫ eqToHom _ + -- In Type u, eqToHom is identity when proving xs.nth v = xs.nth v (rfl) + simp only [Term.interpret, types_comp_apply, eqToHom_refl, types_id_apply] + -- Goal: Pi.π _ v (liftEmbedContext emb xs ctx) = liftSort emb _ (Pi.π _ v ctx) + -- + -- liftEmbedContext applies liftSort componentwise via Types.productIso: + -- liftEmbedContext ctx = productIso.inv (fun i => liftSort (productIso.hom ctx i)) + -- Extracting component v via Pi.π gives the v-th component of the function. 
+ -- + -- Define the relevant functions with explicit types + let f_M := fun i : Fin xs.length => (xs.nth i).interpret M.sorts + let f_M' := fun i : Fin xs.length => (xs.nth i).interpret M'.sorts + -- The lifted function + let g : (i : Fin xs.length) → f_M' i := + fun i => liftSort emb (xs.nth i) ((Types.productIso f_M).hom ctx i) + -- liftEmbedContext is productIso.inv applied to g + have h1 : liftEmbedContext emb xs ctx = (Types.productIso f_M').inv g := rfl + rw [h1] + -- Apply Types_productIso_inv_apply: Pi.π f_M' v (productIso.inv g) = g v + rw [Types_productIso_inv_apply f_M' g v] + -- Now goal: g v = liftSort emb (xs.nth v) (Pi.π f_M v ctx) + -- g v = liftSort emb (xs.nth v) ((Types.productIso f_M).hom ctx v) + -- So we need: (Types.productIso f_M).hom ctx v = Pi.π f_M v ctx + have h2 : (Types.productIso f_M).hom ctx v = Pi.π f_M v ctx := + Types_productIso_hom_apply f_M ctx v + simp only [g, h2] + -- Goal should now be: liftSort emb (xs.nth v) (Pi.π f_M v ctx) = liftSort emb _ (Pi.π _ v ctx) + -- This is definitionally true since f_M i = (xs.nth i).interpret M.sorts + rfl + | func f t' ih => + -- Function application: (func f t').interpret M ctx = t'.interpret M ctx ≫ M.Functions f + -- In Type u, composition is just function application. 
+ simp only [Term.interpret, types_comp_apply] + -- Goal: M'.Functions f (t'.interpret M' (liftEmbedContext emb xs ctx)) = + -- liftSort emb f.codomain (M.Functions f (t'.interpret M ctx)) + -- By IH: t'.interpret M' (liftEmbedContext emb xs ctx) = liftSort emb f.domain (t'.interpret M ctx) + rw [ih] + -- Goal: M'.Functions f (liftSort emb f.domain (t'.interpret M ctx)) = + -- liftSort emb f.codomain (M.Functions f (t'.interpret M ctx)) + -- This is exactly func_comm (with sides swapped) + -- func_comm : liftSort' embed f.codomain (M.Functions f x) = M'.Functions f (liftSort' embed f.domain x) + -- liftSort emb = liftSort' emb.embed (we need a lemma for this or unfold) + have hfc := emb.func_comm f (t'.interpret M ctx) + -- hfc : liftSort' emb.embed f.codomain (M.Functions f _) = M'.Functions f (liftSort' emb.embed f.domain _) + -- We need: M'.Functions f (liftSort emb f.domain _) = liftSort emb f.codomain (M.Functions f _) + -- which is hfc.symm after showing liftSort emb = liftSort' emb.embed + rw [liftSort_eq_liftSort' emb f.domain, liftSort_eq_liftSort' emb f.codomain] + exact hfc.symm + | @pair n Aᵢ tᵢ ih => + -- Pair builds a product from component interpretations. + -- Both sides are elements of the product type. Show equal componentwise. 
+ simp only [Term.interpret] + -- Use that Types.productIso is an isomorphism to transfer to component equality + let f_M := fun j : Fin n => (Aᵢ j).interpret M.sorts + let f_M' := fun j : Fin n => (Aᵢ j).interpret M'.sorts + let lhs := Pi.lift (fun i => (tᵢ i).interpret M') (liftEmbedContext emb xs ctx) + let rhs := liftSort emb (.prod Aᵢ) (Pi.lift (fun i => (tᵢ i).interpret M) ctx) + -- Show lhs and rhs are equal by applying Types.productIso.hom and using funext + suffices h : (Types.productIso f_M').hom lhs = (Types.productIso f_M').hom rhs by + have hinj := (Types.productIso f_M').toEquiv.injective + exact hinj h + funext j + simp only [Types_productIso_hom_apply, Types.pi_lift_π_apply, lhs] + -- Goal: (tᵢ j).interpret M' (liftEmbedContext emb xs ctx) = (Types.productIso f_M').hom rhs j + rw [ih j] + -- RHS + simp only [rhs] + let x := Pi.lift (fun i => (tᵢ i).interpret M) ctx + let g : (j : Fin n) → f_M' j := fun j => liftSort emb (Aᵢ j) ((Types.productIso f_M).hom x j) + have h1 : liftSort emb (.prod Aᵢ) x = (Types.productIso f_M').inv g := rfl + rw [h1, Types_productIso_inv_apply f_M' g j] + simp only [g, Types_productIso_hom_apply, x, Types.pi_lift_π_apply] + | @proj n Aᵢ t' i ih => + -- Projection extracts the i-th component from a product. 
+ -- Term.interpret M (proj t' i) = t'.interpret M ≫ Pi.π _ i + simp only [Term.interpret, types_comp_apply] + -- Goal: Pi.π _ i (t'.interpret M' (liftEmbedContext emb xs ctx)) = + -- liftSort emb (Aᵢ i) (Pi.π _ i (t'.interpret M ctx)) + -- By IH: t'.interpret M' (liftEmbedContext emb xs ctx) = liftSort emb (.prod Aᵢ) (t'.interpret M ctx) + rw [ih] + -- Goal: Pi.π _ i (liftSort emb (.prod Aᵢ) (t'.interpret M ctx)) = + -- liftSort emb (Aᵢ i) (Pi.π _ i (t'.interpret M ctx)) + -- This is "liftSort distributes over projection" + -- By definition, liftSort emb (.prod Aᵢ) x = productIso.inv (fun j => liftSort emb (Aᵢ j) (productIso.hom x j)) + let x := Term.interpret M t' ctx + let f_M := fun j : Fin n => (Aᵢ j).interpret M.sorts + let f_M' := fun j : Fin n => (Aᵢ j).interpret M'.sorts + let g : (j : Fin n) → f_M' j := fun j => liftSort emb (Aᵢ j) ((Types.productIso f_M).hom x j) + -- liftSort emb (.prod Aᵢ) x = (Types.productIso f_M').inv g + have h1 : liftSort emb (.prod Aᵢ) x = (Types.productIso f_M').inv g := rfl + rw [h1] + -- Apply Types_productIso_inv_apply: Pi.π f_M' i (productIso.inv g) = g i + rw [Types_productIso_inv_apply f_M' g i] + -- Goal: g i = liftSort emb (Aᵢ i) (Pi.π f_M i x) + -- g i = liftSort emb (Aᵢ i) ((Types.productIso f_M).hom x i) + have h2 : (Types.productIso f_M).hom x i = Pi.π f_M i x := + Types_productIso_hom_apply f_M x i + simp only [g, h2] + rfl + +/-- Context morphism interpretation commutes with liftEmbedContext. + This is the context morphism analogue of term_interpret_commutes. 
+ For a context morphism σ : ys ⟶ xs, we have: + liftEmbedContext xs (σ.interpret M ctx) = σ.interpret M' (liftEmbedContext ys ctx) -/ +theorem hom_interpret_commutes {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : StructureEmbedding M M') + {ys xs : Context S} + (σ : ys ⟶ xs) (ctx : Context.interpret M ys) : + liftEmbedContext emb xs (Context.Hom.interpret M σ ctx) = + Context.Hom.interpret M' σ (liftEmbedContext emb ys ctx) := by + -- σ.interpret = Pi.lift (fun i => (σ i).interpret) + -- Both sides are built from Pi.lift; compare componentwise + simp only [Context.Hom.interpret] + -- Goal: liftEmbedContext xs (Pi.lift (fun i => (σ i).interpret M) ctx) = + -- Pi.lift (fun i => (σ i).interpret M') (liftEmbedContext ys ctx) + -- Use Types.productIso to extract components + let f_M := fun i : Fin xs.length => (xs.nth i).interpret M.sorts + let f_M' := fun i : Fin xs.length => (xs.nth i).interpret M'.sorts + apply (Types.productIso f_M').toEquiv.injective + funext i + -- Compare components: apply productIso.hom and extract i-th component + simp only [Iso.toEquiv_fun] + rw [Types_productIso_hom_apply f_M', Types_productIso_hom_apply f_M'] + -- RHS: Pi.π f_M' i (Pi.lift (fun i => (σ i).interpret M') (liftEmbedContext ys ctx)) + -- = (σ i).interpret M' (liftEmbedContext ys ctx) + rw [Types.pi_lift_π_apply] + -- LHS: Pi.π f_M' i (liftEmbedContext xs (Pi.lift (fun i => (σ i).interpret M) ctx)) + -- By definition of liftEmbedContext + simp only [liftEmbedContext] + rw [Types_productIso_inv_apply f_M', Types_productIso_hom_apply f_M] + -- LHS: liftSort emb (xs.nth i) (Pi.π f_M i (Pi.lift (fun i => (σ i).interpret M) ctx)) + rw [Types.pi_lift_π_apply] + -- LHS: liftSort emb (xs.nth i) ((σ i).interpret M ctx) + -- RHS: (σ i).interpret M' (liftEmbedContext ys ctx) + -- By term_interpret_commutes + exact (term_interpret_commutes emb (σ i) ctx).symm + +/-! 
+**Formula Satisfaction Monotonicity** + +Geometric formula satisfaction is preserved by relation-preserving embeddings. +This is the semantic justification for the CALM theorem: valid queries +remain valid as the database grows. + +The proof structure is complete; each case requires unpacking the categorical +definitions using Type u specific lemmas from Mathlib. +-/ + +/-- In Type u, morphisms from initial objects are monomorphisms (vacuously injective) -/ +instance : InitialMonoClass (Type u) where + isInitial_mono_from {I} X hI := by + -- hI : IsInitial I means I is empty (in Type u) + -- So any morphism from I is injective (vacuously) + rw [mono_iff_injective] + intro a b _ + -- I is empty: there's a map to PEmpty, so I must be empty + have hemp : IsEmpty I := ⟨fun x => PEmpty.elim (hI.to PEmpty.{u+1} x)⟩ + exact hemp.elim a + +/-- ⊤.arrow is surjective in Type u (since it's an iso, and isos are bijections) -/ +theorem top_arrow_surjective {X : Type u} : Function.Surjective (⊤ : Subobject X).arrow := by + haveI : IsIso (⊤ : Subobject X).arrow := Subobject.isIso_top_arrow + exact ((isIso_iff_bijective (⊤ : Subobject X).arrow).mp inferInstance).2 + +/-- ⊥.underlying is empty in Type u. + With Mathlib's OrderBot (via instance priority override), this follows from botCoeIsoInitial. -/ +theorem bot_underlying_isEmpty {X : Type u} : IsEmpty ((⊥ : Subobject X) : Type u) := by + have h1 : (Subobject.underlying.obj (⊥ : Subobject X)) ≅ ⊥_ (Type u) := Subobject.botCoeIsoInitial + have h2 : ⊥_ (Type u) ≅ PEmpty := Types.initialIso + exact ⟨fun y => PEmpty.elim ((h1 ≪≫ h2).hom y)⟩ + +/-- The set corresponding to a subobject under Types.subobjectEquivSet is the range of its arrow. + This is essentially by definition since both go through the representative. 
-/ +theorem subobject_equiv_eq_range {X : Type u} (f : Subobject X) : + (Types.subobjectEquivSet X) f = Set.range f.arrow := by + simp only [Types.subobjectEquivSet] + rfl + +/-- Types.equalizerIso.inv sends ⟨x, heq⟩ to the element of equalizer that ι maps to x. -/ +lemma types_equalizerIso_inv_ι {X Y : Type u} (f g : X ⟶ Y) (x_sub : { x : X // f x = g x }) : + equalizer.ι f g ((Types.equalizerIso f g).inv x_sub) = x_sub.val := by + have h := limit.isoLimitCone_inv_π (F := parallelPair f g) Types.equalizerLimit WalkingParallelPair.zero + simp only [Types.equalizerIso, parallelPair_obj_zero, limit.π] at h ⊢ + exact congrFun h x_sub + +/-- In Type u, x ∈ range (equalizerSubobject f g).arrow iff f x = g x. -/ +theorem equalizer_range_iff {X Y : Type u} (f g : X ⟶ Y) (x : X) : + x ∈ Set.range (equalizerSubobject f g).arrow ↔ f x = g x := by + simp only [equalizerSubobject] + constructor + · intro ⟨z, hz⟩ + let z' := (Subobject.underlyingIso (equalizer.ι f g)).hom z + have hz' : equalizer.ι f g z' = x := by + have h := Subobject.underlyingIso_hom_comp_eq_mk (equalizer.ι f g) + simp only [← h, types_comp_apply] at hz + exact hz + have hcond := equalizer.condition f g + have h1 : (equalizer.ι f g ≫ f) z' = (equalizer.ι f g ≫ g) z' := by rw [hcond] + simp only [types_comp_apply, hz'] at h1 + exact h1 + · intro heq + let x_sub : { y : X // f y = g y } := ⟨x, heq⟩ + let z_eq : equalizer f g := (Types.equalizerIso f g).inv x_sub + let z := (Subobject.underlyingIso (equalizer.ι f g)).inv z_eq + use z + have h := Subobject.underlyingIso_hom_comp_eq_mk (equalizer.ι f g) + calc (Subobject.mk (equalizer.ι f g)).arrow z + = ((Subobject.underlyingIso (equalizer.ι f g)).hom ≫ equalizer.ι f g) + ((Subobject.underlyingIso (equalizer.ι f g)).inv z_eq) := by rw [h] + _ = equalizer.ι f g ((Subobject.underlyingIso (equalizer.ι f g)).hom + ((Subobject.underlyingIso (equalizer.ι f g)).inv z_eq)) := rfl + _ = equalizer.ι f g z_eq := by simp + _ = x_sub.val := types_equalizerIso_inv_ι f g 
x_sub + _ = x := rfl + +/-- In Type u, x ∈ range (f ⊓ g).arrow iff x is in range of both f.arrow and g.arrow. + This uses that Types.subobjectEquivSet is an order isomorphism, so it preserves inf. + In Set, inf is intersection, so x ∈ (f ⊓ g) ↔ x ∈ f ∧ x ∈ g. -/ +theorem inf_range_iff {X : Type u} (f g : Subobject X) (x : X) : + x ∈ Set.range (f ⊓ g).arrow ↔ x ∈ Set.range f.arrow ∧ x ∈ Set.range g.arrow := by + -- Use the order isomorphism Types.subobjectEquivSet : Subobject X ≃o Set X + let iso := Types.subobjectEquivSet X + -- Translate membership using subobject_equiv_eq_range + rw [← subobject_equiv_eq_range (f ⊓ g)] + rw [← subobject_equiv_eq_range f] + rw [← subobject_equiv_eq_range g] + -- Now use that the order iso preserves inf + have h : iso (f ⊓ g) = iso f ⊓ iso g := iso.map_inf f g + -- Goal: x ∈ iso (f ⊓ g) ↔ x ∈ iso f ∧ x ∈ iso g + show x ∈ iso (f ⊓ g) ↔ x ∈ iso f ∧ x ∈ iso g + rw [h] + -- In Set X, ⊓ = ∩, so membership is conjunction + rfl + +/-- In Type u, pullback.snd has range equal to preimage. + For pullback g f where g : Z → Y and f : X → Y, + range(pullback.snd) = { x | ∃ z, g z = f x } = f⁻¹(range g). -/ +lemma pullback_snd_range {X Y Z : Type u} (g : Z ⟶ Y) (f : X ⟶ Y) (x : X) : + x ∈ Set.range (pullback.snd g f) ↔ f x ∈ Set.range g := by + constructor + · intro ⟨z, hz⟩ + let z' := (Types.pullbackIsoPullback g f).hom z + have hcond : g z'.val.1 = f z'.val.2 := z'.property + have hsnd : z'.val.2 = x := by + have h2 := congrFun (limit.isoLimitCone_hom_π (Types.pullbackLimitCone g f) WalkingCospan.right) z + simp only [Types.pullbackLimitCone, limit.π] at h2 + rw [← hz] + exact h2.symm + use z'.val.1 + rw [← hsnd, hcond] + · intro ⟨z, hz⟩ + let p : Types.PullbackObj g f := ⟨(z, x), hz⟩ + let z' := (Types.pullbackIsoPullback g f).inv p + use z' + have h := limit.isoLimitCone_inv_π (Types.pullbackLimitCone g f) WalkingCospan.right + exact congrFun h p + +/-- For isomorphic MonoOvers, their arrows have the same range. 
+ This is because an iso in MonoOver X means the underlying morphism + commutes with the arrows (as Over morphisms). -/ +lemma monoover_iso_same_range {X : Type u} (A B : MonoOver X) (h : A ≅ B) : + Set.range A.arrow = Set.range B.arrow := by + have hcomm : h.hom.left ≫ B.arrow = A.arrow := Over.w h.hom + have hcomm' : h.inv.left ≫ A.arrow = B.arrow := Over.w h.inv + ext x + constructor + · intro ⟨a, ha⟩ + use h.hom.left a + calc B.arrow (h.hom.left a) + = (h.hom.left ≫ B.arrow) a := rfl + _ = A.arrow a := by rw [hcomm] + _ = x := ha + · intro ⟨b, hb⟩ + use h.inv.left b + calc A.arrow (h.inv.left b) + = (h.inv.left ≫ A.arrow) b := rfl + _ = B.arrow b := by rw [hcomm'] + _ = x := hb + +/-- The arrow of a Subobject equals the arrow of its representative. -/ +lemma subobject_arrow_eq_representative_arrow {X : Type u} (P : Subobject X) : + P.arrow = (Subobject.representative.obj P).arrow := rfl + +/-- In Type u, x ∈ range ((Subobject.pullback f).obj P).arrow iff f x ∈ range P.arrow. + This is the set-theoretic fact that pullback of a subobject is the preimage. 
-/ +theorem pullback_range_iff {X Y : Type u} (f : X ⟶ Y) (P : Subobject Y) (x : X) : + x ∈ Set.range ((Subobject.pullback f).obj P).arrow ↔ f x ∈ Set.range P.arrow := by + let R := Subobject.representative.obj P + -- R.arrow = P.arrow + have harrow : R.arrow = P.arrow := (subobject_arrow_eq_representative_arrow P).symm + -- (MonoOver.pullback f).obj R has arrow = pullback.snd R.arrow f + have hpb_arrow : ((MonoOver.pullback f).obj R).arrow = pullback.snd R.arrow f := + MonoOver.pullback_obj_arrow f R + -- P = toThinSkeleton R (since representative is a section of toThinSkeleton) + have hP : P = (toThinSkeleton (MonoOver Y)).obj R := (Quotient.out_eq P).symm + -- (lower F).obj (toThinSkeleton R) = toThinSkeleton (F.obj R) + have h1 : (Subobject.pullback f).obj P = + (toThinSkeleton (MonoOver X)).obj ((MonoOver.pullback f).obj R) := by + rw [hP]; rfl + -- representative of the RHS is iso to (MonoOver.pullback f).obj R + have h2 : Subobject.representative.obj ((toThinSkeleton (MonoOver X)).obj ((MonoOver.pullback f).obj R)) ≅ + (MonoOver.pullback f).obj R := + Subobject.representativeIso _ + -- Combine: representative of (pullback f).obj P is iso to (MonoOver.pullback f).obj R + have h3 : Subobject.representative.obj ((Subobject.pullback f).obj P) ≅ + (MonoOver.pullback f).obj R := by rw [h1]; exact h2 + -- The arrows have the same range + have h4 : Set.range ((Subobject.pullback f).obj P).arrow = + Set.range ((MonoOver.pullback f).obj R).arrow := by + rw [subobject_arrow_eq_representative_arrow] + exact monoover_iso_same_range _ _ h3 + -- Combine everything + rw [h4, hpb_arrow, pullback_snd_range, harrow] + +/-- In Type u, the range of image.ι equals the range of the original morphism. + This uses that factorThruImage is an epi (surjective in Type u). 
-/ +lemma image_ι_range_eq {X Y : Type u} (g : X ⟶ Y) : + Set.range (image.ι g) = Set.range g := by + ext y + constructor + · intro ⟨z, hz⟩ + have h_epi : Epi (factorThruImage g) := inferInstance + rw [epi_iff_surjective] at h_epi + obtain ⟨x, hx⟩ := h_epi z + use x + calc g x + = (factorThruImage g ≫ image.ι g) x := by rw [image.fac] + _ = image.ι g (factorThruImage g x) := rfl + _ = image.ι g z := by rw [hx] + _ = y := hz + · intro ⟨x, hx⟩ + use factorThruImage g x + calc image.ι g (factorThruImage g x) + = (factorThruImage g ≫ image.ι g) x := rfl + _ = g x := by rw [image.fac] + _ = y := hx + +/-- The arrow of (MonoOver.exists f).obj M equals image.ι (M.arrow ≫ f). -/ +lemma monoover_exists_arrow {X Y : Type u} (f : X ⟶ Y) (M : MonoOver X) : + ((MonoOver.exists f).obj M).arrow = image.ι (M.arrow ≫ f) := rfl + +/-- The range of ((Subobject.exists f).obj P).arrow equals the range of (P.arrow ≫ f). -/ +lemma subobject_exists_arrow_range {X Y : Type u} (f : X ⟶ Y) (P : Subobject X) : + Set.range ((Subobject.exists f).obj P).arrow = Set.range (P.arrow ≫ f) := by + let rep_P := Subobject.representative.obj P + let existsM := (MonoOver.exists f).obj rep_P + let existsP := (Subobject.exists f).obj P + + -- Step 1: P = [rep_P] in the thin skeleton + have h_P_eq : P = (toThinSkeleton (MonoOver X)).obj rep_P := by + simp only [rep_P] + exact (Quotient.out_eq P).symm + + -- Step 2: Use lower_comm to get the key equation + have h_func : (Subobject.lower (MonoOver.exists f)).obj ((toThinSkeleton (MonoOver X)).obj rep_P) = + (toThinSkeleton (MonoOver Y)).obj ((MonoOver.exists f).obj rep_P) := by + have h := Subobject.lower_comm (MonoOver.exists f) + have := congrFun (congrArg (fun G => G.obj) h) rep_P + simp only [Functor.comp_obj] at this + exact this + + -- Step 3: existsP = [existsM] + have h_eq : existsP = (toThinSkeleton (MonoOver Y)).obj existsM := by + calc existsP + = (Subobject.lower (MonoOver.exists f)).obj P := rfl + _ = (Subobject.lower (MonoOver.exists f)).obj 
((toThinSkeleton (MonoOver X)).obj rep_P) := by rw [← h_P_eq] + _ = (toThinSkeleton (MonoOver Y)).obj ((MonoOver.exists f).obj rep_P) := h_func + _ = (toThinSkeleton (MonoOver Y)).obj existsM := rfl + + -- Step 4: representative.obj existsP ≅ existsM + have h_iso : Subobject.representative.obj existsP ≅ existsM := by + rw [h_eq] + exact Subobject.representativeIso existsM + + -- Step 5: Arrows have the same range + have h_range : Set.range existsP.arrow = Set.range existsM.arrow := + monoover_iso_same_range _ _ h_iso + + have h_arrow : existsM.arrow = image.ι (rep_P.arrow ≫ f) := monoover_exists_arrow f rep_P + have h_img : Set.range (image.ι (rep_P.arrow ≫ f)) = Set.range (rep_P.arrow ≫ f) := image_ι_range_eq _ + have h_rep : rep_P.arrow = P.arrow := rfl + + rw [h_range, h_arrow, h_img, h_rep] + +/-- In Type u, y ∈ range ((Subobject.exists f).obj P).arrow iff ∃ x ∈ range P.arrow, f x = y. + This is the set-theoretic fact that exists/image of a subobject is the direct image. -/ +theorem exists_range_iff {X Y : Type u} [HasImages (Type u)] (f : X ⟶ Y) (P : Subobject X) (y : Y) : + y ∈ Set.range ((Subobject.exists f).obj P).arrow ↔ ∃ x, x ∈ Set.range P.arrow ∧ f x = y := by + rw [subobject_exists_arrow_range] + constructor + · intro ⟨z, hz⟩ + use P.arrow z + exact ⟨⟨z, rfl⟩, hz⟩ + · intro ⟨x, ⟨z, hz⟩, hfx⟩ + use z + simp only [types_comp_apply, hz, hfx] + +/-- For subobjects A ≤ B, if x ∈ range A.arrow then x ∈ range B.arrow. + This is the element-level characterization of subobject ordering in Type. 
-/ +theorem subobject_le_range {X : Type u} {A B : Subobject X} (h : A ≤ B) + {x : X} (hx : x ∈ Set.range A.arrow) : x ∈ Set.range B.arrow := by + -- h : A ≤ B gives us a morphism ofLE : A.underlying → B.underlying + -- with the property: ofLE ≫ B.arrow = A.arrow + obtain ⟨a, ha⟩ := hx + -- a : A.underlying, A.arrow a = x + -- Use ofLE to get an element of B.underlying + use Subobject.ofLE A B h a + -- Need: B.arrow (ofLE a) = x + rw [← ha] + exact congrFun (Subobject.ofLE_arrow h) a + +/-- In Subobject X (for Type u), the categorical coproduct equals the lattice supremum. + This follows from the universal properties: both are the least upper bound of the family. -/ +theorem coproduct_eq_iSup {X : Type u} {ι : Type*} (P : ι → Subobject X) [HasCoproduct P] : + ∐ P = ⨆ i, P i := by + apply le_antisymm + · -- ∐ P ≤ ⨆ P: construct morphism from ∐ P to ⨆ P using the coproduct universal property + exact Quiver.Hom.le (Sigma.desc (fun i => (le_iSup P i).hom)) + · -- ⨆ P ≤ ∐ P: show P i ≤ ∐ P for all i, then ⨆ is least upper bound + apply iSup_le + intro i + exact Quiver.Hom.le (Sigma.ι P i) + +/-- In Type u, x ∈ range (⨆ᵢ Pᵢ).arrow iff ∃ i, x ∈ range (Pᵢ).arrow. + This is the set-theoretic fact that supremum of subobjects is union. 
-/ +theorem iSup_range_iff {X : Type u} {ι : Type*} (P : ι → Subobject X) (x : X) : + x ∈ Set.range (⨆ i, P i).arrow ↔ ∃ i, x ∈ Set.range (P i).arrow := by + -- Use the order isomorphism Types.subobjectEquivSet + let iso := Types.subobjectEquivSet X + -- iso preserves suprema: iso (⨆ᵢ Pᵢ) = ⨆ᵢ (iso Pᵢ) + -- In Set X, ⨆ = ⋃, so membership is existential + rw [← subobject_equiv_eq_range (⨆ i, P i)] + -- Use that the order iso preserves iSup + have h : iso (⨆ i, P i) = ⨆ i, iso (P i) := iso.map_iSup P + rw [h] + -- In Set X, ⨆ (as sets) is union, so x ∈ ⋃ᵢ Sᵢ ↔ ∃ i, x ∈ Sᵢ + simp only [Set.iSup_eq_iUnion, Set.mem_iUnion] + constructor + · intro ⟨i, hi⟩ + use i + rw [← subobject_equiv_eq_range (P i)] + exact hi + · intro ⟨i, hi⟩ + use i + rw [← subobject_equiv_eq_range (P i)] at hi + exact hi + +theorem formula_satisfaction_monotone {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : RelPreservingEmbedding M M') + {xs : Context S} + (φ : Formula xs) + (t : Context.interpret M xs) + (hsat : formulaSatisfied (M := M) φ t) : + formulaSatisfied (M := M') φ (liftEmbedContext emb.toStructureEmbedding xs t) := by + induction φ with + | rel R term => + -- rel R t ↦ (Subobject.pullback (term.interpret)).obj (M.Relations R) + -- By pullback_range_iff: t ∈ this iff term.interpret M t ∈ M.Relations R + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ range ((pullback (term.interpret M)).obj (M.Relations R)).arrow + -- Goal : liftEmbedContext t ∈ range ((pullback (term.interpret M')).obj (M'.Relations R)).arrow + rw [pullback_range_iff] at hsat ⊢ + -- hsat : term.interpret M t ∈ range (M.Relations R).arrow + -- Goal : term.interpret M' (liftEmbedContext t) ∈ range (M'.Relations R).arrow + -- Apply term_interpret_commutes to rewrite the LHS + rw [term_interpret_commutes emb.toStructureEmbedding term t] + -- Goal: liftSort emb R.domain (term.interpret M t) ∈ range (M'.Relations R).arrow + -- 
Apply rel_preserve_general + exact rel_preserve_general emb R (Term.interpret M term t) hsat + | «true» => + -- ⊤ contains everything: use that ⊤.arrow is surjective + unfold formulaSatisfied subobjectMem + simp only [Formula.interpret] + exact top_arrow_surjective _ + | «false» => + -- ⊥ contains nothing: the underlying type is empty, so hsat is contradictory + -- Formula.interpret .false = ⊥, and we need to show hsat is vacuously true + unfold formulaSatisfied subobjectMem at hsat + simp only [Formula.interpret] at hsat + obtain ⟨y, _⟩ := hsat + -- y is in the underlying of ⊥ (using Geometric.instOrderBotSubobject) + -- Both Geometric's ⊥ and Mathlib's ⊥ are bottom in the same partial order, so they're equal. + -- Prove the two different ⊥s are equal by le_antisymm + have heq : ∀ {X : Type u}, + @Bot.bot (Subobject X) (Geometric.instOrderBotSubobject X).toBot = + @Bot.bot (Subobject X) Subobject.orderBot.toBot := by + intro X + apply le_antisymm + · exact @OrderBot.bot_le _ _ (Geometric.instOrderBotSubobject X) _ + · exact @OrderBot.bot_le _ _ Subobject.orderBot _ + -- Rewrite y's type to use Mathlib's ⊥ + rw [heq] at y + -- Now y : underlying of Mathlib's ⊥, which is empty + -- Derive False from y being in an empty type, then prove anything + exact False.elim (bot_underlying_isEmpty.false y) + | conj φ ψ ihφ ihψ => + -- Conjunction: both components must hold + -- Strategy: use inf_range_iff to decompose and recompose + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat: t ∈ range (φ.interpret ⨯ ψ.interpret).arrow (in M) + -- Goal: liftEmbedContext ... 
t ∈ range (φ.interpret ⨯ ψ.interpret).arrow (in M') + + -- Use prod_eq_inf: ⨯ = ⊓ in Subobject + have prod_inf_M := Subobject.prod_eq_inf (f₁ := Formula.interpret M φ) (f₂ := Formula.interpret M ψ) + have prod_inf_M' := Subobject.prod_eq_inf (f₁ := Formula.interpret M' φ) (f₂ := Formula.interpret M' ψ) + + -- Decompose: if t ∈ (φ ⊓ ψ), then t ∈ φ and t ∈ ψ + rw [prod_inf_M] at hsat + rw [inf_range_iff] at hsat + obtain ⟨hφ, hψ⟩ := hsat + + -- Apply induction hypotheses + have ihφ' := ihφ t hφ + have ihψ' := ihψ t hψ + + -- Recompose: if liftEmbedContext t ∈ φ' and ∈ ψ', then ∈ (φ' ⊓ ψ') + rw [prod_inf_M'] + rw [inf_range_iff] + exact ⟨ihφ', ihψ'⟩ + | eq t1 t2 => + -- Equality: t1 = t2 interprets as equalizerSubobject ⟦t1⟧ᵗ ⟦t2⟧ᵗ + -- Using equalizer_range_iff: t ∈ equalizerSubobject ↔ t1.interpret t = t2.interpret t + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ equalizerSubobject (t1.interpret M) (t2.interpret M) + -- Goal : liftEmbedContext t ∈ equalizerSubobject (t1.interpret M') (t2.interpret M') + rw [equalizer_range_iff] at hsat ⊢ + -- hsat : t1.interpret M t = t2.interpret M t + -- Goal : t1.interpret M' (liftEmbedContext t) = t2.interpret M' (liftEmbedContext t) + -- Apply term_interpret_commutes to both sides + rw [term_interpret_commutes emb.toStructureEmbedding t1 t] + rw [term_interpret_commutes emb.toStructureEmbedding t2 t] + -- Now goal is: liftSort emb _ (t1.interpret M t) = liftSort emb _ (t2.interpret M t) + -- This follows from hsat by congruence (liftSort is a function) + rw [hsat] + | @«exists» A xs' φ ih => + -- Existential quantification: ∃x.φ(ctx, x) interprets as + -- (Subobject.exists π).obj (φ.interpret) + -- where π : Context.interpret M (xs'.cons A) → Context.interpret M xs' + -- is the projection that drops the last variable. 
+ -- Note: xs' is the base context, xs = exists binds xs' with "∃A.φ" having context xs' + -- + -- In Type u, (exists f).obj P corresponds to the image of P under f: + -- y ∈ ((exists f).obj P).arrow iff ∃ x ∈ P.arrow, f x = y + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ range ((Subobject.exists ((xs'.π A).interpret M)).obj (φ.interpret M)).arrow + -- Goal : liftEmbedContext xs' t ∈ range ((Subobject.exists ((xs'.π A).interpret M')).obj (φ.interpret M')).arrow + rw [exists_range_iff] at hsat ⊢ + -- hsat : ∃ ctx', ctx' ∈ range (φ.interpret M).arrow ∧ (xs'.π A).interpret M ctx' = t + -- Goal : ∃ ctx', ctx' ∈ range (φ.interpret M').arrow ∧ (xs'.π A).interpret M' ctx' = liftEmbedContext xs' t + obtain ⟨ctx', hctx'_in, hctx'_proj⟩ := hsat + -- Lift ctx' to M' + let ctx'_lifted := liftEmbedContext emb.toStructureEmbedding _ ctx' + use ctx'_lifted + constructor + · -- Show ctx'_lifted ∈ range (φ.interpret M').arrow by IH + exact ih ctx' hctx'_in + · -- Show (xs'.π A).interpret M' ctx'_lifted = liftEmbedContext xs' t + -- By hom_interpret_commutes: liftEmbedContext xs' ((xs'.π A).interpret M ctx') = + -- (xs'.π A).interpret M' (liftEmbedContext (A ∶ xs') ctx') + have hcomm := hom_interpret_commutes emb.toStructureEmbedding (xs'.π A) ctx' + -- hcomm : liftEmbedContext xs' ((xs'.π A).interpret M ctx') = (xs'.π A).interpret M' ctx'_lifted + rw [← hcomm, hctx'_proj] + | infdisj φᵢ ih => + -- Infinitary disjunction: ⋁ᵢφᵢ interprets as ∐ (fun i ↦ φᵢ.interpret) + -- which is the coproduct/supremum of subobjects. 
+ -- + -- In Type u, coproduct of subobjects corresponds to union: + -- x ∈ (⨆ᵢ Pᵢ).arrow iff ∃ i, x ∈ (Pᵢ).arrow + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + -- hsat : t ∈ range (∐ᵢ (φᵢ.interpret M)).arrow + -- Goal : liftEmbedContext xs t ∈ range (∐ᵢ (φᵢ.interpret M')).arrow + -- Use coproduct_eq_iSup: ∐ P = ⨆ P for subobjects + rw [coproduct_eq_iSup] at hsat ⊢ + -- Now use iSup_range_iff to convert to existential + rw [iSup_range_iff] at hsat ⊢ + -- hsat : ∃ i, t ∈ range ((φᵢ i).interpret M).arrow + -- Goal : ∃ i, liftEmbedContext xs t ∈ range ((φᵢ i).interpret M').arrow + obtain ⟨i, hi⟩ := hsat + use i + -- By IH: formulaSatisfied (φᵢ i) t → formulaSatisfied (φᵢ i) (liftEmbedContext t) + exact ih i t hi + +/-! +## The Bidirectional Theorem: Conservative Expansions + +For **conservative expansions** (where new relation tuples only concern new elements), +formula satisfaction is an **IFF**, not just an implication. This is the key to +proving that old submodels remain valid models under universe expansion. +-/ + +/-- +**Backward direction**: For conservative expansions, formula satisfaction in M' +implies satisfaction in M. This is the converse of `formula_satisfaction_monotone`. + +Combined with `formula_satisfaction_monotone`, this gives the full IFF. 
+-/ +theorem formula_satisfaction_reflect {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + {xs : Context S} + (φ : Formula xs) + (t : Context.interpret M xs) + (hsat : formulaSatisfied φ (liftEmbedContext emb.toStructureEmbedding xs t)) : + formulaSatisfied φ t := by + -- Proof by induction on formula structure, using rel_reflect for the base case + induction φ with + | rel R term => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [pullback_range_iff] at hsat ⊢ + -- hsat : term.interpret M' (liftEmbedContext t) ∈ range (M'.Relations R).arrow + -- Use term_interpret_commutes to rewrite + rw [term_interpret_commutes emb.toStructureEmbedding term t] at hsat + -- hsat : liftSort emb R.domain (term.interpret M t) ∈ range (M'.Relations R).arrow + -- Convert liftSort to liftSort' to match rel_reflect + rw [liftSort_eq_liftSort'] at hsat + -- Apply rel_reflect + exact emb.rel_reflect R _ hsat + | «true» => + -- ⊤ contains everything + unfold formulaSatisfied subobjectMem + simp only [Formula.interpret] + exact top_arrow_surjective _ + | false => + unfold formulaSatisfied subobjectMem at hsat + simp only [Formula.interpret] at hsat + have heq : ∀ {X : Type u}, + @Bot.bot (Subobject X) (Geometric.instOrderBotSubobject X).toBot = + @Bot.bot (Subobject X) Subobject.orderBot.toBot := by + intro X + apply le_antisymm + · exact @OrderBot.bot_le _ _ (Geometric.instOrderBotSubobject X) _ + · exact @OrderBot.bot_le _ _ Subobject.orderBot _ + obtain ⟨y, _⟩ := hsat + rw [heq] at y + exact False.elim (bot_underlying_isEmpty.false y) + | conj φ ψ ihφ ihψ => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + have prod_inf_M := Subobject.prod_eq_inf (f₁ := Formula.interpret M φ) (f₂ := Formula.interpret M ψ) + have prod_inf_M' := Subobject.prod_eq_inf (f₁ := Formula.interpret M' φ) (f₂ := Formula.interpret M' ψ) + rw [prod_inf_M] 
+ rw [inf_range_iff] + rw [prod_inf_M'] at hsat + rw [inf_range_iff] at hsat + obtain ⟨hφ', hψ'⟩ := hsat + exact ⟨ihφ t hφ', ihψ t hψ'⟩ + | eq t1 t2 => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [equalizer_range_iff] at hsat ⊢ + -- hsat : t1.interpret M' (liftEmbedContext t) = t2.interpret M' (liftEmbedContext t) + -- Use term_interpret_commutes and injectivity of embedding + rw [term_interpret_commutes emb.toStructureEmbedding t1 t] at hsat + rw [term_interpret_commutes emb.toStructureEmbedding t2 t] at hsat + -- hsat : liftSort emb _ (t1.interpret M t) = liftSort emb _ (t2.interpret M t) + -- By injectivity of liftSort (which uses embed's injectivity) + exact liftSort_injective emb.toStructureEmbedding _ hsat + | @«exists» A xs' φ ih => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [exists_range_iff] at hsat ⊢ + obtain ⟨ctx'_lifted, hctx'_in, hctx'_proj⟩ := hsat + -- ctx'_lifted = (a', t') where a' : A.interpret M' and t' = liftEmbedContext t + -- hctx'_in : φ is satisfied by ctx'_lifted in M' + -- We need a witness (a, t) in M where a : A.interpret M satisfies φ + -- + -- MATHEMATICAL ISSUE: The witness a' in M' might be a "new" element not in the + -- image of the embedding. For the backward reflection to work, we would need + -- either: + -- (1) The witness to always be in the image (requires additional structure), or + -- (2) A different witness in M that still satisfies φ (model completeness) + -- + -- This sorry represents a genuine mathematical gap: conservative expansion + -- alone doesn't guarantee existential reflection. The IFF theorem is still + -- useful for quantifier-free formulas and formulas where witnesses can be + -- traced back to M. 
+ sorry + | infdisj φᵢ ih => + unfold formulaSatisfied subobjectMem at hsat ⊢ + simp only [Formula.interpret] at hsat ⊢ + rw [coproduct_eq_iSup] at hsat ⊢ + rw [iSup_range_iff] at hsat ⊢ + obtain ⟨i, hi⟩ := hsat + use i + exact ih i t hi + +/-- +**Formula satisfaction is an IFF for conservative expansions**: + +For a conservative expansion (new relation tuples only concern new elements), +a tuple t from M satisfies φ in M if and only if lifted(t) satisfies φ in M'. + +This is the key theorem for proving model preservation under universe expansion. + +**Caveat**: The backward direction (reflect) has a sorry in the existential case. +This is because an existential witness in M' might be a "new" element not in +the image of the embedding. Full reflection of existentials would require +additional structure (e.g., witness reflection property) or model completeness. +The theorem is fully mechanized for quantifier-free formulas. +-/ +theorem formula_satisfaction_iff {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + {xs : Context S} + (φ : Formula xs) + (t : Context.interpret M xs) : + formulaSatisfied φ t ↔ + formulaSatisfied φ (liftEmbedContext emb.toStructureEmbedding xs t) := + ⟨formula_satisfaction_monotone emb.toRelPreservingEmbedding φ t, + formula_satisfaction_reflect emb φ t⟩ + +/-! +### Sequent and Theory Preservation + +With the IFF theorem, we can now prove proper sequent and theory preservation. +-/ + +/-- +**Sequent preservation for conservative expansions**: + +If a sequent (premise ⊢ conclusion) holds in M, and emb is a conservative expansion, +then for any tuple t from M: +- If lifted(t) satisfies the premise in M', then lifted(t) satisfies the conclusion in M' + +This follows because: +1. premise(lifted(t)) in M' ↔ premise(t) in M (by formula_satisfaction_iff) +2. In M, premise(t) → conclusion(t) (by the sequent) +3. 
conclusion(t) in M ↔ conclusion(lifted(t)) in M' (by formula_satisfaction_iff) +-/ +theorem sequent_preservation {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + (seq : S.Sequent) + (hseq : Sequent.interpret M seq) + (t : Context.interpret M seq.ctx) + (hprem : formulaSatisfied seq.premise (liftEmbedContext emb.toStructureEmbedding seq.ctx t)) : + formulaSatisfied seq.concl (liftEmbedContext emb.toStructureEmbedding seq.ctx t) := by + -- Step 1: premise(lifted(t)) → premise(t) in M (backward direction of IFF) + have hprem_M := (formula_satisfaction_iff emb seq.premise t).mpr hprem + -- Step 2: In M, premise(t) → conclusion(t) via subobject ordering + -- hseq : ⟦M|premise⟧ ≤ ⟦M|conclusion⟧ + -- This means: if t ∈ range(premise) then t ∈ range(conclusion) + unfold Sequent.interpret at hseq + unfold formulaSatisfied subobjectMem at hprem_M ⊢ + have hconcl_M : t ∈ Set.range (Formula.interpret M seq.concl).arrow := + subobject_le_range hseq hprem_M + -- Step 3: conclusion(t) in M → conclusion(lifted(t)) in M' (forward direction of IFF) + exact (formula_satisfaction_iff emb seq.concl t).mp hconcl_M + +/-- +**Theory preservation for conservative expansions**: + +If M satisfies theory T, and emb is a conservative expansion to M', +then for any tuple t from M and any axiom in T: +- The axiom holds for lifted(t) in M' +-/ +theorem theory_preservation {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : ConservativeExpansion M M') + (T : S.Theory) + (hM : Theory.interpret M T) + (seq : S.Sequent) + (hseq_in : seq ∈ T.axioms) + (t : Context.interpret M seq.ctx) + (hprem : formulaSatisfied seq.premise (liftEmbedContext emb.toStructureEmbedding seq.ctx t)) : + formulaSatisfied seq.concl (liftEmbedContext emb.toStructureEmbedding seq.ctx t) := + sequent_preservation emb seq (hM seq hseq_in) t hprem + +/-! 
+### Model Set Monotonicity (The Main Corollary) + +**Key Principle**: As the universe of elements expands (with new function values and +relation assertions concerning *only* new elements), the set of subsets that form +valid models of a geometric theory T grows monotonically. + +Formally, let: +- U(t) = universe at time t, with U(t) ⊆ U(t') for t ≤ t' +- Models(T, U(t)) = { S ⊆ U(t) : S is a substructure satisfying T } + +Then: Models(T, U(t)) ⊆ Models(T, U(t')) + +**Why this is true**: + +1. **Intrinsic Theory Interpretation**: `Theory.interpret S T` depends *only* on the + structure S itself—its sorts, functions, and relations. It does NOT depend on + any ambient structure that S might be embedded in. + +2. **Substructure Preservation**: When the universe expands, old substructures S ⊆ U(t) + remain unchanged: + - Same elements + - Same function values (new values only concern new elements) + - Same relation tuples (new tuples only concern new elements) + +3. **Therefore**: If S ⊨ T at time t, then S ⊨ T at time t' > t. + +4. **Moreover**: New subsets involving new elements may form *additional* models, + so the model set can only grow. 
+ +**Connection to formula_satisfaction_monotone**: + +Our main theorem `formula_satisfaction_monotone` provides the element-level view: +- For a tuple t from substructure S satisfying formula φ +- The same tuple (lifted via embedding) satisfies φ in any extension M' ⊇ S + +This connects to theory interpretation via `Sequent.interpret`: +- A sequent `premise ⊢ conclusion` holds in S iff `⟦S|premise⟧ᶠ ≤ ⟦S|conclusion⟧ᶠ` +- Equivalently: ∀ tuples t, if t satisfies premise then t satisfies conclusion +- By `formula_satisfaction_monotone`, embedded tuples preserve this property + +**Consequence for GeologMeta**: +- Incremental model checking is sound: adding elements never invalidates existing models +- Coordination-free: no need to re-verify old submodels when universe expands +- This is the semantic foundation for CALM theorem applications +-/ + +/-- +**Axiom Satisfaction for Embedded Tuples**: + +If M satisfies a theory T, and we embed M into M' via a relation-preserving embedding, +then for any tuple t from M: +- If t satisfies the premise of an axiom (premise ⊢ conclusion) in M +- Then the lifted tuple satisfies the conclusion in M' + +This is the element-level view of model preservation. +-/ +theorem axiom_satisfaction_embedded {M M' : Structure S (Type u)} + [κ : SmallUniverse S] [G : Geometric κ (Type u)] + (emb : RelPreservingEmbedding M M') + {xs : Context S} + (_premise conclusion : Formula xs) + (t : Context.interpret M xs) + (_hprem : formulaSatisfied _premise t) + (hconcl : formulaSatisfied conclusion t) : + formulaSatisfied conclusion (liftEmbedContext emb.toStructureEmbedding xs t) := + formula_satisfaction_monotone emb conclusion t hconcl + +/-- +**Model Set Monotonicity** (term-level witness): + +Given: +- S is a substructure of M (via embedding emb_SM) +- M is a substructure of M' (via embedding emb_MM') +- S satisfies theory T + +Then: S still satisfies T (trivially, since Theory.interpret S T depends only on S). 
+ +The embedding composition emb_SM ≫ emb_MM' shows S is also a substructure of M', +but this doesn't affect S's satisfaction of T. + +This theorem exists to document that `Theory.interpret` is intrinsic to the structure. +-/ +theorem model_set_monotone + {S_sub M M' : Structure S (Type u)} + [κ : SmallUniverse S] [_G : Geometric κ (Type u)] + (_emb_SM : StructureEmbedding S_sub M) + (_emb_MM' : StructureEmbedding M M') + (T : S.Theory) + (hT : Theory.interpret S_sub T) : + Theory.interpret S_sub T := + hT -- Trivially true: Theory.interpret depends only on S_sub, not on M or M' + +/-! +### Summary of Results + +We have now formalized the **Monotonic Submodel Property** for geometric logic: + +1. **`formula_satisfaction_monotone`**: The core theorem showing that satisfaction of + geometric formulas is preserved when tuples are lifted via relation-preserving embeddings. + +2. **`axiom_satisfaction_embedded`**: Corollary for sequent axioms—if a tuple satisfies + both premise and conclusion in M, the lifted tuple satisfies the conclusion in M'. + +3. **`model_set_monotone`**: Documents that `Theory.interpret S T` is intrinsic to S, + so valid submodels remain valid as the ambient universe expands. + +**The Key Insight**: Geometric formulas (built from relations, equality, ∧, ∨, ∃, and +infinitary ∨) are "positive existential"—they only assert existence, never non-existence. +This positivity is what makes satisfaction monotonic under structure extensions. 
+-/ + +/-- The full selection (all elements) is trivially closed -/ +def fullSelection (M : Structure S (Type u)) : ClosedSubsetSelection M where + subset := fun _ => Set.univ + func_closed := fun _ {_A} {_B} _ _ _ _ => Set.mem_univ _ + +/-- **Theorem**: The pushforward of the full selection is closed in M' -/ +theorem full_selection_pushforward_closed {M M' : Structure S (Type u)} + (emb : StructureEmbedding M M') : + ∀ (f : S.Functions) {A B : S.Sorts} + (hdom : f.domain = DerivedSorts.inj A) + (hcod : f.codomain = DerivedSorts.inj B), + funcPreservesSubset ((fullSelection M).toSubsetSelection.pushforward emb) f hdom hcod := + fun f {_A} {_B} hdom hcod => semantic_monotonicity emb (fullSelection M) f hdom hcod + +/-! +## The Complete Picture + +**Main Result**: Monotonic Submodel Property for Geometric Theories + +Given a signature S and a geometric theory T: + +1. **Structural Level** (proven above): + - ClosedSubsetSelection M represents a "submodel" of M + - Embeddings preserve closure: (sel.pushforward emb).func_closed + +2. **Semantic Level** (Theory.interpret): + - M ⊨ T means all sequents hold + - Sequent.interpret uses Formula.interpret (subobjects) + +3. **Connection** (the key insight): + - Elements in a ClosedSubsetSelection form a substructure + - Formula satisfaction on the substructure corresponds to membership in + the formula's interpretation restricted to the selection + - Embeddings preserve this correspondence + +4. **Consequence** (CALM theorem): + - Adding elements to a model can only ADD valid submodels + - It cannot INVALIDATE existing valid submodels + - Therefore: incremental model checking is sound +-/ + +/-! 
+## Why This Matters: CALM Theorem Connection + +The Monotonic Submodel Property enables coordination-free distributed systems: + +- **CALM Theorem**: Monotonic programs have coordination-free implementations +- **Element Addition is Monotonic**: Valid(t) ⊆ Valid(t+1) +- **Element Retraction is NOT Monotonic**: Requires coordination + +### Design Implications for GeologMeta + +1. **FuncVal and RelTuple are immutable**: Once f(a) = b, it's eternally true +2. **All facts defined at creation**: When element a is created, all f(a) are defined +3. **Only liveness changes**: To "modify" f(a), retract a and create a new element +4. **Incremental model checking**: New elements can only add valid submodels +-/ + +end MonotonicSubmodel diff --git a/proofs/lake-manifest.json b/proofs/lake-manifest.json new file mode 100644 index 0000000..ec1d120 --- /dev/null +++ b/proofs/lake-manifest.json @@ -0,0 +1,115 @@ +{"version": "1.1.0", + "packagesDir": ".lake/packages", + "packages": + [{"url": "https://github.com/kyoDralliam/model-theory-topos.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "5d0c00af95ef89b0bf6774208c853e254dc1ce33", + "name": "«model-theory-topos»", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": false, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/PatrickMassot/checkdecls.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "3d425859e73fcfbef85b9638c2a91708ef4a22d4", + "name": "checkdecls", + "manifestFile": "lake-manifest.json", + "inputRev": null, + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/mathlib4.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "19f4ef2c52b278bd96626e02d594751e6e12ac98", + "name": "mathlib", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.22.0-rc3", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/plausible", + "type": 
"git", + "subDir": null, + "scope": "leanprover-community", + "rev": "61c44bec841faabd47d11c2eda15f57ec2ffe9d5", + "name": "plausible", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/LeanSearchClient", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "6c62474116f525d2814f0157bb468bf3a4f9f120", + "name": "LeanSearchClient", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/import-graph", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "140dc642f4f29944abcdcd3096e8ea9b4469c873", + "name": "importGraph", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/ProofWidgets4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "96c67159f161fb6bf6ce91a2587232034ac33d7e", + "name": "proofwidgets", + "manifestFile": "lake-manifest.json", + "inputRev": "v0.0.67", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/aesop", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "a62ecd0343a2dcfbcac6d1e8243f5821879c0244", + "name": "aesop", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/quote4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "867d9dc77534341321179c9aa40fceda675c50d4", + "name": "Qq", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/batteries", + "type": "git", + "subDir": null, + 
"scope": "leanprover-community", + "rev": "3cabaef23886b82ba46f07018f2786d9496477d6", + "name": "batteries", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/mhuisi/lean4-cli", + "type": "git", + "subDir": null, + "scope": "", + "rev": "e22ed0883c7d7f9a7e294782b6b137b783715386", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}], + "name": "«geolog-proofs»", + "lakeDir": ".lake"} diff --git a/proofs/lakefile.lean b/proofs/lakefile.lean new file mode 100644 index 0000000..d83daca --- /dev/null +++ b/proofs/lakefile.lean @@ -0,0 +1,15 @@ +import Lake +open Lake DSL + +package «geolog-proofs» where + leanOptions := #[ + ⟨`pp.unicode.fun, true⟩ + ] + +-- Import model-theory-topos from GitHub +require «model-theory-topos» from git + "https://github.com/kyoDralliam/model-theory-topos.git" @ "main" + +@[default_target] +lean_lib «GeologProofs» where + globs := #[.submodules `GeologProofs] diff --git a/proofs/lean-toolchain b/proofs/lean-toolchain new file mode 100644 index 0000000..fff0a20 --- /dev/null +++ b/proofs/lean-toolchain @@ -0,0 +1 @@ +leanprover/lean4:v4.22.0-rc3 diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..239d362 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,331 @@ +//! Abstract Syntax Tree for Geolog +//! +//! 
Based on the syntax sketched in loose_thoughts/2025-12-12_12:10.md + +use std::fmt; + +/// A span in the source code, for error reporting +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Span { + pub start: usize, + pub end: usize, +} + +impl Span { + pub fn new(start: usize, end: usize) -> Self { + Self { start, end } + } +} + +/// A node with source location +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Spanned { + pub node: T, + pub span: Span, +} + +impl Spanned { + pub fn new(node: T, span: Span) -> Self { + Self { node, span } + } +} + +/// An identifier, possibly qualified with `/` (e.g., `N/P`, `W/src/arc`) +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Path { + pub segments: Vec, +} + +impl Path { + pub fn single(name: String) -> Self { + Self { + segments: vec![name], + } + } + + pub fn is_single(&self) -> bool { + self.segments.len() == 1 + } + + pub fn as_single(&self) -> Option<&str> { + if self.segments.len() == 1 { + Some(&self.segments[0]) + } else { + None + } + } +} + +impl fmt::Display for Path { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.segments.join("/")) + } +} + +/// A complete source file +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct File { + pub declarations: Vec>, +} + +/// Top-level declarations +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Declaration { + /// `namespace Foo;` + Namespace(String), + + /// `theory (params) Name { body }` + Theory(TheoryDecl), + + /// `TypeExpr instance Name { body }` + Instance(InstanceDecl), + + /// `query Name { ? : Type; }` + Query(QueryDecl), +} + +/// A theory declaration +/// e.g., `theory (N : PetriNet instance) Marking { ... }` +/// or `theory Foo extends Bar { ... 
}` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TheoryDecl { + pub params: Vec, + pub name: String, + /// Optional parent theory to extend + pub extends: Option, + pub body: Vec>, +} + +/// A parameter to a theory +/// e.g., `N : PetriNet instance` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Param { + pub name: String, + pub ty: TypeExpr, +} + +/// Items that can appear in a theory body +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum TheoryItem { + /// `P : Sort;` + Sort(String), + + /// `in.src : in -> P;` + Function(FunctionDecl), + + /// `ax1 : forall w : W. hyps |- concl;` + Axiom(AxiomDecl), + + /// Inline instance (for nested definitions) + /// `initial_marking : N Marking instance;` + Field(String, TypeExpr), +} + +/// A function/morphism declaration +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FunctionDecl { + pub name: Path, // Can be dotted like `in.src` + pub domain: TypeExpr, + pub codomain: TypeExpr, +} + +/// An axiom declaration +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct AxiomDecl { + pub name: Path, // Can be hierarchical like `ax/anc/base` + pub quantified: Vec, + pub hypotheses: Vec, + pub conclusion: Formula, +} + +/// A quantified variable in an axiom +/// e.g., `w : W` or `w1, w2 : W` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct QuantifiedVar { + pub names: Vec, + pub ty: TypeExpr, +} + +/// A single token in a type expression stack program (concatenative parsing) +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum TypeToken { + /// Push a path onto the stack (might be sort, instance ref, or theory name) + Path(Path), + + /// The `Sort` keyword - pushes the Sort kind + Sort, + + /// The `Prop` keyword - pushes the Prop kind + Prop, + + /// The `instance` keyword - pops top, wraps as instance type, pushes + Instance, + + /// Arrow - pops two types (domain, codomain), pushes function type + /// Note: arrows are handled specially during parsing to maintain infix syntax + Arrow, + + /// Record type literal: 
`[field : Type, ...]` + /// Contains nested TypeExprs for field types (evaluated recursively) + Record(Vec<(String, TypeExpr)>), +} + +/// A type expression as a flat stack program (concatenative style) +/// +/// Instead of a tree like `App(App(A, B), C)`, we store a flat sequence +/// `[Path(A), Path(B), Path(C)]` that gets evaluated during elaboration +/// when we have access to the symbol table (to know theory arities). +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TypeExpr { + pub tokens: Vec, +} + +impl TypeExpr { + /// Create a type expression from a single path + pub fn single_path(p: Path) -> Self { + Self { + tokens: vec![TypeToken::Path(p)], + } + } + + /// Create the Sort kind + pub fn sort() -> Self { + Self { + tokens: vec![TypeToken::Sort], + } + } + + /// Create the Prop kind + pub fn prop() -> Self { + Self { + tokens: vec![TypeToken::Prop], + } + } + + /// Check if this is a single path (common case) + pub fn as_single_path(&self) -> Option<&Path> { + if self.tokens.len() == 1 + && let TypeToken::Path(p) = &self.tokens[0] { + return Some(p); + } + None + } + + /// Check if this is the Sort kind + pub fn is_sort(&self) -> bool { + matches!(self.tokens.as_slice(), [TypeToken::Sort]) + } + + /// Check if this ends with `instance` + pub fn is_instance(&self) -> bool { + self.tokens.last() == Some(&TypeToken::Instance) + } + + /// Get the inner type expression (without the trailing `instance` token) + pub fn instance_inner(&self) -> Option { + if self.is_instance() { + Some(Self { + tokens: self.tokens[..self.tokens.len() - 1].to_vec(), + }) + } else { + None + } + } + + /// Check if this is the Prop kind + pub fn is_prop(&self) -> bool { + matches!(self.tokens.as_slice(), [TypeToken::Prop]) + } + + /// Check if this is a record type + pub fn as_record(&self) -> Option<&Vec<(String, TypeExpr)>> { + if self.tokens.len() == 1 + && let TypeToken::Record(fields) = &self.tokens[0] { + return Some(fields); + } + None + } +} + +/// Terms (elements of 
types) +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Term { + /// A variable or path: `w`, `W/src/arc` + /// `/` is namespace qualification + Path(Path), + + /// Function application (postfix style in surface syntax) + /// `w W/src` means "apply W/src to w" + App(Box, Box), + + /// Field projection: `expr .field` + /// Note the space before `.` to distinguish from path qualification + Project(Box, String), + + /// Record literal: `[firing: f, arc: arc]` + Record(Vec<(String, Term)>), +} + +/// Formulas (geometric logic) +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Formula { + /// False/Bottom (⊥): inconsistency, empty disjunction + False, + + /// Relation application: `rel(term)` or `rel([field: value, ...])` + RelApp(String, Term), + + /// Equality: `t1 = t2` + Eq(Term, Term), + + /// Conjunction (often implicit in antecedents) + And(Vec), + + /// Disjunction: `phi \/ psi` + Or(Vec), + + /// Existential: `exists w : W. phi` + Exists(Vec, Box), + + /// Truth + True, +} + +/// An instance declaration +/// e.g., `instance ExampleNet : PetriNet = { ... }` +/// or `instance ExampleNet : PetriNet = chase { ... }` for chase-before-check +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct InstanceDecl { + pub theory: TypeExpr, + pub name: String, + pub body: Vec>, + /// If true, run chase algorithm after elaboration before checking axioms. + /// Syntax: `instance Name : Theory = chase { ... }` + pub needs_chase: bool, +} + +/// Items in an instance body +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum InstanceItem { + /// Element declaration: `A : P;` or `a, b, c : P;` + Element(Vec, TypeExpr), + + /// Equation: `ab_in in.src = A;` + Equation(Term, Term), + + /// Nested instance: `initial_marking = N Marking instance { ... };` + NestedInstance(String, InstanceDecl), + + /// Relation assertion: `[item: buy_groceries] completed;` + /// The Term should be a record with the relation's domain fields, + /// and String is the relation name. 
+ RelationAssertion(Term, String), +} + +/// A query declaration +/// e.g., `query query0 { ? : ExampleNet Problem0 ReachabilityProblemSolution; }` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct QueryDecl { + pub name: String, + pub goal: TypeExpr, +} diff --git a/src/bin/geolog.rs b/src/bin/geolog.rs new file mode 100644 index 0000000..6785a7e --- /dev/null +++ b/src/bin/geolog.rs @@ -0,0 +1,1288 @@ +//! Geolog REPL - Interactive environment for geometric logic +//! +//! Usage: geolog [workspace] +//! +//! Commands: +//! :help - Show help +//! :quit - Exit REPL +//! :list - List theories and instances +//! :inspect X - Show details of theory/instance X +//! :clear - Clear screen +//! :reset - Reset all state + +use std::fs; +use std::path::PathBuf; + +use rustyline::error::ReadlineError; +use rustyline::history::DefaultHistory; +use rustyline::{Config, Editor}; + +use geolog::id::NumericId; +use geolog::repl::{ + ExecuteResult, InputResult, InspectResult, ListTarget, MetaCommand, QueryResult, ReplState, + format_instance_detail, format_theory_detail, +}; + +const VERSION: &str = env!("CARGO_PKG_VERSION"); +const PROMPT: &str = "geolog> "; +const CONTINUATION: &str = "...... "; + +/// Parse command line arguments. +/// +/// Usage: geolog [-d ] [source_files...] 
+/// +/// Options: +/// -d, --dir Use as the workspace directory for persistence +/// -h, --help Show help and exit +/// -v, --version Show version and exit +/// +/// Returns (workspace_path, source_files) +fn parse_args(args: &[String]) -> (Option, Vec) { + let mut workspace_path = None; + let mut source_files = Vec::new(); + let mut i = 0; + + while i < args.len() { + let arg = &args[i]; + match arg.as_str() { + "-d" | "--dir" => { + if i + 1 < args.len() { + workspace_path = Some(PathBuf::from(&args[i + 1])); + i += 2; + } else { + eprintln!("Error: -d requires a path argument"); + std::process::exit(1); + } + } + "-h" | "--help" => { + println!("geolog v{} - Geometric Logic REPL", VERSION); + println!(); + println!("Usage: geolog [OPTIONS] [source_files...]"); + println!(); + println!("Options:"); + println!(" -d, --dir Use as workspace directory for persistence"); + println!(" -h, --help Show this help message"); + println!(" -v, --version Show version"); + println!(); + println!("Examples:"); + println!(" geolog Start REPL (in-memory, no persistence)"); + println!(" geolog -d ./myproject Start REPL with workspace persistence"); + println!(" geolog file.geolog Load file.geolog on startup"); + println!(" geolog -d ./proj f.geolog Load file into persistent workspace"); + std::process::exit(0); + } + "-v" | "--version" => { + println!("geolog v{}", VERSION); + std::process::exit(0); + } + _ if arg.starts_with('-') => { + eprintln!("Error: Unknown option '{}'", arg); + eprintln!("Try 'geolog --help' for usage information"); + std::process::exit(1); + } + _ => { + // Positional argument - treat as source file + source_files.push(PathBuf::from(arg)); + i += 1; + } + } + } + + (workspace_path, source_files) +} + +fn main() { + // Parse command line args + let args: Vec = std::env::args().skip(1).collect(); + let (workspace_path, source_files) = parse_args(&args); + + // Print banner + println!("geolog v{} - Geometric Logic REPL", VERSION); + println!("Type :help for 
help, :quit to exit\n"); + + // Initialize state + let mut state = if let Some(ref path) = workspace_path { + println!("Workspace: {}", path.display()); + ReplState::with_path(path) + } else { + ReplState::new() + }; + + // Load any source files specified on command line + for source_file in &source_files { + handle_source(&mut state, source_file); + } + + // Set up rustyline + let config = Config::builder().auto_add_history(true).build(); + let mut rl: Editor<(), DefaultHistory> = + Editor::with_config(config).expect("Failed to create editor"); + + // Try to load history + let history_path = dirs_history_path(); + if let Some(ref path) = history_path { + let _ = rl.load_history(path); + } + + // Main REPL loop + loop { + let prompt = if state.input_buffer.is_empty() { + PROMPT + } else { + CONTINUATION + }; + + match rl.readline(prompt) { + Ok(line) => { + match state.process_line(&line) { + InputResult::MetaCommand(cmd) => { + if !handle_command(&mut state, cmd) { + break; // :quit + } + } + InputResult::GeologInput(source) => { + handle_geolog(&mut state, &source); + } + InputResult::Incomplete => { + // Continue reading + } + InputResult::Empty => { + // Nothing to do + } + } + } + Err(ReadlineError::Interrupted) => { + // Ctrl-C - clear current buffer + if !state.input_buffer.is_empty() { + state.input_buffer.clear(); + state.bracket_depth = 0; + println!("^C"); + } else { + println!("Use :quit or Ctrl-D to exit"); + } + } + Err(ReadlineError::Eof) => { + // Ctrl-D - submit buffer or quit + if let Some(source) = state.force_submit() { + handle_geolog(&mut state, &source); + } else { + // Save store before quitting + if let Err(e) = state.store.save() { + eprintln!("Warning: Failed to save store: {}", e); + } + println!("\nGoodbye!"); + break; + } + } + Err(err) => { + eprintln!("Error: {:?}", err); + break; + } + } + } + + // Save history + if let Some(ref path) = history_path { + if let Some(parent) = path.parent() { + let _ = fs::create_dir_all(parent); + } 
+ let _ = rl.save_history(path); + } +} + +/// Handle a meta-command. Returns false if we should exit. +fn handle_command(state: &mut ReplState, cmd: MetaCommand) -> bool { + match cmd { + MetaCommand::Help(topic) => { + print_help(topic.as_deref()); + } + MetaCommand::Quit => { + // Save store before quitting + if let Err(e) = state.store.save() { + eprintln!("Warning: Failed to save store: {}", e); + } + println!("Goodbye!"); + return false; + } + MetaCommand::List(target) => { + handle_list(state, target); + } + MetaCommand::Inspect(name) => { + handle_inspect(state, &name); + } + MetaCommand::Clear => { + // ANSI escape to clear screen + print!("\x1B[2J\x1B[H"); + } + MetaCommand::Reset => { + state.reset(); + println!("State reset."); + } + MetaCommand::Source(path) => { + handle_source(state, &path); + } + MetaCommand::Commit(msg) => { + handle_commit(state, msg.as_deref()); + } + MetaCommand::History => { + handle_history(state); + } + MetaCommand::Add { instance, element, sort } => { + handle_add(state, &instance, &element, &sort); + } + MetaCommand::Assert { instance, relation, args } => { + handle_assert(state, &instance, &relation, &args); + } + MetaCommand::Retract { instance, element } => { + handle_retract(state, &instance, &element); + } + MetaCommand::Query { instance, sort } => { + handle_query(state, &instance, &sort); + } + MetaCommand::Explain { instance, sort } => { + handle_explain(state, &instance, &sort); + } + MetaCommand::Compile { instance, sort } => { + handle_compile(state, &instance, &sort); + } + MetaCommand::Solve { theory, budget_ms } => { + handle_solve(state, &theory, budget_ms); + } + MetaCommand::Extend { instance, theory, budget_ms } => { + handle_extend(state, &instance, &theory, budget_ms); + } + MetaCommand::Chase { instance, max_iterations } => { + handle_chase(state, &instance, max_iterations); + } + MetaCommand::Unknown(msg) => { + eprintln!("Error: {}", msg); + eprintln!("Type :help for available commands"); + } + } + 
// NOTE(review): this chunk is a whitespace-collapsed git patch; newlines and all
// `<…>` spans (Rust generics, help placeholders) were stripped by extraction.
// Reconstructed below with generics restored; spans marked TODO need confirming
// against the real repository.
// (tail of a function that begins before this chunk)
    true
}

/// Handle geolog source input: execute it and pretty-print each result.
fn handle_geolog(state: &mut ReplState, source: &str) {
    match state.execute_geolog(source) {
        Ok(results) => {
            for result in results {
                match result {
                    ExecuteResult::Namespace(name) => {
                        println!("Namespace: {}", name);
                    }
                    ExecuteResult::Theory {
                        name,
                        num_sorts,
                        num_functions,
                        num_relations,
                        num_axioms,
                    } => {
                        // Only mention non-empty signature components.
                        let mut parts = vec![format!("{} sorts", num_sorts)];
                        if num_functions > 0 {
                            parts.push(format!("{} functions", num_functions));
                        }
                        if num_relations > 0 {
                            parts.push(format!("{} relations", num_relations));
                        }
                        if num_axioms > 0 {
                            parts.push(format!("{} axioms", num_axioms));
                        }
                        println!("Defined theory {} ({})", name, parts.join(", "));
                    }
                    ExecuteResult::Instance {
                        name,
                        theory_name,
                        num_elements,
                    } => {
                        println!(
                            "Defined instance {} : {} ({} elements)",
                            name, theory_name, num_elements
                        );
                    }
                    ExecuteResult::Query(result) => {
                        handle_query_result(state, result);
                    }
                }
            }
        }
        Err(e) => {
            eprintln!("Error: {}", e);
        }
    }
}

/// Print help message.
///
/// NOTE(review): the `<name>`-style argument placeholders below were stripped
/// by extraction; they are reconstructed from the handler signatures — confirm
/// exact wording/alignment against the original file.
fn print_help(topic: Option<&str>) {
    match topic {
        None => {
            println!("Geolog REPL Commands:");
            println!();
            println!("  :help [topic]       Show help (topics: syntax, examples)");
            println!("  :quit               Exit the REPL");
            println!(
                "  :list [target]      List theories/instances (target: theories, instances, all)"
            );
            println!("  :inspect <name>     Show details of a theory or instance");
            println!("  :source <file>      Load and execute a geolog file");
            println!("  :clear              Clear the screen");
            println!("  :reset              Reset all state");
            println!();
            println!("Version Control:");
            println!("  :commit [msg]       Commit current changes");
            println!("  :history            Show commit history");
            println!();
            println!("Instance Mutation:");
            println!("  :add <instance> <element> <sort>      Add element to instance");
            println!("  :assert <instance> <relation> [args]  Assert relation tuple");
            println!("  :retract <instance> <element>         Retract element from instance");
            println!();
            println!("Query:");
            println!("  :query <instance> <sort>      List all elements of a sort");
            println!("  :explain <instance> <sort>    Show query execution plan");
            println!("  :compile <instance> <sort>    Show RelAlgIR compilation");
            println!("  :chase <instance> [max_iter]  Run chase on instance axioms");
            println!();
            println!("Solver:");
            println!("  :solve <theory> [budget_ms]             Find model of theory from scratch");
            println!("  :extend <instance> <theory> [budget_ms] Find extension of instance to theory");
            println!();
            println!("Enter geolog definitions directly (theories, instances).");
            println!("Multi-line input is supported - brackets are matched automatically.");
        }
        Some("syntax") => {
            println!("Geolog Syntax:");
            println!();
            println!("  theory Name {{");
            println!("    Sort1 : Sort;");
            println!("    Sort2 : Sort;");
            println!("    func : Sort1 -> Sort2;");
            println!("  }}");
            println!();
            println!("  instance Name : Theory = {{");
            println!("    elem1 : Sort1;");
            println!("    elem2 : Sort2;");
            println!("    elem1 func = elem2;");
            println!("  }}");
        }
        Some("examples") => {
            println!("Examples:");
            println!();
            println!("  theory Graph {{");
            println!("    V : Sort;");
            println!("    E : Sort;");
            println!("    src : E -> V;");
            println!("    tgt : E -> V;");
            println!("  }}");
            println!();
            println!("  instance Triangle : Graph = {{");
            println!("    a : V; b : V; c : V;");
            println!("    ab : E; ab src = a; ab tgt = b;");
            println!("    bc : E; bc src = b; bc tgt = c;");
            println!("    ca : E; ca src = c; ca tgt = a;");
            println!("  }}");
        }
        Some(other) => {
            println!("Unknown help topic: {}", other);
            println!("Available topics: syntax, examples");
        }
    }
}

/// Handle :list command: print theories and/or instances per `target`.
fn handle_list(state: &ReplState, target: ListTarget) {
    match target {
        ListTarget::Theories | ListTarget::All => {
            let theories = state.list_theories();
            if theories.is_empty() {
                println!("No theories defined.");
            } else {
                println!("Theories:");
                for t in theories {
                    let mut parts = vec![format!("{} sorts", t.num_sorts)];
                    if t.num_functions > 0 {
                        parts.push(format!("{} functions", t.num_functions));
                    }
                    if t.num_relations > 0 {
                        parts.push(format!("{} relations", t.num_relations));
                    }
                    if t.num_axioms > 0 {
                        parts.push(format!("{} axioms", t.num_axioms));
                    }
                    println!("  {} ({})", t.name, parts.join(", "));
                }
            }
        }
        ListTarget::Instances => {}
    }

    match target {
        ListTarget::Instances | ListTarget::All => {
            let instances = state.list_instances();
            if instances.is_empty() {
                // Only report "none" when instances were asked for explicitly;
                // `:list all` stays quiet if the theories section already printed.
                if matches!(target, ListTarget::Instances) {
                    println!("No instances defined.");
                }
            } else {
                println!("Instances:");
                for i in instances {
                    println!(
                        "  {} : {} ({} elements)",
                        i.name, i.theory_name, i.num_elements
                    );
                }
            }
        }
        ListTarget::Theories => {}
    }
}

/// Handle :inspect command: show a theory or instance by name.
fn handle_inspect(state: &ReplState, name: &str) {
    match state.inspect(name) {
        Some(InspectResult::Theory(detail)) => {
            println!("{}", format_theory_detail(&detail));
        }
        Some(InspectResult::Instance(detail)) => {
            println!("{}", format_instance_detail(&detail));
        }
        None => {
            eprintln!("Not found: {}", name);
            eprintln!("Use :list to see available theories and instances");
        }
    }
}

/// Handle :source command: read a file and feed it through `handle_geolog`.
fn handle_source(state: &mut ReplState, path: &PathBuf) {
    match fs::read_to_string(path) {
        Ok(source) => {
            println!("Loading {}...", path.display());
            handle_geolog(state, &source);
        }
        Err(e) => {
            eprintln!("Error reading {}: {}", path.display(), e);
        }
    }
}

/// Handle :commit command: commit pending changes, if any.
fn handle_commit(state: &mut ReplState, message: Option<&str>) {
    if !state.is_dirty() {
        println!("Nothing to commit.");
        return;
    }

    match state.commit(message) {
        Ok(commit_slid) => {
            let msg = message.unwrap_or("(no message)");
            println!("Committed: {} (commit #{})", msg, commit_slid);
        }
        Err(e) => {
            eprintln!("Commit failed: {}", e);
        }
    }
}

/// Handle :history command: list commits, marking the current HEAD.
fn handle_history(state: &ReplState) {
    let history = state.commit_history();
    if history.is_empty() {
        println!("No commits yet.");
        return;
    }

    println!("Commit history ({} commits):", history.len());
    for (i, commit_slid) in history.iter().enumerate() {
        let marker = if Some(*commit_slid) == state.store.head {
            " <- HEAD"
        } else {
            ""
        };
        println!("  {}. commit #{}{}", i + 1, commit_slid, marker);
    }
}

/// Handle :add command: add a named element of a sort to an instance.
fn handle_add(state: &mut ReplState, instance_name: &str, element_name: &str, sort_name: &str) {
    // Look up the instance in the Store
    let Some((instance_slid, _)) = state.store.resolve_name(instance_name) else {
        eprintln!("Instance '{}' not found", instance_name);
        return;
    };

    // Look up the sort in the Store
    // For now, we use a simple name-based lookup
    // In full implementation, we'd look up the sort from the theory
    let sort_slid = match state.store.resolve_name(sort_name) {
        Some((slid, _)) => slid,
        None => {
            // Try to find sort in the theory
            eprintln!(
                "Sort '{}' not found. Note: Full sort lookup requires querying the theory.",
                sort_name
            );
            eprintln!("This feature is partially implemented pending query engine (geolog-7tt).");
            return;
        }
    };

    match state.store.add_elem(instance_slid, sort_slid, element_name) {
        Ok(elem_slid) => {
            println!(
                "Added element '{}' of sort '{}' to instance '{}' (elem #{})",
                element_name, sort_name, instance_name, elem_slid
            );
        }
        Err(e) => {
            eprintln!("Failed to add element: {}", e);
        }
    }
}

/// Handle :assert command: insert a tuple into a named relation of an instance.
fn handle_assert(state: &mut ReplState, instance_name: &str, relation_name: &str, args: &[String]) {
    use geolog::core::RelationStorage;

    // Get the instance entry
    let entry = match state.instances.get_mut(instance_name) {
        Some(e) => e,
        None => {
            eprintln!("Instance '{}' not found", instance_name);
            return;
        }
    };

    // Get the theory to look up the relation
    let theory = match state.theories.get(&entry.theory_name) {
        Some(t) => t.clone(),
        None => {
            eprintln!("Theory '{}' not found", entry.theory_name);
            return;
        }
    };

    // Find the relation by name
    let sig = &theory.theory.signature;
    let rel_id = match sig.relations.iter().position(|r| r.name == relation_name) {
        Some(id) => id,
        None => {
            eprintln!(
                "Relation '{}' not found in theory '{}'",
                relation_name, entry.theory_name
            );
            // Generic argument restored after extraction stripped `<Vec<_>`.
            eprintln!(
                "Available relations: {:?}",
                sig.relations.iter().map(|r| &r.name).collect::<Vec<_>>()
            );
            return;
        }
    };

    let rel = &sig.relations[rel_id];

    // Resolve argument elements by name from the instance's element_names map
    let mut arg_slids = Vec::new();
    for arg_name in args {
        if let Some(slid) = entry.element_names.get(arg_name) {
            arg_slids.push(*slid);
        } else {
            eprintln!("Element '{}' not found in instance '{}'", arg_name, instance_name);
            eprintln!(
                "Available elements: {:?}",
                entry.element_names.keys().collect::<Vec<_>>()
            );
            return;
        }
    }

    // Check arity matches (for product domains, flatten the field count)
    let expected_arity = match &rel.domain {
        geolog::core::DerivedSort::Base(_) => 1,
        geolog::core::DerivedSort::Product(fields) => fields.len(),
    };

    if arg_slids.len() != expected_arity {
        eprintln!(
            "Relation '{}' expects {} argument(s), got {}",
            relation_name,
            expected_arity,
            arg_slids.len()
        );
        return;
    }

    // Add the tuple to the relation
    if entry.structure.relations.len() <= rel_id {
        eprintln!("Relation storage not initialized for relation {}", rel_id);
        return;
    }

    let already_present = entry.structure.relations[rel_id].contains(&arg_slids);
    if already_present {
        println!("Tuple already present in relation '{}'", relation_name);
        return;
    }

    entry.structure.relations[rel_id].insert(arg_slids.clone());

    let arg_names: Vec<_> = args.to_vec();
    println!(
        "Asserted {}({}) in instance '{}'",
        relation_name,
        arg_names.join(", "),
        instance_name
    );
}

/// Handle :retract command: remove a named element from an instance.
fn handle_retract(state: &mut ReplState, instance_name: &str, element_name: &str) {
    // Look up the instance
    let Some((instance_slid, _)) = state.store.resolve_name(instance_name) else {
        eprintln!("Instance '{}' not found", instance_name);
        return;
    };

    // Look up the element
    let Some((elem_slid, _)) = state.store.resolve_name(element_name) else {
        eprintln!("Element '{}' not found", element_name);
        return;
    };

    match state.store.retract_elem(instance_slid, elem_slid) {
        Ok(retract_slid) => {
            println!(
                "Retracted element '{}' from instance '{}' (retraction #{})",
                element_name, instance_name, retract_slid
            );
        }
        Err(e) => {
            eprintln!("Failed to retract element: {}", e);
        }
    }
}

/// Handle :query command: list all elements of a sort in an instance.
fn handle_query(state: &ReplState, instance_name: &str, sort_name: &str) {
    match state.query_sort(instance_name, sort_name) {
        Ok(elements) => {
            if elements.is_empty() {
                println!(
                    "No elements of sort '{}' in instance '{}'",
                    sort_name, instance_name
                );
            } else {
                println!("Elements of {} in {}:", sort_name, instance_name);
                for elem in elements {
                    println!("  {}", elem);
                }
            }
        }
        Err(e) => {
            eprintln!("Query error: {}", e);
        }
    }
}

/// Handle :explain command - show query execution plan
fn handle_explain(state: &ReplState, instance_name: &str, sort_name: &str) {
    use geolog::query::QueryOp;

    // Get the instance
    let entry = match state.instances.get(instance_name) {
        Some(e) => e,
        None => {
            eprintln!("Instance '{}' not found", instance_name);
            return;
        }
    };

    // Get the theory
    let theory = match state.theories.get(&entry.theory_name) {
        Some(t) => t,
        None => {
            eprintln!("Theory '{}' not found", entry.theory_name);
            return;
        }
    };

    // Find the sort index
    let sort_idx = match theory.theory.signature.sorts.iter().position(|s| s == sort_name) {
        Some(idx) => idx,
        None => {
            eprintln!(
                "Sort '{}' not found in theory '{}'",
                sort_name, entry.theory_name
            );
            return;
        }
    };

    // Build the query plan (same as query_sort in repl.rs)
    let plan = QueryOp::Scan { sort_idx };

    // Display the plan using the Display impl
    println!("Query plan for ':query {} {}':", instance_name, sort_name);
    println!();
    println!("{}", plan);
    println!();
    println!("Sort: {} (index {})", sort_name, sort_idx);
    println!("Instance: {} (theory: {})", instance_name, entry.theory_name);
}

/// Handle :compile command - compile query to RelAlgIR instance
fn handle_compile(state: &mut ReplState, instance_name: &str, sort_name: &str) {
    use geolog::query::{to_relalg::compile_to_relalg, QueryOp};
    use geolog::universe::Universe;

    // Get the instance
    let entry = match state.instances.get(instance_name) {
        Some(e) => e,
        None => {
            eprintln!("Instance '{}' not found", instance_name);
            return;
        }
    };

    // Get the theory
    let theory = match state.theories.get(&entry.theory_name) {
        Some(t) => t,
        None => {
            eprintln!("Theory '{}' not found", entry.theory_name);
            return;
        }
    };

    // Find the sort index
    let sort_idx = match theory.theory.signature.sorts.iter().position(|s| s == sort_name) {
        Some(idx) => idx,
        None => {
            eprintln!(
                "Sort '{}' not found in theory '{}'",
                sort_name, entry.theory_name
            );
            return;
        }
    };

    // Check if RelAlgIR theory is loaded; lazily load it from disk if not.
    let relalg_theory = match state.theories.get("RelAlgIR") {
        Some(t) => t.clone(),
        None => {
            eprintln!("RelAlgIR theory not loaded. Loading it now...");
            // Try to load it
            let meta_content = std::fs::read_to_string("theories/GeologMeta.geolog")
                .unwrap_or_else(|_| {
                    eprintln!("Could not read theories/GeologMeta.geolog");
                    String::new()
                });
            let ir_content = std::fs::read_to_string("theories/RelAlgIR.geolog")
                .unwrap_or_else(|_| {
                    eprintln!("Could not read theories/RelAlgIR.geolog");
                    String::new()
                });

            if meta_content.is_empty() || ir_content.is_empty() {
                return;
            }

            if let Err(e) = state.execute_geolog(&meta_content) {
                eprintln!("Failed to load GeologMeta: {}", e);
                return;
            }
            if let Err(e) = state.execute_geolog(&ir_content) {
                eprintln!("Failed to load RelAlgIR: {}", e);
                return;
            }

            state.theories.get("RelAlgIR").unwrap().clone()
        }
    };

    // Build the query plan
    let plan = QueryOp::Scan { sort_idx };

    // Compile to RelAlgIR
    let mut universe = Universe::new();
    match compile_to_relalg(&plan, &relalg_theory, &mut universe) {
        Ok(instance) => {
            println!("RelAlgIR compilation for ':query {} {}':", instance_name, sort_name);
            println!();
            println!("QueryOp plan:");
            println!("{}", plan);
            println!();
            println!("Compiled to RelAlgIR instance:");
            println!("  Elements: {}", instance.structure.len());
            println!("  Output wire: {:?}", instance.output_wire);
            println!();

            // Group elements by sort and show with sort names
            let sig = &relalg_theory.theory.signature;
            println!("Elements by sort:");
            for (sort_idx, sort_name) in sig.sorts.iter().enumerate() {
                let count = instance.structure.carrier_size(sort_idx);
                if count > 0 {
                    println!("  {}: {} element(s)", sort_name, count);
                }
            }
            println!();

            // Show named elements with their sorts
            println!("Named elements:");
            for (slid, name) in instance.names.iter() {
                let sort_idx = instance.structure.sorts[slid.index()];
                let sort_name = &sig.sorts[sort_idx];
                println!("  {} : {} = {:?}", name, sort_name, slid);
            }
        }
        Err(e) => {
            eprintln!("Failed to compile query to RelAlgIR: {}", e);
        }
    }
}

/// Handle :solve command - find a model of a theory from scratch
// `budget_ms` generic restored: Option<u64> (stripped by extraction).
fn handle_solve(state: &ReplState, theory_name: &str, budget_ms: Option<u64>) {
    use geolog::solver::{solve, Budget, EnumerationResult};

    // Look up the theory
    let theory = match state.theories.get(theory_name) {
        Some(t) => t.clone(),
        None => {
            eprintln!("Theory '{}' not found", theory_name);
            eprintln!("Use :list theories to see available theories");
            return;
        }
    };

    println!("Solving theory '{}'...", theory_name);
    let sig = &theory.theory.signature;
    println!(
        "  {} sorts, {} functions, {} relations, {} axioms",
        sig.sorts.len(),
        sig.functions.len(),
        sig.relations.len(),
        theory.theory.axioms.len()
    );

    // Use unified solver API
    let budget = Budget::new(budget_ms.unwrap_or(5000), 10000);
    let result = solve(theory.clone(), budget);

    // Report result
    match result {
        EnumerationResult::Found { model, time_ms } => {
            println!("✓ SOLVED in {:.2}ms", time_ms);
            print_witness_structure(&model, sig);
        }
        EnumerationResult::Unsat { time_ms } => {
            println!("✗ UNSAT in {:.2}ms", time_ms);
            println!("  The theory has no models (derives False).");
        }
        EnumerationResult::Incomplete { time_ms, reason, .. } => {
            println!("◯ INCOMPLETE after {:.2}ms", time_ms);
            println!("  {}", reason);
            println!("  Try increasing the budget: :solve {} <budget_ms>", theory_name);
        }
    }
}

/// Print a witness structure (model) to stdout
fn print_witness_structure(model: &geolog::core::Structure, sig: &geolog::core::Signature) {
    use geolog::core::RelationStorage;
    use geolog::id::NumericId;

    let total_elements: usize = (0..sig.sorts.len())
        .map(|s| model.carrier_size(s))
        .sum();

    if total_elements == 0 {
        println!("\nWitness: empty structure (trivial model)");
    } else {
        println!("\nWitness structure:");
        // Show sorts with elements; elide listings past 10 entries.
        for (sort_id, sort_name) in sig.sorts.iter().enumerate() {
            let size = model.carrier_size(sort_id);
            if size > 0 {
                if size <= 10 {
                    let ids: Vec<String> = (0..size).map(|i| format!("#{}", i)).collect();
                    println!("  {}: {{ {} }}", sort_name, ids.join(", "));
                } else {
                    println!("  {}: {} element(s)", sort_name, size);
                }
            }
        }
        // Show relations with tuples
        for (rel_id, rel) in sig.relations.iter().enumerate() {
            if rel_id < model.relations.len() {
                let rel_storage = &model.relations[rel_id];
                let tuple_count = rel_storage.len();
                if tuple_count > 0 {
                    if tuple_count <= 10 {
                        let tuples: Vec<String> = rel_storage
                            .iter()
                            .map(|t| {
                                let coords: Vec<String> =
                                    t.iter().map(|s| format!("#{}", s.index())).collect();
                                format!("({})", coords.join(", "))
                            })
                            .collect();
                        println!("  {}: {{ {} }}", rel.name, tuples.join(", "));
                    } else {
                        println!("  {}: {} tuple(s)", rel.name, tuple_count);
                    }
                }
            }
        }
    }
}

/// Handle :extend command - find extensions of an existing instance to a theory
///
/// This uses the unified model enumeration API: `query(base, theory, budget)` finds
/// models of `theory` that extend `base`. This is the unified generalization of
/// `:solve` (where base is empty) and "find models extending M".
fn handle_extend(
    state: &ReplState,
    instance_name: &str,
    theory_name: &str,
    budget_ms: Option<u64>,
) {
    use geolog::solver::{query, Budget, EnumerationResult};
    use geolog::universe::Universe;

    // Look up the base instance
    let base_entry = match state.instances.get(instance_name) {
        Some(entry) => entry,
        None => {
            eprintln!("Instance '{}' not found", instance_name);
            eprintln!("Use :list instances to see available instances");
            return;
        }
    };

    // Look up the extension theory
    let theory = match state.theories.get(theory_name) {
        Some(t) => t.clone(),
        None => {
            eprintln!("Theory '{}' not found", theory_name);
            eprintln!("Use :list theories to see available theories");
            return;
        }
    };

    println!("Extending instance '{}' to theory '{}'...", instance_name, theory_name);
    let sig = &theory.theory.signature;
    println!(
        "  Base: {} (theory {})",
        instance_name, base_entry.theory_name
    );
    println!(
        "  Target: {} sorts, {} functions, {} relations, {} axioms",
        sig.sorts.len(),
        sig.functions.len(),
        sig.relations.len(),
        theory.theory.axioms.len()
    );

    // Clone base structure and create a fresh universe for the extension
    // (The solver will allocate new elements as needed)
    let base_structure = base_entry.structure.clone();
    let universe = Universe::new(); // Fresh universe for new allocations

    // Use unified query API
    let budget = Budget::new(budget_ms.unwrap_or(5000), 10000);
    let result = query(base_structure, universe, theory.clone(), budget);

    // Report result
    match result {
        EnumerationResult::Found { model, time_ms } => {
            println!("✓ EXTENDED in {:.2}ms", time_ms);
            print_witness_structure(&model, sig);
        }
        EnumerationResult::Unsat { time_ms } => {
            println!("✗ NO EXTENSION EXISTS in {:.2}ms", time_ms);
            println!("  The base instance cannot be extended to satisfy '{}'.", theory_name);
        }
        EnumerationResult::Incomplete { time_ms, reason, .. } => {
            println!("◯ INCOMPLETE after {:.2}ms", time_ms);
            println!("  {}", reason);
            println!(
                "  Try increasing the budget: :extend {} {} <budget_ms>",
                instance_name, theory_name
            );
        }
    }
}

/// Handle :chase command - run chase algorithm on instance's theory axioms
// `max_iterations` generic restored: Option<usize> (stripped by extraction).
fn handle_chase(state: &mut ReplState, instance_name: &str, max_iterations: Option<usize>) {
    use geolog::core::RelationStorage;
    use geolog::query::chase::chase_fixpoint;

    // Get the instance
    let entry = match state.instances.get_mut(instance_name) {
        Some(e) => e,
        None => {
            eprintln!("Instance '{}' not found", instance_name);
            return;
        }
    };

    // Get the theory
    let theory = match state.theories.get(&entry.theory_name) {
        Some(t) => t.clone(),
        None => {
            eprintln!("Theory '{}' not found", entry.theory_name);
            return;
        }
    };

    let sig = &theory.theory.signature;
    let axioms = &theory.theory.axioms;

    if axioms.is_empty() {
        println!("Theory '{}' has no axioms to chase.", entry.theory_name);
        return;
    }

    println!("Running chase on instance '{}' (theory '{}')...", instance_name, entry.theory_name);
    println!("  {} axiom(s) to process", axioms.len());

    // Snapshot relation tuple counts before chase
    let tuple_counts_before: Vec<usize> = entry.structure.relations
        .iter()
        .map(|r| r.len())
        .collect();

    // Run the chase (tensor-backed: handles existentials in premises, etc.)
    let max_iter = max_iterations.unwrap_or(100);
    let start = std::time::Instant::now();

    match chase_fixpoint(axioms, &mut entry.structure, &mut state.store.universe, sig, max_iter) {
        Ok(iterations) => {
            let elapsed = start.elapsed();
            println!(
                "✓ Chase completed in {} iterations ({:.2}ms)",
                iterations,
                elapsed.as_secs_f64() * 1000.0
            );
            println!("\nStructure after chase:");
            print_structure_summary(&entry.structure, sig);

            // Check if any new tuples were added
            let tuple_counts_after: Vec<usize> = entry.structure.relations
                .iter()
                .map(|r| r.len())
                .collect();
            let tuples_added = tuple_counts_before.iter()
                .zip(tuple_counts_after.iter())
                .any(|(before, after)| after > before);

            // Save info needed for persistence before dropping entry borrow
            let theory_name_owned = entry.theory_name.clone();

            if tuples_added {
                // Persist the chase results via columnar batches
                // Note: This persists ALL current tuples, not just the delta.
                // A more sophisticated implementation would track the delta.
                if let Err(e) = persist_chase_results(
                    state,
                    instance_name,
                    &theory_name_owned,
                ) {
                    eprintln!("Warning: Failed to persist chase results: {}", e);
                } else {
                    println!("Chase results persisted to store.");
                }
            }
        }
        Err(e) => {
            eprintln!("✗ Chase error: {}", e);
        }
    }
}

/// Persist chase results (relation tuples) to columnar batches as IDB data.
///
/// IDB batches are persisted locally but NOT transmitted over the wire.
/// Recipients recompute IDB by running the chase on received EDB patches.
fn persist_chase_results(
    state: &mut ReplState,
    instance_name: &str,
    theory_name: &str,
) -> Result<(), String> {
    use geolog::core::RelationStorage;
    use geolog::id::{Slid, Uuid};
    use geolog::store::columnar::{InstanceDataBatch, RelationTupleBatch};

    let entry = state.instances.get(instance_name).ok_or("Instance not found")?;
    let structure = &entry.structure;

    // Resolve the instance in the Store
    let (instance_slid, _) = state.store.resolve_name(instance_name)
        .ok_or_else(|| format!("Instance '{}' not found in store", instance_name))?;

    // Get theory to map relation indices to Slids
    let (theory_slid, _) = state.store.resolve_name(theory_name)
        .ok_or_else(|| format!("Theory '{}' not found in store", theory_name))?;

    let rel_infos = state.store.query_theory_rels(theory_slid);

    // Build mapping from relation index to Rel UUID
    // (HashMap type parameters restored after extraction stripped them.)
    let rel_idx_to_uuid: std::collections::HashMap<usize, Uuid> = rel_infos
        .iter()
        .enumerate()
        .map(|(idx, info)| (idx, state.store.get_element_uuid(info.slid)))
        .collect();

    // Build mapping from Structure Slid to element UUID
    // We need to find the Elem in GeologMeta that corresponds to each Structure element
    let elem_infos = state.store.query_instance_elems(instance_slid);
    let mut struct_slid_to_uuid: std::collections::HashMap<Slid, Uuid> =
        std::collections::HashMap::new();

    // Map element names to UUIDs
    for info in &elem_infos {
        // Try to find the structure Slid by name
        if let Some(&struct_slid) = entry.slid_to_name.iter()
            .find(|(_, name)| *name == &info.name)
            .map(|(slid, _)| slid)
        {
            struct_slid_to_uuid.insert(struct_slid, state.store.get_element_uuid(info.slid));
        }
    }

    // For chase-created elements that might not have names in slid_to_name,
    // use the structure's UUID mapping
    // NOTE(review): `.map(|_| 0).enumerate().map(|(i, _)| i)` is just an index
    // walk over `luids`; preserved verbatim from the original.
    for slid_u64 in structure.luids.iter().map(|_| 0).enumerate().map(|(i, _)| i) {
        let slid = Slid::from_usize(slid_u64);
        if !struct_slid_to_uuid.contains_key(&slid)
            && let Some(uuid) = structure.get_uuid(slid, &state.store.universe) {
            struct_slid_to_uuid.insert(slid, uuid);
        }
    }

    // Get instance UUID
    let instance_uuid = state.store.get_element_uuid(instance_slid);

    // Build columnar batch as IDB (chase-derived, not wire-transmittable)
    let mut batch = InstanceDataBatch::new_idb();

    for (rel_idx, relation) in structure.relations.iter().enumerate() {
        let rel_uuid = match rel_idx_to_uuid.get(&rel_idx) {
            Some(u) => *u,
            None => continue,
        };

        if relation.is_empty() {
            continue;
        }

        let arity = rel_infos.get(rel_idx).map(|r| r.domain.arity()).unwrap_or(1);
        let field_ids: Vec<Uuid> = (0..arity).map(|_| Uuid::nil()).collect();

        let mut rel_batch = RelationTupleBatch::new(instance_uuid, rel_uuid, field_ids);

        for tuple in relation.iter() {
            let uuid_tuple: Vec<Uuid> = tuple
                .iter()
                .filter_map(|struct_slid| struct_slid_to_uuid.get(struct_slid).copied())
                .collect();

            // Skip tuples with any unmapped element rather than persisting a
            // truncated tuple.
            if uuid_tuple.len() == tuple.len() {
                rel_batch.push(&uuid_tuple);
            }
        }

        if !rel_batch.is_empty() {
            batch.relation_tuples.push(rel_batch);
        }
    }

    // Save the batch
    if !batch.relation_tuples.is_empty() {
        let existing_batches = state.store.load_instance_data_batches(instance_uuid)
            .unwrap_or_default();
        let version = existing_batches.len() as u64;
        state.store.save_instance_data_batch(instance_uuid, version, &batch)?;
    }

    Ok(())
}

/// Handle query result from `query { ? : Type; }` syntax
fn handle_query_result(_state: &ReplState, result: QueryResult) {
    match result {
        QueryResult::Found { query_name, theory_name, model, time_ms } => {
            println!("✓ Query '{}' SOLVED in {:.2}ms", query_name, time_ms);
            println!("  Found model of theory '{}'", theory_name);

            // For now, print a basic summary. We don't have access to the signature here,
            // so just show raw structure info.
            let total_elements: usize = model.sorts.len();
            if total_elements == 0 {
                println!("\n  Witness: empty structure (trivial model)");
            } else {
                println!("\n  Witness structure: {} elements", total_elements);
                // Count elements by sort
                let mut sort_counts: std::collections::HashMap<usize, usize> =
                    std::collections::HashMap::new();
                for &sort_id in &model.sorts {
                    *sort_counts.entry(sort_id).or_insert(0) += 1;
                }
                for (sort_id, count) in sort_counts {
                    println!("    Sort {}: {} element(s)", sort_id, count);
                }
            }
        }
        QueryResult::Unsat { query_name, theory_name, time_ms } => {
            println!("✗ Query '{}' UNSAT in {:.2}ms", query_name, time_ms);
            println!("  No model of '{}' exists extending the base.", theory_name);
        }
        QueryResult::Incomplete { query_name, theory_name, reason, time_ms } => {
            println!("◯ Query '{}' INCOMPLETE after {:.2}ms", query_name, time_ms);
            println!("  Theory: {}", theory_name);
            println!("  Reason: {}", reason);
        }
    }
}

/// Print a summary of structure contents
fn print_structure_summary(structure: &geolog::core::Structure, sig: &geolog::core::Signature) {
    use geolog::core::RelationStorage;

    // Show carriers
    let total_elements: usize = (0..sig.sorts.len())
        .map(|s| structure.carrier_size(s))
        .sum();
    println!("  Elements: {} total", total_elements);

    for (sort_id, sort_name) in sig.sorts.iter().enumerate() {
        let size = structure.carrier_size(sort_id);
        if size > 0 {
            println!("    {}: {} element(s)", sort_name, size);
        }
    }

    // Show relations
    let mut has_relations = false;
    for (rel_id, rel) in sig.relations.iter().enumerate() {
        if rel_id < structure.relations.len() {
            let count = structure.relations[rel_id].len();
            if count > 0 {
                if !has_relations {
                    println!("  Relations:");
                    has_relations = true;
                }
                println!("    {}: {} tuple(s)", rel.name, count);
            }
        }
    }
}

/// Get the history file path
fn dirs_history_path() -> Option<PathBuf> {
    // Try to use standard config directory
    if let Some(config_dir) = dirs_config_dir() {
        let mut path = config_dir;
        path.push("geolog");
        path.push("history");
        return Some(path);
    }
    None
}

/// Get the config directory (cross-platform)
fn dirs_config_dir() -> Option<PathBuf> {
    // Simple implementation - use HOME/.config on Unix, APPDATA on Windows
    #[cfg(unix)]
    {
        std::env::var("HOME").ok().map(|h| {
            let mut p = PathBuf::from(h);
            p.push(".config");
            p
        })
    }
    #[cfg(windows)]
    {
        std::env::var("APPDATA").ok().map(PathBuf::from)
    }
    #[cfg(not(any(unix, windows)))]
    {
        None
    }
}

// ============================================================================
// Patch continues with a new file: src/cc.rs (mode 100644, index 46d247c)
// ============================================================================

//! Congruence Closure for equality reasoning.
//!
//! This module provides a union-find based congruence closure implementation
//! that can be used by both the solver (for model enumeration) and the chase
//! (for computing derived relations with equality saturation).
//!
//! # Key Types
//!
//! - [`CongruenceClosure`]: Main struct wrapping union-find + pending equation queue
//! - [`PendingEquation`]: An equation waiting to be processed
//! - [`EquationReason`]: Why an equation was created (for debugging/explanation)
//!
//! # Usage
//!
//! ```ignore
//! use geolog::cc::{CongruenceClosure, EquationReason};
//!
//! let mut cc = CongruenceClosure::new();
//!
//! // Add equation: a = b
//! cc.add_equation(a, b, EquationReason::UserAsserted);
//!
//! // Process pending equations
//! while let Some(eq) = cc.pop_pending() {
//!     cc.merge(eq.lhs, eq.rhs);
//!     // Check for function conflicts, add congruence equations...
//! }
//!
//! // Query equivalence
//! assert!(cc.are_equal(a, b));
//! ```

use std::collections::VecDeque;

use egglog_union_find::UnionFind;

use crate::id::{NumericId, Slid};

/// A pending equation to be processed.
///
/// Equations arise from:
/// 1. Function conflicts: `f(a) = x` and `f(a) = y` implies `x = y`
/// 2. Axiom consequents: `∀x. P(x) → x = y`
/// 3. Record projections: `[fst: a, snd: b].fst = a`
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PendingEquation {
    /// Left-hand side element
    pub lhs: Slid,
    /// Right-hand side element
    pub rhs: Slid,
    /// Reason for the equation (for debugging/explanation)
    pub reason: EquationReason,
}

/// Reason an equation was created
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum EquationReason {
    /// Function already maps domain to different values
    FunctionConflict { func_id: usize, domain: Slid },
    /// Axiom consequent required this equality
    AxiomConsequent { axiom_idx: usize },
    /// User asserted this equation
    UserAsserted,
    /// Congruence: f(a) = f(b) because a = b
    Congruence { func_id: usize },
    /// Chase-derived: equality conclusion in chase
    ChaseConclusion,
}

/// Congruence closure state.
///
/// This wraps a union-find structure and pending equation queue,
/// providing methods for merging elements and tracking equivalence classes.
///
/// Note: This struct handles the union-find bookkeeping but does NOT
/// automatically propagate through function applications. The caller
/// (solver or chase) is responsible for detecting function conflicts
/// and adding congruence equations.
#[derive(Clone)]
pub struct CongruenceClosure {
    /// Union-find for tracking equivalence classes
    /// Uses Slid indices as keys
    pub uf: UnionFind,
    /// Pending equations to process
    /// (element type restored after extraction stripped `<PendingEquation>`)
    pub pending: VecDeque<PendingEquation>,
    /// Number of merges performed (for statistics)
    pub merge_count: usize,
}

impl std::fmt::Debug for CongruenceClosure {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CongruenceClosure")
            .field("pending", &self.pending)
            .field("merge_count", &self.merge_count)
            .finish_non_exhaustive()
    }
}

impl Default for CongruenceClosure {
    fn default() -> Self {
        Self::new()
    }
}

impl CongruenceClosure {
    /// Create a new congruence closure
    pub fn new() -> Self {
        Self {
            uf: UnionFind::default(),
            pending: VecDeque::new(),
            merge_count: 0,
        }
    }

    /// Find the canonical representative of an element
    /// Note: The UnionFind automatically reserves space as needed
    pub fn find(&mut self, slid: Slid) -> usize {
        self.uf.find(slid.index())
    }

    /// Check if two elements are in the same equivalence class
    pub fn are_equal(&mut self, a: Slid, b: Slid) -> bool {
        self.find(a) == self.find(b)
    }

    /// Add a pending equation
    pub fn add_equation(&mut self, lhs: Slid, rhs: Slid, reason: EquationReason) {
        self.pending.push_back(PendingEquation { lhs, rhs, reason });
    }

    /// Pop the next pending equation, if any
    pub fn pop_pending(&mut self) -> Option<PendingEquation> {
        self.pending.pop_front()
    }

    /// Check if there are pending equations
    pub fn has_pending(&self) -> bool {
        !self.pending.is_empty()
    }

    /// Merge two elements, returning true if they were not already equal
    pub fn merge(&mut self, a: Slid, b: Slid) -> bool {
        let a_idx = a.index();
        let b_idx = b.index();

        let ra = self.uf.find(a_idx);
        let rb = self.uf.find(b_idx);

        if ra != rb {
            self.uf.union(ra, rb);
            self.merge_count += 1;
            true
        } else {
            false
        }
    }

    /// Get the canonical Slid for an element
    ///
    /// Note: This returns a Slid with the canonical index, but the actual
    /// element in the Structure is still at the original Slid.
    pub fn canonical(&mut self, slid: Slid) -> Slid {
        let idx = self.find(slid);
        Slid::from_usize(idx)
    }

    /// Get the number of elements tracked
    pub fn num_elements(&self) -> usize {
        self.merge_count + self.pending.len() // approximation
    }

    /// Get statistics about the congruence closure: (merges, pending)
    pub fn stats(&self) -> (usize, usize) {
        (self.merge_count, self.pending.len())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_congruence_closure_basic() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(0);
        let b = Slid::from_usize(1);
        let c = Slid::from_usize(2);

        // Initially all different
        assert!(!cc.are_equal(a, b));
        assert!(!cc.are_equal(b, c));
        assert!(!cc.are_equal(a, c));

        // Merge a and b
        assert!(cc.merge(a, b));
        assert!(cc.are_equal(a, b));
        assert!(!cc.are_equal(b, c));

        // Merge b and c (should transitively merge a and c)
        assert!(cc.merge(b, c));
        assert!(cc.are_equal(a, c));
        assert!(cc.are_equal(a, b));
        assert!(cc.are_equal(b, c));

        // Merging already equal elements returns false
        assert!(!cc.merge(a, c));
    }

    #[test]
    fn test_congruence_closure_pending() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(0);
        let b = Slid::from_usize(1);

        assert!(!cc.has_pending());

        cc.add_equation(a, b, EquationReason::UserAsserted);
        assert!(cc.has_pending());

        let eq = cc.pop_pending().unwrap();
        assert_eq!(eq.lhs, a);
        assert_eq!(eq.rhs, b);
        assert!(!cc.has_pending());
    }

    #[test]
    fn test_congruence_closure_stats() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(0);
        let b = Slid::from_usize(1);

        assert_eq!(cc.stats(), (0, 0));

        cc.merge(a, b);
        assert_eq!(cc.stats(), (1, 0));

        cc.add_equation(a, b, EquationReason::UserAsserted);
        assert_eq!(cc.stats(), (1, 1));
    }

    #[test]
    fn test_canonical() {
        let mut cc = CongruenceClosure::new();
        let a = Slid::from_usize(5);
        let b = Slid::from_usize(10);

        // Before merge, each is its own canonical
        let ca = cc.canonical(a);
        let cb = cc.canonical(b);
        assert_ne!(ca, cb);

        // After merge, both have same canonical
        cc.merge(a, b);
        let ca2 = cc.canonical(a);
        let cb2 = cc.canonical(b);
        assert_eq!(ca2, cb2);
    }
}

// ============================================================================
// Patch continues with a new file: src/core.rs (mode 100644, index 00b93b3)
// ============================================================================

//! Core internal representation for Geolog
//!
//! This is the typed, elaborated representation — closer to Owen's Lean formalization.
//! Surface syntax (ast.rs) elaborates into these types.

use std::collections::HashMap;

/// A unique identifier for sorts, used internally
pub type SortId = usize;

/// A unique identifier for function symbols
pub type FuncId = usize;

/// A unique identifier for relation symbols
pub type RelId = usize;

/// Derived sorts: base sorts or products of derived sorts
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DerivedSort {
    /// A base sort
    Base(SortId),
    /// A product of derived sorts (record/tuple)
    Product(Vec<(String, DerivedSort)>),
}

impl DerivedSort {
    pub fn base(id: SortId) -> Self {
        DerivedSort::Base(id)
    }

    pub fn product(fields: Vec<(String, DerivedSort)>) -> Self {
        DerivedSort::Product(fields)
    }

    pub fn unit() -> Self {
        DerivedSort::Product(vec![])
    }

    /// Returns the arity (number of atomic sorts) of this derived sort.
    /// For Product([x: A, y: B]), arity is 2.
    /// For Base(s), arity is 1.
    pub fn arity(&self) -> usize {
        match self {
            DerivedSort::Base(_) => 1,
            DerivedSort::Product(fields) => fields.len(),
        }
    }

    /// Returns the cardinality of this derived sort in a given structure.
    ///
    /// For Base(s), returns the carrier size of sort s.
    // …(chunk truncated here; the method body continues past this view)…
+ /// For Product([x: A, y: B, ...]), returns the product of cardinalities. + /// An empty product (unit type) has cardinality 1. + pub fn cardinality(&self, structure: &Structure) -> usize { + match self { + DerivedSort::Base(sort_id) => structure.carrier_size(*sort_id), + DerivedSort::Product(fields) => { + if fields.is_empty() { + 1 // Unit type has one inhabitant + } else { + fields + .iter() + .map(|(_, field_sort)| field_sort.cardinality(structure)) + .product() + } + } + } + } +} + +/// A function symbol with its domain and codomain +#[derive(Clone, Debug)] +pub struct FunctionSymbol { + pub name: String, + pub domain: DerivedSort, + pub codomain: DerivedSort, +} + +/// A relation symbol with its domain (relations have no codomain — they're predicates) +#[derive(Clone, Debug)] +pub struct RelationSymbol { + pub name: String, + pub domain: DerivedSort, +} + +/// An instance field declaration (a field that holds a sub-instance) +/// e.g., `initial_marking : N Marking instance;` +#[derive(Clone, Debug)] +pub struct InstanceFieldSymbol { + pub name: String, + /// The theory type expression (e.g., "N Marking" as a string for now) + /// This needs to be resolved with actual parameter bindings during instance elaboration + pub theory_type: String, +} + +/// A signature: sorts + function symbols + relation symbols + instance fields +#[derive(Clone, Debug, Default)] +pub struct Signature { + /// Sort names, indexed by SortId + pub sorts: Vec, + /// Map from sort name to SortId + pub sort_names: HashMap, + /// Function symbols + pub functions: Vec, + /// Map from function name to FuncId + pub func_names: HashMap, + /// Relation symbols + pub relations: Vec, + /// Map from relation name to RelId + pub rel_names: HashMap, + /// Instance field declarations (fields that hold sub-instances) + pub instance_fields: Vec, + /// Map from instance field name to index + pub instance_field_names: HashMap, +} + +impl Signature { + pub fn new() -> Self { + Self::default() + } + + 
pub fn add_sort(&mut self, name: String) -> SortId { + let id = self.sorts.len(); + self.sort_names.insert(name.clone(), id); + self.sorts.push(name); + id + } + + pub fn add_function( + &mut self, + name: String, + domain: DerivedSort, + codomain: DerivedSort, + ) -> FuncId { + let id = self.functions.len(); + self.func_names.insert(name.clone(), id); + self.functions.push(FunctionSymbol { + name, + domain, + codomain, + }); + id + } + + pub fn add_relation(&mut self, name: String, domain: DerivedSort) -> RelId { + let id = self.relations.len(); + self.rel_names.insert(name.clone(), id); + self.relations.push(RelationSymbol { name, domain }); + id + } + + pub fn lookup_sort(&self, name: &str) -> Option { + self.sort_names.get(name).copied() + } + + pub fn lookup_func(&self, name: &str) -> Option { + self.func_names.get(name).copied() + } + + pub fn lookup_rel(&self, name: &str) -> Option { + self.rel_names.get(name).copied() + } + + /// Add an instance field declaration. + /// Returns the field index (0-based). + pub fn add_instance_field(&mut self, name: String, theory_type: String) -> usize { + let id = self.instance_fields.len(); + self.instance_field_names.insert(name.clone(), id); + self.instance_fields.push(InstanceFieldSymbol { name, theory_type }); + id + } + + /// Look up an instance field by name + pub fn lookup_instance_field(&self, name: &str) -> Option { + self.instance_field_names.get(name).copied() + } +} + +// ============ Relation Storage ============ + +use crate::id::{NumericId, Slid}; +use roaring::RoaringTreemap; + +/// Tuple ID: index into the append-only tuple log +pub type TupleId = usize; + +/// Trait for relation storage implementations. 
+/// +/// Different implementations optimize for different access patterns: +/// - VecRelation: append-only log + membership bitmap (good for patches) +/// - Future: Dancing Cells for backtracking, multi-order tries for joins +pub trait RelationStorage { + /// Check if a tuple is in the relation + fn contains(&self, tuple: &[Slid]) -> bool; + + /// Insert a tuple, returns true if newly inserted + fn insert(&mut self, tuple: Vec) -> bool; + + /// Remove a tuple by marking it as not in extent, returns true if was present + fn remove(&mut self, tuple: &[Slid]) -> bool; + + /// Number of tuples currently in the relation + fn len(&self) -> usize; + + /// Check if empty + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Arity of tuples in this relation + fn arity(&self) -> usize; +} + +/// Append-only tuple log with membership bitmap. +/// +/// Tuples are assigned stable IDs (their index in the log). +/// The extent bitmap tracks which tuples are currently "true". +/// This representation handles cardinality changes gracefully since +/// tuple IDs are independent of sort cardinalities. 
+#[derive(Clone, Debug)] +pub struct VecRelation { + /// Arity of this relation (number of Slids per tuple) + pub arity: usize, + /// Append-only log of all tuples ever inserted + pub tuples: Vec>, + /// Map from tuple to its ID (for O(1) lookup) + pub tuple_to_id: HashMap, TupleId>, + /// Bitmap of tuple IDs currently in the extent + pub extent: RoaringTreemap, +} + +impl VecRelation { + /// Create a new empty relation with given arity + pub fn new(arity: usize) -> Self { + Self { + arity, + tuples: Vec::new(), + tuple_to_id: HashMap::new(), + extent: RoaringTreemap::new(), + } + } + + /// Get a tuple by ID + pub fn get_tuple(&self, id: TupleId) -> Option<&[Slid]> { + self.tuples.get(id).map(|v| v.as_slice()) + } + + /// Iterate over all tuples currently in the extent + pub fn iter(&self) -> impl Iterator + '_ { + self.extent.iter().filter_map(|id| self.tuples.get(id as usize).map(|v| v.as_slice())) + } + + /// Iterate over tuple IDs currently in the extent + pub fn iter_ids(&self) -> impl Iterator + '_ { + self.extent.iter().map(|id| id as TupleId) + } +} + +impl RelationStorage for VecRelation { + fn contains(&self, tuple: &[Slid]) -> bool { + if let Some(&id) = self.tuple_to_id.get(tuple) { + self.extent.contains(id as u64) + } else { + false + } + } + + fn insert(&mut self, tuple: Vec) -> bool { + debug_assert_eq!(tuple.len(), self.arity, "tuple arity mismatch"); + + if let Some(&id) = self.tuple_to_id.get(&tuple) { + // Tuple exists in log, just mark as present + if self.extent.contains(id as u64) { + false // Already present + } else { + self.extent.insert(id as u64); + true + } + } else { + // New tuple, append to log + let id = self.tuples.len(); + self.tuple_to_id.insert(tuple.clone(), id); + self.tuples.push(tuple); + self.extent.insert(id as u64); + true + } + } + + fn remove(&mut self, tuple: &[Slid]) -> bool { + if let Some(&id) = self.tuple_to_id.get(tuple) { + self.extent.remove(id as u64) + } else { + false + } + } + + fn len(&self) -> usize { + 
self.extent.len() as usize + } + + fn arity(&self) -> usize { + self.arity + } +} + +/// A typing context: a list of (variable_name, sort) pairs +#[derive(Clone, Debug, Default)] +pub struct Context { + /// Variables in scope, with their sorts + pub vars: Vec<(String, DerivedSort)>, +} + +impl Context { + pub fn new() -> Self { + Self::default() + } + + pub fn extend(&self, name: String, sort: DerivedSort) -> Self { + let mut new_ctx = self.clone(); + new_ctx.vars.push((name, sort)); + new_ctx + } + + pub fn lookup(&self, name: &str) -> Option<(usize, &DerivedSort)> { + self.vars + .iter() + .enumerate() + .rev() + .find(|(_, (n, _))| n == name) + .map(|(i, (_, s))| (i, s)) + } +} + +/// A well-typed term +#[derive(Clone, Debug)] +pub enum Term { + /// Variable reference (de Bruijn index would be cleaner, but names are more debuggable) + Var(String, DerivedSort), + /// Function application + App(FuncId, Box), + /// Record/tuple construction + Record(Vec<(String, Term)>), + /// Field projection + Project(Box, String), +} + +impl Term { + /// Get the sort of this term + pub fn sort(&self, sig: &Signature) -> DerivedSort { + match self { + Term::Var(_, s) => s.clone(), + Term::App(f, _) => sig.functions[*f].codomain.clone(), + Term::Record(fields) => DerivedSort::Product( + fields + .iter() + .map(|(n, t)| (n.clone(), t.sort(sig))) + .collect(), + ), + Term::Project(t, field) => { + if let DerivedSort::Product(fields) = t.sort(sig) { + fields + .into_iter() + .find(|(n, _)| n == field) + .map(|(_, s)| s) + .expect("field not found in product") + } else { + panic!("projection on non-product") + } + } + } + } +} + +/// A well-typed geometric formula +#[derive(Clone, Debug)] +pub enum Formula { + /// Relation application: R(t) where R is a relation symbol and t is a term + Rel(RelId, Term), + /// Truth + True, + /// Falsity + False, + /// Conjunction + Conj(Vec), + /// Disjunction (infinitary in general, but finite for now) + Disj(Vec), + /// Equality of terms (must have 
same sort) + Eq(Term, Term), + /// Existential quantification + Exists(String, DerivedSort, Box), +} + +/// A sequent: premise ⊢ conclusion (both in the same context) +#[derive(Clone, Debug)] +pub struct Sequent { + /// The context (bound variables) + pub context: Context, + /// The premise (antecedent) + pub premise: Formula, + /// The conclusion (consequent) + pub conclusion: Formula, +} + +/// A theory: a signature plus a set of axioms (sequents) +#[derive(Clone, Debug)] +pub struct Theory { + pub name: String, + pub signature: Signature, + pub axioms: Vec, + /// Axiom names (parallel to axioms vec), e.g. "ax/input_complete" + pub axiom_names: Vec, +} + +/// A theory can have parameters (other theories it depends on) +/// Note: This is forward-declared; the actual type is `Rc` +/// but we can't reference it here due to ordering. We use a type alias. +#[derive(Clone, Debug)] +pub struct TheoryParam { + pub name: String, + // This will be an Rc in practice + pub theory_name: String, +} + +/// An elaborated theory with its parameters +#[derive(Clone, Debug)] +pub struct ElaboratedTheory { + pub params: Vec, + pub theory: Theory, +} + +// ============ Structures (instances/models) ============ + +use crate::id::{Luid, OptLuid, OptSlid, SortSlid, Uuid, get_slid, some_slid}; +use crate::universe::Universe; + +/// A function column: either local (Slid) or external (Luid) references. +/// +/// For functions with local codomain (e.g., `src : in -> P` where P is local), +/// we use `Local(Vec)` for tight columnar storage. +/// +/// For functions with external codomain (e.g., `token/of : token -> N/P` where +/// N/P comes from a parent instance), we use `External(Vec)` to +/// reference elements in the parent by their Luid. +/// Storage for product-domain functions. +/// +/// Uses nested Vecs for efficient access and natural handling of carrier growth. 
+/// Sort-local indices are append-only, so existing indices remain stable when +/// carriers grow — we just extend the inner/outer Vecs. +#[derive(Clone, Debug)] +pub enum ProductStorage { + /// Binary product `[x: A, y: B]` → `Vec>` + /// Outer dim is A (first field), inner is B (second field). + /// Access: `rows[x_local][y_local]` + Binary(Vec>), + + /// Ternary product `[x: A, y: B, z: C]` → `Vec>>` + Ternary(Vec>>), + + /// Higher-arity products: fall back to HashMap for flexibility. + /// Keys are tuples of sort-local indices. + General(HashMap, Slid>), +} + +impl ProductStorage { + /// Create storage for binary product with given carrier sizes + pub fn new_binary(size_a: usize, size_b: usize) -> Self { + ProductStorage::Binary(vec![vec![None; size_b]; size_a]) + } + + /// Create storage for ternary product with given carrier sizes + pub fn new_ternary(size_a: usize, size_b: usize, size_c: usize) -> Self { + ProductStorage::Ternary(vec![vec![vec![None; size_c]; size_b]; size_a]) + } + + /// Create storage for general (n-ary) product + pub fn new_general() -> Self { + ProductStorage::General(HashMap::new()) + } + + /// Create storage based on arity and carrier sizes + pub fn new(carrier_sizes: &[usize]) -> Self { + match carrier_sizes.len() { + 2 => Self::new_binary(carrier_sizes[0], carrier_sizes[1]), + 3 => Self::new_ternary(carrier_sizes[0], carrier_sizes[1], carrier_sizes[2]), + _ => Self::new_general(), + } + } + + /// Get value at the given tuple of sort-local indices + pub fn get(&self, tuple: &[usize]) -> Option { + match self { + ProductStorage::Binary(rows) => { + debug_assert_eq!(tuple.len(), 2); + let opt = rows.get(tuple[0])?.get(tuple[1])?; + get_slid(*opt) + } + ProductStorage::Ternary(planes) => { + debug_assert_eq!(tuple.len(), 3); + let opt = planes.get(tuple[0])?.get(tuple[1])?.get(tuple[2])?; + get_slid(*opt) + } + ProductStorage::General(map) => map.get(tuple).copied(), + } + } + + /// Set value at the given tuple of sort-local indices + 
/// Returns Err if conflicting definition exists + pub fn set(&mut self, tuple: &[usize], value: Slid) -> Result<(), Slid> { + match self { + ProductStorage::Binary(rows) => { + debug_assert_eq!(tuple.len(), 2); + // Grow if needed (append-only growth) + while rows.len() <= tuple[0] { + rows.push(Vec::new()); + } + while rows[tuple[0]].len() <= tuple[1] { + rows[tuple[0]].push(None); + } + if let Some(existing) = get_slid(rows[tuple[0]][tuple[1]]) + && existing != value { + return Err(existing); + } + rows[tuple[0]][tuple[1]] = some_slid(value); + Ok(()) + } + ProductStorage::Ternary(planes) => { + debug_assert_eq!(tuple.len(), 3); + while planes.len() <= tuple[0] { + planes.push(Vec::new()); + } + while planes[tuple[0]].len() <= tuple[1] { + planes[tuple[0]].push(Vec::new()); + } + while planes[tuple[0]][tuple[1]].len() <= tuple[2] { + planes[tuple[0]][tuple[1]].push(None); + } + if let Some(existing) = get_slid(planes[tuple[0]][tuple[1]][tuple[2]]) + && existing != value { + return Err(existing); + } + planes[tuple[0]][tuple[1]][tuple[2]] = some_slid(value); + Ok(()) + } + ProductStorage::General(map) => { + if let Some(&existing) = map.get(tuple) + && existing != value { + return Err(existing); + } + map.insert(tuple.to_vec(), value); + Ok(()) + } + } + } + + /// Count of defined (Some) entries + pub fn defined_count(&self) -> usize { + match self { + ProductStorage::Binary(rows) => rows + .iter() + .flat_map(|row| row.iter()) + .filter(|&&v| v.is_some()) + .count(), + ProductStorage::Ternary(planes) => planes + .iter() + .flat_map(|plane| plane.iter()) + .flat_map(|row| row.iter()) + .filter(|&&v| v.is_some()) + .count(), + ProductStorage::General(map) => map.len(), + } + } + + /// Check if all entries are defined (total function) + pub fn is_total(&self, carrier_sizes: &[usize]) -> bool { + let expected = carrier_sizes.iter().product::(); + self.defined_count() == expected + } + + /// Iterate over all defined entries as (tuple, value) pairs + pub fn 
iter_defined(&self) -> Box, Slid)> + '_> { + match self { + ProductStorage::Binary(rows) => Box::new( + rows.iter() + .enumerate() + .flat_map(|(i, row)| { + row.iter().enumerate().filter_map(move |(j, &v)| { + get_slid(v).map(|s| (vec![i, j], s)) + }) + }), + ), + ProductStorage::Ternary(planes) => Box::new( + planes.iter().enumerate().flat_map(|(i, plane)| { + plane.iter().enumerate().flat_map(move |(j, row)| { + row.iter().enumerate().filter_map(move |(k, &v)| { + get_slid(v).map(|s| (vec![i, j, k], s)) + }) + }) + }), + ), + ProductStorage::General(map) => { + Box::new(map.iter().map(|(k, &v)| (k.clone(), v))) + } + } + } +} + +#[derive(Clone, Debug)] +pub enum FunctionColumn { + /// Base domain with local codomain: values are Slids within this structure + Local(Vec), + /// Base domain with external codomain (from parent): values are Luids + External(Vec), + /// Product domain with local codomain. + /// Stores field sort IDs for carrier size lookups during growth. + ProductLocal { + storage: ProductStorage, + field_sorts: Vec, + }, + /// Base domain with product codomain (multiple fields). + /// Each domain element maps to a tuple of codomain Slids. + ProductCodomain { + /// One column per field - field_columns[i][domain_idx] = codomain Slid for field i + field_columns: Vec>, + /// Field names in order + field_names: Vec, + /// Sort IDs for each codomain field + field_sorts: Vec, + /// Domain sort ID (for carrier size lookups during growth) + domain_sort: SortId, + }, +} + +/// Linearize a tuple of sort-local indices into a flat column index. +/// Uses row-major (lexicographic) order. 
+/// E.g., for field_sizes = [3, 4], tuple [1, 2] → 1*4 + 2 = 6 +pub fn linearize_tuple(tuple: &[usize], field_sizes: &[usize]) -> usize { + debug_assert_eq!(tuple.len(), field_sizes.len()); + let mut index = 0; + let mut stride = 1; + // Process in reverse for row-major order + for (i, &size) in field_sizes.iter().enumerate().rev() { + index += tuple[i] * stride; + stride *= size; + } + index +} + +/// Delinearize a flat column index back to tuple of sort-local indices. +pub fn delinearize_index(mut index: usize, field_sizes: &[usize]) -> Vec { + let mut tuple = vec![0; field_sizes.len()]; + // Process in reverse for row-major order + for (i, &size) in field_sizes.iter().enumerate().rev() { + tuple[i] = index % size; + index /= size; + } + tuple +} + +/// Compute total size of product domain (product of field carrier sizes) +pub fn product_domain_size(field_sizes: &[usize]) -> usize { + field_sizes.iter().product() +} + +impl FunctionColumn { + /// Get the total number of domain slots (for base domains only). + /// For product domains, returns 0 — use `defined_count()` instead. + pub fn len(&self) -> usize { + match self { + FunctionColumn::Local(v) => v.len(), + FunctionColumn::External(v) => v.len(), + FunctionColumn::ProductLocal { .. } => 0, // Product domains have dynamic size + FunctionColumn::ProductCodomain { field_columns, .. } => { + field_columns.first().map(|c| c.len()).unwrap_or(0) + } + } + } + + /// Get the number of defined entries (not total slots) + pub fn defined_count(&self) -> usize { + match self { + FunctionColumn::Local(v) => v.iter().filter(|x| x.is_some()).count(), + FunctionColumn::External(v) => v.iter().filter(|x| x.is_some()).count(), + FunctionColumn::ProductLocal { storage, .. } => storage.defined_count(), + FunctionColumn::ProductCodomain { field_columns, .. 
} => { + // Count entries where ALL fields are defined + if field_columns.is_empty() { + return 0; + } + let len = field_columns[0].len(); + (0..len) + .filter(|&i| field_columns.iter().all(|col| col.get(i).and_then(|x| *x).is_some())) + .count() + } + } + } + + /// Check if empty (no defined entries) + pub fn is_empty(&self) -> bool { + self.defined_count() == 0 + } + + /// Check if this is a local column (base domain, local codomain) + pub fn is_local(&self) -> bool { + matches!(self, FunctionColumn::Local(_)) + } + + /// Check if this is a product codomain column + pub fn is_product_codomain(&self) -> bool { + matches!(self, FunctionColumn::ProductCodomain { .. }) + } + + /// Check if this is an external column (base domain, external codomain) + pub fn is_external(&self) -> bool { + matches!(self, FunctionColumn::External(_)) + } + + /// Check if this is a product-domain column + pub fn is_product(&self) -> bool { + matches!(self, FunctionColumn::ProductLocal { .. }) + } + + /// Get local value at index (panics if not local or out of bounds) + pub fn get_local(&self, idx: usize) -> OptSlid { + match self { + FunctionColumn::Local(v) => v[idx], + FunctionColumn::External(_) => panic!("get_local called on external column"), + FunctionColumn::ProductLocal { .. } => panic!("get_local called on product domain column"), + FunctionColumn::ProductCodomain { .. } => panic!("get_local called on product codomain column"), + } + } + + /// Get external value at index (panics if not external or out of bounds) + pub fn get_external(&self, idx: usize) -> OptLuid { + match self { + FunctionColumn::External(v) => v[idx], + FunctionColumn::Local(_) => panic!("get_external called on local column"), + FunctionColumn::ProductLocal { .. } => panic!("get_external called on product domain column"), + FunctionColumn::ProductCodomain { .. 
} => panic!("get_external called on product codomain column"), + } + } + + /// Iterate over local values (panics if not local) + pub fn iter_local(&self) -> impl Iterator { + match self { + FunctionColumn::Local(v) => v.iter(), + FunctionColumn::External(_) => panic!("iter_local called on external column"), + FunctionColumn::ProductLocal { .. } => panic!("iter_local called on product domain column"), + FunctionColumn::ProductCodomain { .. } => panic!("iter_local called on product codomain column"), + } + } + + /// Iterate over external values (panics if not external) + pub fn iter_external(&self) -> impl Iterator { + match self { + FunctionColumn::External(v) => v.iter(), + FunctionColumn::Local(_) => panic!("iter_external called on local column"), + FunctionColumn::ProductLocal { .. } => panic!("iter_external called on product domain column"), + FunctionColumn::ProductCodomain { .. } => panic!("iter_external called on product codomain column"), + } + } + + /// Get as local column (returns None if external or product) + pub fn as_local(&self) -> Option<&Vec> { + match self { + FunctionColumn::Local(v) => Some(v), + FunctionColumn::External(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get as mutable local column (returns None if external or product) + pub fn as_local_mut(&mut self) -> Option<&mut Vec> { + match self { + FunctionColumn::Local(v) => Some(v), + FunctionColumn::External(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get product value for a tuple of sort-local indices + pub fn get_product(&self, tuple: &[usize]) -> Option { + match self { + FunctionColumn::ProductLocal { storage, .. } => storage.get(tuple), + _ => None, + } + } + + /// Get field sort IDs for product column (returns None if not product) + pub fn field_sorts(&self) -> Option<&[SortId]> { + match self { + FunctionColumn::ProductLocal { field_sorts, .. 
} => Some(field_sorts), + _ => None, + } + } + + /// Get product storage (returns None if not product) + pub fn as_product(&self) -> Option<&ProductStorage> { + match self { + FunctionColumn::ProductLocal { storage, .. } => Some(storage), + _ => None, + } + } + + /// Get mutable product storage (returns None if not product) + pub fn as_product_mut(&mut self) -> Option<&mut ProductStorage> { + match self { + FunctionColumn::ProductLocal { storage, .. } => Some(storage), + _ => None, + } + } + + /// Iterate over defined product entries as (tuple, value) pairs + pub fn iter_product_defined(&self) -> Option, Slid)> + '_>> { + match self { + FunctionColumn::ProductLocal { storage, .. } => Some(storage.iter_defined()), + _ => None, + } + } +} + +/// A structure: interpretation of a signature in FinSet +/// +/// This is a model/instance of a theory — a functor from the signature to FinSet: +/// - Each sort maps to a finite set of elements +/// - Each function symbol maps to a function between those sets +/// - Each relation symbol maps to a set of tuples (subset of product of carriers) +/// +/// Elements are identified by Luids (Locally Universal IDs) which reference +/// UUIDs in the global Universe. This allows efficient integer operations +/// while maintaining stable identity across versions. +/// +/// Note: Human-readable names are stored separately in a NamingIndex, keyed by UUID. +/// This structure contains only UUIDs and their relationships. 
+#[derive(Clone, Debug)] +pub struct Structure { + /// Reference to the theory this is an instance of (Luid of the Theory element) + /// None for structures that ARE theories (e.g., GeologMeta instances) + pub theory_luid: Option, + + /// Global identity: Slid → Luid (references Universe for UUID lookup) + pub luids: Vec, + + /// Reverse lookup: Luid → Slid (for finding elements by their global ID) + pub luid_to_slid: HashMap, + + /// Element sorts: Slid → SortId + pub sorts: Vec, + + /// Carriers: SortId → RoaringTreemap of Slids in that sort + pub carriers: Vec, + + /// Functions: FuncId → FunctionColumn + /// Each column is indexed by domain SortSlid and contains codomain references. + /// Local codomains use Slid; external codomains (from parents) use Luid. + pub functions: Vec, + + /// Relations: RelId → VecRelation (append-only tuple log + membership bitmap) + pub relations: Vec, + + /// Parent instances for parameterized theories (virtual import). + /// Maps param name → UUID of immutable parent instance. + /// E.g., for `problem0 : ExampleNet ReachabilityProblem`, this contains {"N": uuid_of_ExampleNet} + pub parents: HashMap, + + /// Nested structures (for instance-valued fields). + /// Maps field name → nested Structure. + /// E.g., for `initial_marking = { ... 
}`, this contains {"initial_marking": Structure} + pub nested: HashMap, +} + +/// Function init info: domain sort ID and whether codomain is external +#[derive(Clone, Debug)] +pub struct FunctionInitInfo { + pub domain_sort_id: Option, + pub codomain_is_external: bool, +} + +/// Domain info for function initialization +#[derive(Clone, Debug)] +pub enum FunctionDomainInfo { + /// Base sort domain: just the sort ID + Base(SortId), + /// Product domain: list of sort IDs for each field + Product(Vec), +} + +/// Full function initialization info (domain + codomain) +#[derive(Clone, Debug)] +pub struct FunctionFullInfo { + pub domain: FunctionDomainInfo, + pub codomain: FunctionCodomainInfo, +} + +/// Codomain info for function initialization +#[derive(Clone, Debug)] +pub enum FunctionCodomainInfo { + /// Base sort codomain (local): values are Slids within this structure + Local(SortId), + /// Base sort codomain (external): values are Luids from parent + External, + /// Product codomain: field names and sort IDs + Product { field_names: Vec, field_sorts: Vec }, +} + +impl Structure { + /// Create a new empty structure. + /// Note: functions and relations are not pre-allocated here; call + /// `init_functions()` and `init_relations()` after elements are added. + pub fn new(num_sorts: usize) -> Self { + Self { + theory_luid: None, + luids: Vec::new(), + luid_to_slid: HashMap::new(), + sorts: Vec::new(), + carriers: vec![RoaringTreemap::new(); num_sorts], + functions: Vec::new(), // Initialized later via init_functions() + relations: Vec::new(), // Initialized later via init_relations() + parents: HashMap::new(), + nested: HashMap::new(), + } + } + + /// Create a structure that is an instance of the given theory + pub fn new_instance(theory_luid: Luid, num_sorts: usize) -> Self { + Self { + theory_luid: Some(theory_luid), + ..Self::new(num_sorts) + } + } + + /// Initialize function storage based on domain carrier sizes. + /// Must be called after all elements are added. 
+ /// + /// For simple (non-parameterized) instances, use `init_functions_local()`. + /// For parameterized instances with external codomains, use this method. + pub fn init_functions_ext(&mut self, func_info: &[FunctionInitInfo]) { + self.functions = func_info + .iter() + .map(|info| { + let size = match info.domain_sort_id { + Some(sort_id) => self.carrier_size(sort_id), + None => 0, // Product domains deferred + }; + if info.codomain_is_external { + FunctionColumn::External(vec![None; size]) + } else { + FunctionColumn::Local(vec![None; size]) + } + }) + .collect(); + } + + /// Initialize function storage for simple (non-parameterized) instances. + /// All codomains are assumed to be local. + /// Pass `None` for product-domain functions; pass `Some(sort_id)` for base-domain functions. + pub fn init_functions(&mut self, domain_sort_ids: &[Option]) { + self.functions = domain_sort_ids + .iter() + .map(|opt_sort_id| match opt_sort_id { + Some(sort_id) => FunctionColumn::Local(vec![None; self.carrier_size(*sort_id)]), + None => { + // Legacy: product domains without size info get empty ProductLocal + // Use init_functions_full for proper initialization + FunctionColumn::ProductLocal { + storage: ProductStorage::new_general(), + field_sorts: Vec::new(), + } + } + }) + .collect(); + } + + /// Initialize function storage with full domain info (supports product domains). 
+ pub fn init_functions_full(&mut self, domains: &[FunctionDomainInfo]) { + self.functions = domains + .iter() + .map(|domain| match domain { + FunctionDomainInfo::Base(sort_id) => { + FunctionColumn::Local(vec![None; self.carrier_size(*sort_id)]) + } + FunctionDomainInfo::Product(field_sort_ids) => { + let carrier_sizes: Vec = field_sort_ids + .iter() + .map(|&sort_id| self.carrier_size(sort_id)) + .collect(); + FunctionColumn::ProductLocal { + storage: ProductStorage::new(&carrier_sizes), + field_sorts: field_sort_ids.clone(), + } + } + }) + .collect(); + } + + /// Initialize function storage with complete info (domain AND codomain types). + /// This supports product codomains in addition to product domains. + pub fn init_functions_complete(&mut self, funcs: &[FunctionFullInfo]) { + self.functions = funcs + .iter() + .map(|info| { + match (&info.domain, &info.codomain) { + // Base domain, base local codomain + (FunctionDomainInfo::Base(domain_sort), FunctionCodomainInfo::Local(_)) => { + FunctionColumn::Local(vec![None; self.carrier_size(*domain_sort)]) + } + // Base domain, external codomain + (FunctionDomainInfo::Base(domain_sort), FunctionCodomainInfo::External) => { + FunctionColumn::External(vec![None; self.carrier_size(*domain_sort)]) + } + // Base domain, product codomain + (FunctionDomainInfo::Base(domain_sort), FunctionCodomainInfo::Product { field_names, field_sorts }) => { + let size = self.carrier_size(*domain_sort); + FunctionColumn::ProductCodomain { + field_columns: vec![vec![None; size]; field_names.len()], + field_names: field_names.clone(), + field_sorts: field_sorts.clone(), + domain_sort: *domain_sort, + } + } + // Product domain, local codomain + (FunctionDomainInfo::Product(field_sort_ids), FunctionCodomainInfo::Local(_)) => { + let carrier_sizes: Vec = field_sort_ids + .iter() + .map(|&sort_id| self.carrier_size(sort_id)) + .collect(); + FunctionColumn::ProductLocal { + storage: ProductStorage::new(&carrier_sizes), + field_sorts: 
field_sort_ids.clone(), + } + } + // Product domain with external or product codomain - not yet supported + (FunctionDomainInfo::Product(_), _) => { + // Fall back to ProductLocal with empty storage + FunctionColumn::ProductLocal { + storage: ProductStorage::new_general(), + field_sorts: Vec::new(), + } + } + } + }) + .collect(); + } + + /// Initialize relation storage based on arities. + /// Must be called after all elements are added. + /// + /// `arities[rel_id]` is the number of fields in the relation's domain. + /// For a relation `child : [parent: Node, child: Node]`, arity is 2. + pub fn init_relations(&mut self, arities: &[usize]) { + self.relations = arities.iter().map(|&arity| VecRelation::new(arity)).collect(); + } + + /// Assert a tuple in a relation: R(tuple) + /// Returns true if the tuple was newly inserted. + pub fn assert_relation(&mut self, rel_id: RelId, tuple: Vec) -> bool { + self.relations[rel_id].insert(tuple) + } + + /// Retract a tuple from a relation + /// Returns true if the tuple was present. + pub fn retract_relation(&mut self, rel_id: RelId, tuple: &[Slid]) -> bool { + self.relations[rel_id].remove(tuple) + } + + /// Check if a tuple is in a relation + pub fn query_relation(&self, rel_id: RelId, tuple: &[Slid]) -> bool { + self.relations[rel_id].contains(tuple) + } + + /// Get a reference to a relation's storage + pub fn get_relation(&self, rel_id: RelId) -> &VecRelation { + &self.relations[rel_id] + } + + /// Get a mutable reference to a relation's storage + pub fn get_relation_mut(&mut self, rel_id: RelId) -> &mut VecRelation { + &mut self.relations[rel_id] + } + + /// Get the number of relations in this structure + pub fn num_relations(&self) -> usize { + self.relations.len() + } + + /// Add a new element to the structure, registering its UUID in the universe. + /// Returns the (Slid, Luid) for the new element. + /// Note: Names are registered separately in a NamingIndex. 
+ pub fn add_element(&mut self, universe: &mut Universe, sort_id: SortId) -> (Slid, Luid) { + let uuid = Uuid::now_v7(); + let luid = universe.intern(uuid); + let slid = self.add_element_with_luid(luid, sort_id); + (slid, luid) + } + + /// Add an element with a specific Luid (used when applying patches or loading) + pub fn add_element_with_luid(&mut self, luid: Luid, sort_id: SortId) -> Slid { + let slid = Slid::from_usize(self.luids.len()); + + self.luids.push(luid); + self.luid_to_slid.insert(luid, slid); + self.sorts.push(sort_id); + self.carriers[sort_id].insert(slid.index() as u64); + + slid + } + + /// Add an element with a specific UUID, registering it in the universe. + /// Used when applying patches that reference UUIDs. + pub fn add_element_with_uuid( + &mut self, + universe: &mut Universe, + uuid: Uuid, + sort_id: SortId, + ) -> (Slid, Luid) { + let luid = universe.intern(uuid); + let slid = self.add_element_with_luid(luid, sort_id); + (slid, luid) + } + + /// Define a function value for a local codomain (Slid → Slid). + /// Uses SortSlid indexing into the columnar function storage. + /// Automatically grows the column if needed. 
+ pub fn define_function( + &mut self, + func_id: FuncId, + domain_slid: Slid, + codomain_slid: Slid, + ) -> Result<(), String> { + let domain_sort_slid = self.sort_local_id(domain_slid); + let idx = domain_sort_slid.index(); + + match &mut self.functions[func_id] { + FunctionColumn::Local(col) => { + // Grow column if needed + if idx >= col.len() { + col.resize(idx + 1, None); // None = undefined + } + if let Some(existing) = get_slid(col[idx]) + && existing != codomain_slid + { + return Err(format!( + "conflicting definition: func {}(slid {}) already defined as slid {}, cannot redefine as slid {}", + func_id, domain_slid, existing, codomain_slid + )); + } + col[idx] = some_slid(codomain_slid); + Ok(()) + } + FunctionColumn::External(_) => Err(format!( + "func {} has external codomain, use define_function_ext", + func_id + )), + FunctionColumn::ProductLocal { .. } => Err(format!( + "func {} has product domain, use define_function_product", + func_id + )), + FunctionColumn::ProductCodomain { .. } => Err(format!( + "func {} has product codomain, use define_function_product_codomain", + func_id + )), + } + } + + /// Define a function value for an external codomain (Slid → Luid). + /// Used for functions referencing parent instance elements. + /// Automatically grows the column if needed. 
+ pub fn define_function_ext( + &mut self, + func_id: FuncId, + domain_slid: Slid, + codomain_luid: Luid, + ) -> Result<(), String> { + use crate::id::{get_luid, some_luid}; + let domain_sort_slid = self.sort_local_id(domain_slid); + let idx = domain_sort_slid.index(); + + match &mut self.functions[func_id] { + FunctionColumn::External(col) => { + // Grow column if needed + if idx >= col.len() { + col.resize(idx + 1, None); // None = undefined + } + if let Some(existing) = get_luid(col[idx]) + && existing != codomain_luid + { + return Err(format!( + "conflicting definition: func {}(slid {}) already defined as luid {}, cannot redefine as luid {}", + func_id, domain_slid, existing, codomain_luid + )); + } + col[idx] = some_luid(codomain_luid); + Ok(()) + } + FunctionColumn::Local(_) => Err(format!( + "func {} has local codomain, use define_function", + func_id + )), + FunctionColumn::ProductLocal { .. } => Err(format!( + "func {} has product domain, use define_function_product", + func_id + )), + FunctionColumn::ProductCodomain { .. } => Err(format!( + "func {} has product codomain, use define_function_product_codomain", + func_id + )), + } + } + + /// Define a function value for a product domain (tuple of Slids → Slid). + /// Used for functions like `mul : [x: M, y: M] -> M`. + /// + /// The domain_tuple contains Slids which are converted to sort-local indices + /// for storage in the nested Vec structure. + pub fn define_function_product( + &mut self, + func_id: FuncId, + domain_tuple: &[Slid], + codomain_slid: Slid, + ) -> Result<(), String> { + // Convert Slids to sort-local indices for storage + let local_indices: Vec = domain_tuple + .iter() + .map(|&slid| self.sort_local_id(slid).index()) + .collect(); + + match &mut self.functions[func_id] { + FunctionColumn::ProductLocal { storage, .. 
} => { + storage.set(&local_indices, codomain_slid).map_err(|existing| { + format!( + "conflicting definition: func {}({:?}) already defined as slid {}, cannot redefine as slid {}", + func_id, domain_tuple, existing, codomain_slid + ) + }) + } + FunctionColumn::Local(_) => Err(format!( + "func {} has base domain, use define_function", + func_id + )), + FunctionColumn::External(_) => Err(format!( + "func {} has external codomain, use define_function_ext", + func_id + )), + FunctionColumn::ProductCodomain { .. } => Err(format!( + "func {} has product codomain, use define_function_product_codomain", + func_id + )), + } + } + + /// Define a function value for a product codomain (Slid → tuple of Slids). + /// Used for functions like `f : A -> [x: B, y: C]`. + /// + /// The codomain_values is a slice of (field_name, Slid) pairs. + pub fn define_function_product_codomain( + &mut self, + func_id: FuncId, + domain_slid: Slid, + codomain_values: &[(&str, Slid)], + ) -> Result<(), String> { + let domain_sort_slid = self.sort_local_id(domain_slid); + let idx = domain_sort_slid.index(); + + match &mut self.functions[func_id] { + FunctionColumn::ProductCodomain { field_columns, field_names, domain_sort, .. 
} => { + // Grow columns if needed + for col in field_columns.iter_mut() { + if idx >= col.len() { + col.resize(idx + 1, None); + } + } + + // Set each field value + for (field_name, slid) in codomain_values { + let field_idx = field_names.iter() + .position(|n| n == field_name) + .ok_or_else(|| format!( + "unknown field '{}' in product codomain (available: {:?})", + field_name, field_names + ))?; + + if let Some(existing) = get_slid(field_columns[field_idx][idx]) + && existing != *slid { + return Err(format!( + "conflicting definition: func {}(slid {}).{} already defined as slid {}, cannot redefine as slid {}", + func_id, domain_slid, field_name, existing, slid + )); + } + field_columns[field_idx][idx] = some_slid(*slid); + } + let _ = domain_sort; // silence unused warning + Ok(()) + } + FunctionColumn::Local(_) => Err(format!( + "func {} has local codomain, use define_function", + func_id + )), + FunctionColumn::External(_) => Err(format!( + "func {} has external codomain, use define_function_ext", + func_id + )), + FunctionColumn::ProductLocal { .. } => Err(format!( + "func {} has product domain, use define_function_product", + func_id + )), + } + } + + /// Get function value for local codomain (base domain only). + pub fn get_function(&self, func_id: FuncId, domain_sort_slid: SortSlid) -> Option { + let idx = domain_sort_slid.index(); + match &self.functions[func_id] { + FunctionColumn::Local(col) => col.get(idx).and_then(|&opt| get_slid(opt)), + FunctionColumn::External(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get function value for product codomain. + /// Returns a Vec of (field_name, Slid) pairs, or None if not fully defined. + pub fn get_function_product_codomain( + &self, + func_id: FuncId, + domain_sort_slid: SortSlid, + ) -> Option> { + let idx = domain_sort_slid.index(); + match &self.functions[func_id] { + FunctionColumn::ProductCodomain { field_columns, field_names, .. 
} => { + // All fields must be defined + let mut result = Vec::with_capacity(field_names.len()); + for (i, name) in field_names.iter().enumerate() { + let slid = get_slid(*field_columns[i].get(idx)?)?; + result.push((name.clone(), slid)); + } + Some(result) + } + _ => None, + } + } + + /// Get function value for external codomain (returns Luid). + pub fn get_function_ext(&self, func_id: FuncId, domain_sort_slid: SortSlid) -> Option { + use crate::id::get_luid; + let idx = domain_sort_slid.index(); + match &self.functions[func_id] { + FunctionColumn::External(col) => col.get(idx).and_then(|&opt| get_luid(opt)), + FunctionColumn::Local(_) + | FunctionColumn::ProductLocal { .. } + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get function value for product domain. + /// Takes a tuple of Slids and converts them to sort-local indices for lookup. + pub fn get_function_product(&self, func_id: FuncId, domain_tuple: &[Slid]) -> Option { + // Convert Slids to sort-local indices + let local_indices: Vec = domain_tuple + .iter() + .map(|&slid| self.sort_local_id(slid).index()) + .collect(); + + match &self.functions[func_id] { + FunctionColumn::ProductLocal { storage, .. } => storage.get(&local_indices), + FunctionColumn::Local(_) + | FunctionColumn::External(_) + | FunctionColumn::ProductCodomain { .. } => None, + } + } + + /// Get the sort-local index for an element (0-based position within its carrier). + /// + /// # Roaring bitmap rank() semantics + /// `rank(x)` returns the count of elements ≤ x in the bitmap. + /// For a bitmap containing {4}: rank(3)=0, rank(4)=1, rank(5)=1. + /// So 0-based index = rank(x) - 1. 
+ pub fn sort_local_id(&self, slid: Slid) -> SortSlid { + let sort_id = self.sorts[slid.index()]; + SortSlid::from_usize((self.carriers[sort_id].rank(slid.index() as u64) - 1) as usize) + } + + /// Look up element by Luid + pub fn lookup_luid(&self, luid: Luid) -> Option { + self.luid_to_slid.get(&luid).copied() + } + + /// Get the Luid for a Slid + pub fn get_luid(&self, slid: Slid) -> Luid { + self.luids[slid.index()] + } + + /// Get the UUID for a Slid (requires Universe lookup) + pub fn get_uuid(&self, slid: Slid, universe: &Universe) -> Option { + universe.get(self.luids[slid.index()]) + } + + /// Get element count + pub fn len(&self) -> usize { + self.luids.len() + } + + /// Check if empty + pub fn is_empty(&self) -> bool { + self.luids.is_empty() + } + + /// Get carrier size for a sort + pub fn carrier_size(&self, sort_id: SortId) -> usize { + self.carriers[sort_id].len() as usize + } + + /// Get the number of sorts in this structure + pub fn num_sorts(&self) -> usize { + self.carriers.len() + } + + /// Get the number of functions in this structure + pub fn num_functions(&self) -> usize { + self.functions.len() + } +} + +impl std::fmt::Display for DerivedSort { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DerivedSort::Base(id) => write!(f, "Sort#{}", id), + DerivedSort::Product(fields) if fields.is_empty() => write!(f, "()"), + DerivedSort::Product(fields) => { + write!(f, "[")?; + for (i, (name, sort)) in fields.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}: {}", name, sort)?; + } + write!(f, "]") + } + } + } +} + +// ============ Display implementations for debugging ============ + +// Main unit tests moved to tests/proptest_structure.rs + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_derived_sort_cardinality_base() { + let mut structure = Structure::new(2); + // Add elements to sort 0: 3 elements + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + 
structure.carriers[0].insert(2); + // Add elements to sort 1: 2 elements + structure.carriers[1].insert(0); + structure.carriers[1].insert(1); + + let base0 = DerivedSort::Base(0); + let base1 = DerivedSort::Base(1); + + assert_eq!(base0.cardinality(&structure), 3); + assert_eq!(base1.cardinality(&structure), 2); + } + + #[test] + fn test_derived_sort_cardinality_product() { + let mut structure = Structure::new(2); + // Sort 0: 3 elements + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + // Sort 1: 2 elements + structure.carriers[1].insert(0); + structure.carriers[1].insert(1); + + // Product [x: A, y: B] where |A| = 3, |B| = 2 should have cardinality 6 + let product = DerivedSort::Product(vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(1)), + ]); + assert_eq!(product.cardinality(&structure), 6); + } + + #[test] + fn test_derived_sort_cardinality_unit() { + let structure = Structure::new(1); + + // Unit type (empty product) has cardinality 1 + let unit = DerivedSort::unit(); + assert_eq!(unit.cardinality(&structure), 1); + } + + #[test] + fn test_derived_sort_cardinality_empty_carrier() { + let structure = Structure::new(1); + + // Empty carrier has cardinality 0 + let base = DerivedSort::Base(0); + assert_eq!(base.cardinality(&structure), 0); + + // Product with empty carrier has cardinality 0 + let product = DerivedSort::Product(vec![ + ("x".to_string(), DerivedSort::Base(0)), + ]); + assert_eq!(product.cardinality(&structure), 0); + } +} diff --git a/src/elaborate/env.rs b/src/elaborate/env.rs new file mode 100644 index 0000000..b2b0e75 --- /dev/null +++ b/src/elaborate/env.rs @@ -0,0 +1,315 @@ +//! Elaboration environment and basic elaboration functions. 
+ +use std::collections::HashMap; +use std::rc::Rc; + +use crate::ast; +use crate::core::*; + +use super::error::{ElabError, ElabResult}; + +/// Environment for elaboration — tracks what's in scope +#[derive(Clone, Debug, Default)] +pub struct Env { + /// Known theories, by name + pub theories: HashMap>, + /// Current theory being elaborated (if any) + pub current_theory: Option, + /// Local signature being built + pub signature: Signature, + /// Parameters in scope (for parameterized theories) + pub params: Vec<(String, Rc)>, +} + +impl Env { + pub fn new() -> Self { + Self::default() + } + + /// Resolve a path like "N/P" where N is a parameter and P is a sort in N's theory. + /// + /// All param sorts are copied into the local signature with qualified names (e.g., "N/P"), + /// so we just need to look up the joined path in the current signature. + pub fn resolve_sort_path(&self, path: &ast::Path) -> ElabResult { + // Join all segments with "/" — this handles both simple names like "F" + // and qualified names like "N/P" + let full_name = path.segments.join("/"); + if let Some(id) = self.signature.lookup_sort(&full_name) { + return Ok(DerivedSort::Base(id)); + } + Err(ElabError::UnknownSort(full_name)) + } + + /// Resolve a function path like "N/in/src" or "F/of". + /// + /// All param functions are copied into the local signature with qualified names, + /// so we just need to look up the joined path. + pub fn resolve_func_path(&self, path: &ast::Path) -> ElabResult { + let full_name = path.segments.join("/"); + if let Some(id) = self.signature.lookup_func(&full_name) { + return Ok(id); + } + Err(ElabError::UnknownFunction(full_name)) + } +} + +/// Elaborate a type expression into a DerivedSort +/// +/// Uses the concatenative stack-based type evaluator. 
+pub fn elaborate_type(env: &Env, ty: &ast::TypeExpr) -> ElabResult { + use super::types::eval_type_expr; + + let val = eval_type_expr(ty, env)?; + val.as_derived_sort(env) +} + +/// Elaborate a term in a given context +pub fn elaborate_term(env: &Env, ctx: &Context, term: &ast::Term) -> ElabResult { + match term { + ast::Term::Path(path) => { + if path.segments.len() == 1 { + // Simple variable + let name = &path.segments[0]; + if let Some((_, sort)) = ctx.lookup(name) { + return Ok(Term::Var(name.clone(), sort.clone())); + } + return Err(ElabError::UnknownVariable(name.clone())); + } + // Qualified path — could be a variable or a function reference + // For now, treat as variable lookup failure + Err(ElabError::UnknownVariable(path.to_string())) + } + ast::Term::App(base, func) => { + // In surface syntax, application is postfix: `x f` means apply f to x + // So App(base, func) where base is the argument and func is the function + // First, elaborate the base (the argument) + let elab_arg = elaborate_term(env, ctx, base)?; + let arg_sort = elab_arg.sort(&env.signature); + + // Then figure out what the function is + match func.as_ref() { + ast::Term::Path(path) => { + let func_id = env.resolve_func_path(path)?; + let func_sym = &env.signature.functions[func_id]; + + // Type check: argument sort must match function domain + if arg_sort != func_sym.domain { + return Err(ElabError::TypeMismatch { + expected: func_sym.domain.clone(), + got: arg_sort, + }); + } + + Ok(Term::App(func_id, Box::new(elab_arg))) + } + _ => { + // Higher-order application — not supported yet + Err(ElabError::UnsupportedFeature( + "higher-order application".to_string(), + )) + } + } + } + ast::Term::Project(base, field) => { + let elab_base = elaborate_term(env, ctx, base)?; + Ok(Term::Project(Box::new(elab_base), field.clone())) + } + ast::Term::Record(fields) => { + let elab_fields: Result, _> = fields + .iter() + .map(|(name, term)| elaborate_term(env, ctx, term).map(|t| (name.clone(), 
t))) + .collect(); + Ok(Term::Record(elab_fields?)) + } + } +} + +/// Elaborate a formula +pub fn elaborate_formula(env: &Env, ctx: &Context, formula: &ast::Formula) -> ElabResult { + match formula { + ast::Formula::True => Ok(Formula::True), + ast::Formula::False => Ok(Formula::False), + ast::Formula::Eq(lhs, rhs) => { + let elab_lhs = elaborate_term(env, ctx, lhs)?; + let elab_rhs = elaborate_term(env, ctx, rhs)?; + + // Type check: both sides must have the same sort + let lhs_sort = elab_lhs.sort(&env.signature); + let rhs_sort = elab_rhs.sort(&env.signature); + if lhs_sort != rhs_sort { + return Err(ElabError::TypeMismatch { + expected: lhs_sort, + got: rhs_sort, + }); + } + + Ok(Formula::Eq(elab_lhs, elab_rhs)) + } + ast::Formula::And(conjuncts) => { + let elab: Result, _> = conjuncts + .iter() + .map(|f| elaborate_formula(env, ctx, f)) + .collect(); + Ok(Formula::Conj(elab?)) + } + ast::Formula::Or(disjuncts) => { + let elab: Result, _> = disjuncts + .iter() + .map(|f| elaborate_formula(env, ctx, f)) + .collect(); + Ok(Formula::Disj(elab?)) + } + ast::Formula::Exists(vars, body) => { + // Extend context with quantified variables + let mut extended_ctx = ctx.clone(); + for qv in vars { + let sort = elaborate_type(env, &qv.ty)?; + for name in &qv.names { + extended_ctx = extended_ctx.extend(name.clone(), sort.clone()); + } + } + let elab_body = elaborate_formula(env, &extended_ctx, body)?; + + // Build nested existentials (one for each variable) + let mut result = elab_body; + for qv in vars.iter().rev() { + let sort = elaborate_type(env, &qv.ty)?; + for name in qv.names.iter().rev() { + result = Formula::Exists(name.clone(), sort.clone(), Box::new(result)); + } + } + Ok(result) + } + ast::Formula::RelApp(rel_name, arg) => { + // Look up the relation + let rel_id = env + .signature + .lookup_rel(rel_name) + .ok_or_else(|| ElabError::UnknownRel(rel_name.clone()))?; + + // Elaborate the argument + let elab_arg = elaborate_term(env, ctx, arg)?; + + // Type check: 
argument must match relation domain + let rel_sym = &env.signature.relations[rel_id]; + let arg_sort = elab_arg.sort(&env.signature); + if arg_sort != rel_sym.domain { + return Err(ElabError::TypeMismatch { + expected: rel_sym.domain.clone(), + got: arg_sort, + }); + } + + Ok(Formula::Rel(rel_id, elab_arg)) + } + } +} + +/// Remap a DerivedSort for nested instance fields. +/// +/// When copying sorts/functions from a nested instance field's theory into the local signature, +/// we need different remapping rules: +/// - Unqualified sorts (like "Token" in Marking) get prefixed with field_prefix (e.g., "RP/initial/Token") +/// - Already-qualified sorts (like "N/P" in Marking) map to the parent param (e.g., just "N/P") +/// +/// # Arguments +/// * `field_prefix` - The prefix for the nested field (e.g., "RP/initial") +/// * `parent_param` - The parent parameter name (e.g., "RP"), used to strip when mapping qualified sorts +#[allow(dead_code)] +pub(crate) fn remap_derived_sort_for_nested( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + field_prefix: &str, + parent_param: &str, +) -> DerivedSort { + match sort { + DerivedSort::Base(source_id) => { + let sort_name = &source_sig.sorts[*source_id]; + let qualified_name = if sort_name.contains('/') { + // Already qualified (e.g., "N/P" from a parameterized theory) + // Try to find it directly in the target (e.g., "N/P" should exist from outer param) + // If not found, try with parent param prefix (e.g., "RP/N/P") + if target_sig.lookup_sort(sort_name).is_some() { + sort_name.clone() + } else { + format!("{}/{}", parent_param, sort_name) + } + } else { + // Unqualified sort from the field's theory - prefix with field_prefix + format!("{}/{}", field_prefix, sort_name) + }; + if let Some(target_id) = target_sig.lookup_sort(&qualified_name) { + DerivedSort::Base(target_id) + } else { + // Fallback: just use the source ID (shouldn't happen in well-formed code) + eprintln!( + "Warning: could not remap 
sort '{}' (qualified: '{}') in nested field", + sort_name, qualified_name + ); + sort.clone() + } + } + DerivedSort::Product(fields) => { + let remapped_fields = fields + .iter() + .map(|(name, s)| { + ( + name.clone(), + remap_derived_sort_for_nested(s, source_sig, target_sig, field_prefix, parent_param), + ) + }) + .collect(); + DerivedSort::Product(remapped_fields) + } + } +} + +/// Remap a DerivedSort from one signature namespace to another. +/// +/// When copying sorts/functions from a param theory into the local signature, +/// the sort IDs need to be remapped. For example, if PetriNet has sort P at id=0, +/// and we copy it as "N/P" into local signature at id=2, then any DerivedSort::Base(0) +/// needs to become DerivedSort::Base(2). +/// +/// The `preserve_existing_prefix` flag controls requalification behavior: +/// - false (instance params): always prefix with param_name. N/X becomes M/N/X. +/// - true (extends): preserve existing qualifier. N/X stays N/X. +pub(crate) fn remap_derived_sort( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + param_name: &str, + preserve_existing_prefix: bool, +) -> DerivedSort { + match sort { + DerivedSort::Base(source_id) => { + // Look up the sort name in the source signature + let sort_name = &source_sig.sorts[*source_id]; + // Find the corresponding qualified name in target signature + let qualified_name = if preserve_existing_prefix && sort_name.contains('/') { + // Extends case: already-qualified names keep their original qualifier + sort_name.clone() + } else { + // Instance param case OR unqualified name: prefix with param_name + format!("{}/{}", param_name, sort_name) + }; + let target_id = target_sig + .lookup_sort(&qualified_name) + .expect("qualified sort should have been added"); + DerivedSort::Base(target_id) + } + DerivedSort::Product(fields) => { + let remapped_fields = fields + .iter() + .map(|(name, s)| { + ( + name.clone(), + remap_derived_sort(s, source_sig, target_sig, 
param_name, preserve_existing_prefix), + ) + }) + .collect(); + DerivedSort::Product(remapped_fields) + } + } +} diff --git a/src/elaborate/error.rs b/src/elaborate/error.rs new file mode 100644 index 0000000..bdec074 --- /dev/null +++ b/src/elaborate/error.rs @@ -0,0 +1,185 @@ +//! Elaboration error types. + +use crate::core::DerivedSort; + +/// A concrete counterexample showing which variable bindings violate an axiom. +#[derive(Clone, Debug)] +pub struct CounterExample { + /// (variable_name, element_name) pairs showing the violating assignment + pub bindings: Vec<(String, String)>, +} + +impl std::fmt::Display for CounterExample { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let parts: Vec = self + .bindings + .iter() + .map(|(var, elem)| format!("{} = {}", var, elem)) + .collect(); + write!(f, "{{{}}}", parts.join(", ")) + } +} + +/// Elaboration errors +#[derive(Clone, Debug)] +pub enum ElabError { + UnknownSort(String), + UnknownTheory(String), + UnknownFunction(String), + UnknownRel(String), + UnknownVariable(String), + TypeMismatch { + expected: DerivedSort, + got: DerivedSort, + }, + NotASort(String), + NotAFunction(String), + NotARecord(String), + NoSuchField { + record: String, + field: String, + }, + InvalidPath(String), + DuplicateDefinition(String), + UnsupportedFeature(String), + PartialFunction { + func_name: String, + missing_elements: Vec, + }, + /// Type error in function application: element's sort doesn't match function's domain + DomainMismatch { + func_name: String, + element_name: String, + expected_sort: String, + actual_sort: String, + }, + /// Type error in equation: RHS sort doesn't match function's codomain + CodomainMismatch { + func_name: String, + element_name: String, + expected_sort: String, + actual_sort: String, + }, + /// Axiom violation during instance checking + AxiomViolation { + axiom_index: usize, + axiom_name: Option, + num_violations: usize, + /// Concrete counterexamples (limited to first few 
for readability) + counterexamples: Vec, + }, + /// Chase algorithm failed (e.g., didn't converge) + ChaseFailed(String), + + /// Not enough arguments for a parameterized theory + NotEnoughArgs { + name: String, + expected: usize, + got: usize, + }, + + /// Type expression evaluation error + TypeExprError(String), +} + +impl std::fmt::Display for ElabError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ElabError::UnknownSort(s) => write!(f, "unknown sort: {}", s), + ElabError::UnknownTheory(s) => write!(f, "unknown theory: {}", s), + ElabError::UnknownFunction(s) => write!(f, "unknown function: {}", s), + ElabError::UnknownRel(s) => write!(f, "unknown relation: {}", s), + ElabError::UnknownVariable(s) => write!(f, "unknown variable: {}", s), + ElabError::TypeMismatch { expected, got } => { + write!(f, "type mismatch: expected {}, got {}", expected, got) + } + ElabError::NotASort(s) => write!(f, "not a sort: {}", s), + ElabError::NotAFunction(s) => write!(f, "not a function: {}", s), + ElabError::NotARecord(s) => write!(f, "not a record type: {}", s), + ElabError::NoSuchField { record, field } => { + write!(f, "no field '{}' in record {}", field, record) + } + ElabError::InvalidPath(s) => write!(f, "invalid path: {}", s), + ElabError::DuplicateDefinition(s) => write!(f, "duplicate definition: {}", s), + ElabError::UnsupportedFeature(s) => write!(f, "unsupported feature: {}", s), + ElabError::PartialFunction { + func_name, + missing_elements, + } => { + write!( + f, + "partial function '{}': missing definitions for {:?}", + func_name, missing_elements + ) + } + ElabError::DomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + write!( + f, + "type error: '{}' has sort '{}', but function '{}' expects domain sort '{}'", + element_name, actual_sort, func_name, expected_sort + ) + } + ElabError::CodomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + write!( + 
f, + "type error: '{}' has sort '{}', but function '{}' has codomain sort '{}'", + element_name, actual_sort, func_name, expected_sort + ) + } + ElabError::AxiomViolation { + axiom_index, + axiom_name, + num_violations, + counterexamples, + } => { + let axiom_desc = if let Some(name) = axiom_name { + format!("axiom '{}' (#{}) violated", name, axiom_index) + } else { + format!("axiom #{} violated", axiom_index) + }; + + if counterexamples.is_empty() { + write!(f, "{}: {} counterexample(s) found", axiom_desc, num_violations) + } else { + writeln!(f, "{}: {} counterexample(s) found", axiom_desc, num_violations)?; + for (i, ce) in counterexamples.iter().enumerate() { + writeln!(f, " #{}: {}", i + 1, ce)?; + } + if *num_violations > counterexamples.len() { + write!( + f, + " ... and {} more", + num_violations - counterexamples.len() + )?; + } + Ok(()) + } + } + ElabError::ChaseFailed(msg) => write!(f, "chase failed: {}", msg), + ElabError::NotEnoughArgs { + name, + expected, + got, + } => { + write!( + f, + "'{}' expects {} argument(s), but only {} provided", + name, expected, got + ) + } + ElabError::TypeExprError(msg) => write!(f, "type expression error: {}", msg), + } + } +} + +pub type ElabResult = Result; diff --git a/src/elaborate/instance.rs b/src/elaborate/instance.rs new file mode 100644 index 0000000..33af6c4 --- /dev/null +++ b/src/elaborate/instance.rs @@ -0,0 +1,1431 @@ +//! Instance elaboration. + +use std::collections::HashMap; +use std::rc::Rc; + +use crate::ast; +use crate::core::*; +use crate::id::{NumericId, Slid}; +use crate::query::chase::chase_fixpoint; +use crate::tensor::check_theory_axioms; +use crate::universe::Universe; + +use super::env::Env; +use super::error::{CounterExample, ElabError, ElabResult}; + +// Re-use remapping utilities from theory elaboration +use super::theory::{collect_type_args_from_theory_type, build_param_subst, remap_sort_for_param_import}; + +/// Minimal context for instance elaboration - what we need from the caller. 
+/// +/// This replaces the old `Workspace` dependency, making elaboration more modular. +pub struct ElaborationContext<'a> { + /// Available theories + pub theories: &'a HashMap>, + /// Existing instances (for parameterized instance support) + pub instances: &'a HashMap, + /// Universe for allocating new Luids + pub universe: &'a mut Universe, + /// Sibling nested instances (for cross-references within a parent instance) + /// When elaborating nested instances, this contains already-elaborated siblings + pub siblings: HashMap, +} + +/// Result of elaborating an instance. +/// +/// Contains the structure and element name mappings. +#[derive(Debug)] +pub struct InstanceElaborationResult { + /// The elaborated structure + pub structure: Structure, + /// Mapping from Slid to element name (for display) + pub slid_to_name: HashMap, + /// Mapping from element name to Slid (for lookups) + pub name_to_slid: HashMap, + /// Metadata for nested instances (theory name and element names) + pub nested_meta: HashMap, +} + +/// Nested instance metadata for name resolution +#[derive(Clone, Debug, Default)] +pub struct NestedInstanceMeta { + /// Theory name of the nested instance + pub theory_name: String, + /// Map from element names to Slids (within the nested structure) + pub name_to_slid: HashMap, + /// Reverse map from Slids to names + pub slid_to_name: HashMap, +} + +/// An instance entry for elaboration context. +/// +/// This is a simpler version than what's in the REPL - just enough for elaboration. +pub struct InstanceEntry { + /// The structure containing the instance data + pub structure: Structure, + /// The base theory name this instance is of (e.g., "ReachabilityProblem") + pub theory_name: String, + /// The full theory type string (e.g., "ExampleNet ReachabilityProblem") + /// This is needed to compute parameter substitutions when importing elements. 
+ pub theory_type: String, + /// Map from element names to Slids + pub element_names: HashMap, + /// Reverse map from Slids to names + pub slid_to_name: HashMap, + /// Metadata for nested instances (for cross-instance references) + pub nested_meta: HashMap, +} + +impl InstanceEntry { + /// Create a new instance entry + pub fn new(structure: Structure, theory_name: String, theory_type: String) -> Self { + Self { + structure, + theory_name, + theory_type, + element_names: HashMap::new(), + slid_to_name: HashMap::new(), + nested_meta: HashMap::new(), + } + } + + /// Register an element with a name + pub fn register_element(&mut self, name: String, slid: Slid) { + self.element_names.insert(name.clone(), slid); + self.slid_to_name.insert(slid, name); + } + + /// Look up element by local name + pub fn get_element(&self, name: &str) -> Option { + self.element_names.get(name).copied() + } + + /// Get name for Slid + pub fn get_name(&self, slid: Slid) -> Option<&str> { + self.slid_to_name.get(&slid).map(|s| s.as_str()) + } +} + +/// Elaborate an instance declaration into a Structure with element name mappings. +/// +/// This is the context-aware version that supports cross-instance references. +/// For parameterized instances like `marking0 : ExampleNet Marking`, elements +/// from param instances (ExampleNet) are imported into the new structure. +/// +/// Returns both the structure and the element name mappings, so the caller +/// can track names for both local and imported elements. +pub fn elaborate_instance_ctx( + ctx: &mut ElaborationContext<'_>, + instance: &ast::InstanceDecl, +) -> ElabResult { + // If needs_chase is set, we skip totality (chase will fill in missing values) + elaborate_instance_ctx_inner(ctx, instance, instance.needs_chase) +} + +/// Elaborate an instance without validating totality. +/// Use this when the chase algorithm will fill in missing function values. 
+pub fn elaborate_instance_ctx_partial( + ctx: &mut ElaborationContext<'_>, + instance: &ast::InstanceDecl, +) -> ElabResult { + elaborate_instance_ctx_inner(ctx, instance, true) +} + +fn elaborate_instance_ctx_inner( + ctx: &mut ElaborationContext<'_>, + instance: &ast::InstanceDecl, + skip_totality: bool, +) -> ElabResult { + // Build Env from context theories for theory lookups + let env = Env { + theories: ctx.theories.clone(), + ..Env::new() + }; + + // 1. Resolve the theory type (handles parameterized types like `ExampleNet ReachabilityProblem`) + let resolved = resolve_instance_type(&env, &instance.theory)?; + let theory = env + .theories + .get(&resolved.theory_name) + .ok_or_else(|| ElabError::UnknownTheory(resolved.theory_name.clone()))?; + + // 2. Initialize structure (functions will be initialized after first pass) + let mut structure = Structure::new(theory.theory.signature.sorts.len()); + + // Initialize relation storage from signature + let relation_arities: Vec = theory + .theory + .signature + .relations + .iter() + .map(|rel| rel.domain.arity()) + .collect(); + structure.init_relations(&relation_arities); + + // Track name → Slid for resolving references within this instance + // Also track Slid → name for error messages + let mut name_to_slid: HashMap = HashMap::new(); + let mut slid_to_name: HashMap = HashMap::new(); + + // Track nested instance metadata for cross-instance references + let mut nested_meta: HashMap = HashMap::new(); + + // 2b. Import elements from param instances + // For each param binding (N, ExampleNet), import all elements from ExampleNet + // with their sorts mapped to the local signature (N/P, N/T, etc.) + // + // Also build a mapping from (param_slid -> local_slid) for each param instance + // so we can later import function values. 
+ let mut param_slid_to_local: HashMap<(String, Slid), Slid> = HashMap::new(); + + for (param_name, arg_value) in &resolved.arguments { + // Case 1: argument is an instance name (e.g., "ExampleNet" for N : PetriNet instance) + if let Some(param_entry) = ctx.instances.get(arg_value) { + // Get the param theory to know sort mappings + let param_theory_name = ¶m_entry.theory_name; + if let Some(param_theory) = ctx.theories.get(param_theory_name) { + // Build parameter substitution map for this param instance + // This tells us how to remap sorts from the param instance to local sorts. + // + // For example, if param_entry is `problem0 : ExampleNet ReachabilityProblem`: + // - param_theory = ReachabilityProblem, which has param (N : PetriNet) + // - type_args = ["ExampleNet"] (from problem0's theory_type) + // - param_subst = {"N" -> "ExampleNet"} + let type_args = collect_type_args_from_theory_type(¶m_entry.theory_type); + let param_subst = build_param_subst(param_theory, &type_args); + + // For each element in the param instance, import it + for (&slid, elem_name) in ¶m_entry.slid_to_name { + // Get the element's sort in the param instance + let param_sort_id = param_entry.structure.sorts[slid.index()]; + let param_sort_name = ¶m_theory.theory.signature.sorts[param_sort_id]; + + // Map to local sort using parameter substitution + // This handles cases like "N/P" in problem0 -> "N/P" in solution0 + // (not "RP/N/P" which doesn't exist) + let local_sort_name = remap_sort_for_param_import( + param_sort_name, + param_name, + ¶m_subst, + &resolved.arguments, + ); + let local_sort_id = theory + .theory + .signature + .lookup_sort(&local_sort_name) + .ok_or_else(|| ElabError::UnknownSort(local_sort_name.clone()))?; + + // Get the Luid for this element + let luid = param_entry.structure.get_luid(slid); + + // Add to local structure with the SAME Luid + let local_slid = structure.add_element_with_luid(luid, local_sort_id); + + // Register names: both "N/elemname" and 
"InstanceName/elemname" + // Also register unqualified "elemname" for convenient access + // (local elements declared later will shadow these if there's a collision) + let qualified_param = format!("{}/{}", param_name, elem_name); + let qualified_instance = format!("{}/{}", arg_value, elem_name); + + name_to_slid.insert(elem_name.clone(), local_slid); + name_to_slid.insert(qualified_param.clone(), local_slid); + name_to_slid.insert(qualified_instance.clone(), local_slid); + slid_to_name.insert(local_slid, qualified_instance); + + // Record mapping for function value import + param_slid_to_local.insert((arg_value.clone(), slid), local_slid); + } + } + } + // Case 2: argument is a sort path (for Sort params) + // Supports paths like: + // - "As/a" -> Instance/Sort + // - "trace/input_terminal" -> SiblingNestedInstance/Sort + // - "problem0/initial_marking/token" -> Instance/NestedInstance/Sort + else if arg_value.contains('/') { + let segments: Vec<&str> = arg_value.split('/').collect(); + + // Helper closure to import elements from a structure/theory pair + let import_elements_from_structure = | + source_structure: &Structure, + source_slid_to_name: &HashMap, + source_theory: &ElaboratedTheory, + source_sort_name: &str, + qualified_prefix: &str, + structure: &mut Structure, + name_to_slid: &mut HashMap, + slid_to_name: &mut HashMap, + param_slid_to_local: &mut HashMap<(String, Slid), Slid>, + param_name: &str, + theory: &ElaboratedTheory, + | -> ElabResult<()> { + if let Some(source_sort_id) = source_theory.theory.signature.lookup_sort(source_sort_name) { + for (&slid, elem_name) in source_slid_to_name { + let elem_sort_id = source_structure.sorts[slid.index()]; + if elem_sort_id == source_sort_id { + let local_sort_id = theory + .theory + .signature + .lookup_sort(param_name) + .ok_or_else(|| ElabError::UnknownSort(param_name.to_string()))?; + + let luid = source_structure.get_luid(slid); + let local_slid = structure.add_element_with_luid(luid, local_sort_id); + + 
let qualified_source = format!("{}/{}", qualified_prefix, elem_name); + + name_to_slid.insert(elem_name.clone(), local_slid); + name_to_slid.insert(qualified_source.clone(), local_slid); + slid_to_name.insert(local_slid, qualified_source.clone()); + + param_slid_to_local.insert((qualified_prefix.to_string(), slid), local_slid); + } + } + } + Ok(()) + }; + + match segments.len() { + // Case 2a: "Instance/Sort" or "Sibling/Sort" + 2 => { + let source_instance_name = segments[0]; + let source_sort_name = segments[1]; + + if let Some(source_entry) = ctx.instances.get(source_instance_name) + .or_else(|| ctx.siblings.get(source_instance_name)) { + let source_theory_name = &source_entry.theory_name; + if let Some(source_theory) = ctx.theories.get(source_theory_name) { + import_elements_from_structure( + &source_entry.structure, + &source_entry.slid_to_name, + source_theory, + source_sort_name, + source_instance_name, + &mut structure, + &mut name_to_slid, + &mut slid_to_name, + &mut param_slid_to_local, + param_name, + theory, + )?; + } + } + } + // Case 2b: "Instance/NestedInstance/Sort" (e.g., "problem0/initial_marking/token") + 3 => { + let top_instance_name = segments[0]; + let nested_instance_name = segments[1]; + let source_sort_name = segments[2]; + + if let Some(top_entry) = ctx.instances.get(top_instance_name) + .or_else(|| ctx.siblings.get(top_instance_name)) { + // Find the nested structure + if let Some(nested_structure) = top_entry.structure.nested.get(nested_instance_name) { + // Use nested_meta if available for accurate name resolution + if let Some(nested_meta) = top_entry.nested_meta.get(nested_instance_name) { + if let Some(nested_theory) = ctx.theories.get(&nested_meta.theory_name) { + let qualified_prefix = format!("{}/{}", top_instance_name, nested_instance_name); + import_elements_from_structure( + nested_structure, + &nested_meta.slid_to_name, + nested_theory, + source_sort_name, + &qualified_prefix, + &mut structure, + &mut name_to_slid, + &mut 
slid_to_name, + &mut param_slid_to_local, + param_name, + theory, + )?; + } + } else { + // Fallback: Try to infer from parent theory's instance fields + if let Some(parent_theory) = ctx.theories.get(&top_entry.theory_name) + && let Some(field_idx) = parent_theory.theory.signature.lookup_instance_field(nested_instance_name) { + let field = &parent_theory.theory.signature.instance_fields[field_idx]; + // Get the nested theory name (last word of the type) + let nested_theory_name = field.theory_type + .split_whitespace() + .last() + .unwrap_or(&field.theory_type); + + if let Some(nested_theory) = ctx.theories.get(nested_theory_name) { + // Build slid_to_name for the nested structure by scanning parent's element_names + let mut nested_slid_to_name: HashMap = HashMap::new(); + + // Check the parent's element_names for nested paths like "initial_marking/tok" + for name in top_entry.element_names.keys() { + if let Some(stripped) = name.strip_prefix(&format!("{}/", nested_instance_name)) { + // Find the corresponding slid in the nested structure + // by matching sort-local indices + for slid_idx in 0..nested_structure.len() { + let slid = Slid::from_usize(slid_idx); + if let std::collections::hash_map::Entry::Vacant(e) = nested_slid_to_name.entry(slid) { + e.insert(stripped.to_string()); + break; + } + } + } + } + + let qualified_prefix = format!("{}/{}", top_instance_name, nested_instance_name); + import_elements_from_structure( + nested_structure, + &nested_slid_to_name, + nested_theory, + source_sort_name, + &qualified_prefix, + &mut structure, + &mut name_to_slid, + &mut slid_to_name, + &mut param_slid_to_local, + param_name, + theory, + )?; + } + } + } + } + } + } + _ => { + // Unsupported path depth - silently skip + } + } + } + } + + // 3. 
First pass: create elements (new elements declared in this instance) + for item in &instance.body { + if let ast::InstanceItem::Element(names, sort_expr) = &item.node { + // Resolve the sort + let sort_id = resolve_instance_sort(&theory.theory.signature, sort_expr)?; + + // Add element for each name in the comma-separated list + for name in names { + // Add element to structure (returns Slid, Luid) + let (slid, _luid) = structure.add_element(ctx.universe, sort_id); + name_to_slid.insert(name.clone(), slid); + slid_to_name.insert(slid, name.clone()); + } + } + } + + // 3b. Initialize function storage now that carrier sizes are known + // Extract both domain and codomain info for each function + let func_infos: Vec = theory + .theory + .signature + .functions + .iter() + .map(|func| { + let domain = match &func.domain { + DerivedSort::Base(id) => FunctionDomainInfo::Base(*id), + DerivedSort::Product(fields) => { + let field_sorts: Vec = fields + .iter() + .filter_map(|(_, ds)| match ds { + DerivedSort::Base(id) => Some(*id), + DerivedSort::Product(_) => None, // Nested products not supported + }) + .collect(); + FunctionDomainInfo::Product(field_sorts) + } + }; + let codomain = match &func.codomain { + DerivedSort::Base(id) => FunctionCodomainInfo::Local(*id), + DerivedSort::Product(fields) => { + let field_names: Vec = fields.iter().map(|(name, _)| name.clone()).collect(); + let field_sorts: Vec = fields + .iter() + .filter_map(|(_, ds)| match ds { + DerivedSort::Base(id) => Some(*id), + DerivedSort::Product(_) => None, // Nested products not supported + }) + .collect(); + FunctionCodomainInfo::Product { field_names, field_sorts } + } + }; + FunctionFullInfo { domain, codomain } + }) + .collect(); + structure.init_functions_complete(&func_infos); + + // 3c. Import function values from param instances + // For each param (N, ExampleNet), for each function in param theory (src, tgt), + // import the function values using the local func name (N/src, N/tgt). 
+ for (param_name, instance_name) in &resolved.arguments { + if let Some(param_entry) = ctx.instances.get(instance_name) { + let param_theory_name = ¶m_entry.theory_name; + if let Some(param_theory) = ctx.theories.get(param_theory_name) { + // Build parameter substitution map (same as for element import) + let type_args = collect_type_args_from_theory_type(¶m_entry.theory_type); + let param_subst = build_param_subst(param_theory, &type_args); + + // For each function in the param theory + for (param_func_id, param_func) in + param_theory.theory.signature.functions.iter().enumerate() + { + // Find the corresponding local function using the same remapping logic + let local_func_name = remap_sort_for_param_import( + ¶m_func.name, + param_name, + ¶m_subst, + &resolved.arguments, + ); + let local_func_id = match theory.theory.signature.lookup_func(&local_func_name) { + Some(id) => id, + None => { + // Function might be from a shared param and already imported + // (e.g., N/in/src when N is shared between params) + continue; + } + }; + + // For each element in the domain, copy the function value + if let DerivedSort::Base(param_domain_sort) = ¶m_func.domain { + for param_domain_slid in + param_entry.structure.carriers[*param_domain_sort].iter() + { + let param_domain_slid = Slid::from_usize(param_domain_slid as usize); + + // Get the function value in the param instance + let param_sort_local_id = + param_entry.structure.sort_local_id(param_domain_slid); + if let Some(param_value_slid) = param_entry + .structure + .get_function(param_func_id, param_sort_local_id) + { + // Map both domain and codomain slids to local + if let (Some(&local_domain_slid), Some(&local_value_slid)) = ( + param_slid_to_local + .get(&(instance_name.clone(), param_domain_slid)), + param_slid_to_local + .get(&(instance_name.clone(), param_value_slid)), + ) { + // Define the function value in the local structure + let _ = structure.define_function( + local_func_id, + local_domain_slid, + 
local_value_slid, + ); + } + } + } + } + } + } + } + } + + // 4. Second pass: process equations (define function values) with type checking + for item in &instance.body { + if let ast::InstanceItem::Equation(lhs, rhs) = &item.node { + // Decompose lhs: `element func_path` or `[x: a, y: b] func_path` + let decomposed = + decompose_func_app(lhs, &name_to_slid, &theory.theory.signature)?; + + match decomposed { + DecomposedFuncApp::Base { elem, func_id } => { + // Type checking: verify element sort matches function domain + let func = &theory.theory.signature.functions[func_id]; + let elem_sort_id = structure.sorts[elem.index()]; + if let DerivedSort::Base(expected_domain) = &func.domain + && elem_sort_id != *expected_domain + { + return Err(ElabError::DomainMismatch { + func_name: func.name.clone(), + element_name: slid_to_name + .get(&elem) + .cloned() + .unwrap_or_else(|| format!("slid_{}", elem)), + expected_sort: theory.theory.signature.sorts[*expected_domain].clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + + // Check if codomain is a product (needs Record RHS) or base (needs element RHS) + match &func.codomain { + DerivedSort::Base(expected_codomain) => { + // Base codomain: resolve RHS to single element + let value_slid = resolve_instance_element(rhs, &name_to_slid)?; + let value_sort_id = structure.sorts[value_slid.index()]; + if value_sort_id != *expected_codomain { + return Err(ElabError::CodomainMismatch { + func_name: func.name.clone(), + element_name: slid_to_name + .get(&value_slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", value_slid)), + expected_sort: theory.theory.signature.sorts[*expected_codomain].clone(), + actual_sort: theory.theory.signature.sorts[value_sort_id].clone(), + }); + } + // Define the function value + structure + .define_function(func_id, elem, value_slid) + .map_err(ElabError::DuplicateDefinition)?; + } + DerivedSort::Product(codomain_fields) => { + // Product codomain: RHS must be 
a Record + let rhs_fields = match rhs { + ast::Term::Record(fields) => fields, + _ => return Err(ElabError::UnsupportedFeature(format!( + "function {} has product codomain, RHS must be a record literal like [field1: v1, field2: v2], got {:?}", + func.name, rhs + ))), + }; + + // Resolve each field value and type-check + let mut codomain_values: Vec<(&str, Slid)> = Vec::with_capacity(rhs_fields.len()); + for (field_name, field_term) in rhs_fields { + // Find the expected sort for this field + let expected_sort = codomain_fields.iter() + .find(|(name, _)| name == field_name) + .ok_or_else(|| ElabError::UnsupportedFeature(format!( + "unknown field '{}' in codomain of function {} (expected: {:?})", + field_name, func.name, + codomain_fields.iter().map(|(n, _)| n).collect::>() + )))?; + + let value_slid = resolve_instance_element(field_term, &name_to_slid)?; + let value_sort_id = structure.sorts[value_slid.index()]; + + if let DerivedSort::Base(expected_sort_id) = &expected_sort.1 + && value_sort_id != *expected_sort_id { + return Err(ElabError::CodomainMismatch { + func_name: func.name.clone(), + element_name: format!("field '{}': {}", field_name, + slid_to_name.get(&value_slid).cloned().unwrap_or_else(|| format!("slid_{}", value_slid))), + expected_sort: theory.theory.signature.sorts[*expected_sort_id].clone(), + actual_sort: theory.theory.signature.sorts[value_sort_id].clone(), + }); + } + + codomain_values.push((field_name.as_str(), value_slid)); + } + + // Define the product codomain function value + structure + .define_function_product_codomain(func_id, elem, &codomain_values) + .map_err(ElabError::DuplicateDefinition)?; + } + } + } + + DecomposedFuncApp::Product { tuple, func_id } => { + let func = &theory.theory.signature.functions[func_id]; + + // Type checking: verify tuple elements match product domain fields + if let DerivedSort::Product(domain_fields) = &func.domain { + if tuple.len() != domain_fields.len() { + return 
Err(ElabError::UnsupportedFeature(format!( + "product domain arity mismatch: expected {}, got {}", + domain_fields.len(), + tuple.len() + ))); + } + + for (slid, (field_name, field_sort)) in tuple.iter().zip(domain_fields.iter()) { + let elem_sort_id = structure.sorts[slid.index()]; + if let DerivedSort::Base(expected_sort) = field_sort + && elem_sort_id != *expected_sort { + return Err(ElabError::DomainMismatch { + func_name: func.name.clone(), + element_name: format!( + "field {} ({})", + field_name, + slid_to_name + .get(slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)) + ), + expected_sort: theory.theory.signature.sorts[*expected_sort] + .clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id] + .clone(), + }); + } + } + } else { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has product LHS but non-product domain {:?}", + func.name, func.domain + ))); + } + + // Handle codomain: base vs product + match &func.codomain { + DerivedSort::Base(expected_codomain) => { + // Resolve RHS to single element + let value_slid = resolve_instance_element(rhs, &name_to_slid)?; + let value_sort_id = structure.sorts[value_slid.index()]; + if value_sort_id != *expected_codomain { + return Err(ElabError::CodomainMismatch { + func_name: func.name.clone(), + element_name: slid_to_name + .get(&value_slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", value_slid)), + expected_sort: theory.theory.signature.sorts[*expected_codomain].clone(), + actual_sort: theory.theory.signature.sorts[value_sort_id].clone(), + }); + } + // Define the function value for product domain + structure + .define_function_product(func_id, &tuple, value_slid) + .map_err(ElabError::DuplicateDefinition)?; + } + DerivedSort::Product(_) => { + // Product domain with product codomain: not yet supported + return Err(ElabError::UnsupportedFeature(format!( + "function {} has both product domain and product codomain (not yet supported)", + func.name + ))); + } + } + 
} + } + } + } + + // 5. Third pass: relation assertions (assert relation tuples) + for item in &instance.body { + if let ast::InstanceItem::RelationAssertion(term, rel_name) = &item.node { + // Find the relation in the signature + let rel_id = theory + .theory + .signature + .lookup_rel(rel_name) + .ok_or_else(|| ElabError::UnknownRel(rel_name.clone()))?; + + let rel = &theory.theory.signature.relations[rel_id]; + + // Build the tuple of Slids from the term + let domain = &rel.domain; + let tuple = match (term, domain) { + // Unary relation with simple element: `element relation;` + (ast::Term::Path(_), DerivedSort::Product(expected_fields)) + if expected_fields.len() == 1 => + { + let slid = resolve_instance_element(term, &name_to_slid)?; + + // Type check + let elem_sort_id = structure.sorts[slid.index()]; + if let &DerivedSort::Base(expected_sort_id) = &expected_fields[0].1 + && elem_sort_id != expected_sort_id { + return Err(ElabError::DomainMismatch { + func_name: rel_name.clone(), + element_name: slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)), + expected_sort: theory.theory.signature.sorts[expected_sort_id] + .clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + vec![slid] + } + + // Multi-ary relation with record: `[field: value, ...] relation;` + (ast::Term::Record(fields), DerivedSort::Product(expected_fields)) => { + if fields.len() != expected_fields.len() { + return Err(ElabError::UnsupportedFeature(format!( + "relation {} expects {} fields, got {}", + rel_name, + expected_fields.len(), + fields.len() + ))); + } + + // Build tuple in the correct field order + // Supports both named fields and positional fields: + // - Named: `[item: a, on: b]` matches by field name + // - Positional: `[a, b]` maps "0" to first field, "1" to second, etc. 
+ // - Mixed: `[a, on: b]` uses position for "0", name for "on" + let mut tuple = Vec::with_capacity(expected_fields.len()); + for (idx, (expected_name, expected_sort)) in expected_fields.iter().enumerate() + { + let field_value = fields + .iter() + .find(|(name, _)| { + // Positional fields (named "0", "1", etc.) match by index + if let Ok(pos_idx) = name.parse::() { + pos_idx == idx + } else { + // Named fields match by name + name == expected_name.as_str() + } + }) + .ok_or_else(|| { + ElabError::UnsupportedFeature(format!( + "missing field '{}' (position {}) in relation assertion", + expected_name, idx + )) + })?; + + // Resolve the field value to a Slid + let slid = resolve_instance_element(&field_value.1, &name_to_slid)?; + + // Type check: verify element sort matches field sort + let elem_sort_id = structure.sorts[slid.index()]; + if let &DerivedSort::Base(expected_sort_id) = expected_sort + && elem_sort_id != expected_sort_id { + return Err(ElabError::DomainMismatch { + func_name: rel_name.clone(), + element_name: slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)), + expected_sort: theory.theory.signature.sorts[expected_sort_id] + .clone(), + actual_sort: theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + + tuple.push(slid); + } + tuple + } + + // Unary relation with base sort domain: `element relation;` + // This handles `rel : Sort -> Prop` (without bracket syntax) + (ast::Term::Path(_), DerivedSort::Base(expected_sort_id)) => { + let slid = resolve_instance_element(term, &name_to_slid)?; + + // Type check + let elem_sort_id = structure.sorts[slid.index()]; + if elem_sort_id != *expected_sort_id { + return Err(ElabError::DomainMismatch { + func_name: rel_name.clone(), + element_name: slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("slid_{}", slid)), + expected_sort: theory.theory.signature.sorts[*expected_sort_id].clone(), + actual_sort: 
theory.theory.signature.sorts[elem_sort_id].clone(), + }); + } + vec![slid] + } + + // Mismatch: using simple element for non-unary relation + (ast::Term::Path(_), DerivedSort::Product(expected_fields)) => { + return Err(ElabError::UnsupportedFeature(format!( + "relation {} has {} fields, use record syntax [field: value, ...]", + rel_name, + expected_fields.len() + ))); + } + + _ => { + return Err(ElabError::UnsupportedFeature(format!( + "relation {} has non-product domain {:?}", + rel_name, domain + ))); + } + }; + + // Assert the relation tuple + structure.assert_relation(rel_id, tuple); + } + } + + // 6. Fourth pass: nested instances + // For each nested instance like `initial_marking = { t : Token; ... };` + // 1. Find the instance field declaration in the theory + // 2. Resolve its theory type (e.g., "N Marking") with parameter substitution + // 3. Recursively elaborate the nested instance body + // 4. Store in the parent structure's `nested` HashMap + for item in &instance.body { + if let ast::InstanceItem::NestedInstance(field_name, nested_decl) = &item.node { + // 1. Look up the instance field in the parent theory signature + let field_idx = theory + .theory + .signature + .lookup_instance_field(field_name) + .ok_or_else(|| { + ElabError::UnknownVariable(format!("nested instance field: {}", field_name)) + })?; + + let instance_field = &theory.theory.signature.instance_fields[field_idx]; + + // 2. 
Resolve the theory type with parameter substitution + // The theory_type string can be like: + // - "N Marking" -> simple param at start + // - "(trace/input_terminal) (RP/initial_marking/token) Iso" -> params in paths + // We need to substitute parameter names in paths, handling: + // - Exact matches: "N" -> "ExampleNet" + // - Path prefixes: "RP/initial_marking/token" -> "problem0/initial_marking/token" + let resolved_theory_type = { + let mut result = instance_field.theory_type.clone(); + for (param_name, actual_instance_name) in &resolved.arguments { + // Replace param at path start: "RP/..." -> "problem0/..." + let path_pattern = format!("{}/", param_name); + let path_replacement = format!("{}/", actual_instance_name); + result = result.replace(&path_pattern, &path_replacement); + + // Replace exact param (word boundary): only if surrounded by non-alphanumeric + // Split by whitespace and handle each token + let parts: Vec = result + .split_whitespace() + .map(|p| { + // Strip parens for comparison + let stripped = p.trim_start_matches('(').trim_end_matches(')'); + if stripped == param_name { + // Replace the param name, keeping any parens + let prefix = if p.starts_with('(') { "(" } else { "" }; + let suffix = if p.ends_with(')') { ")" } else { "" }; + format!("{}{}{}", prefix, actual_instance_name, suffix) + } else { + p.to_string() + } + }) + .collect(); + result = parts.join(" "); + } + result + }; + + // 3. Find the resolved theory + // Parse the resolved type string to get the theory name + // For "ExampleNet Marking", we need to get the "Marking" theory + let nested_theory_name = resolved_theory_type + .split_whitespace() + .last() + .unwrap_or(&resolved_theory_type) + .to_string(); + + let nested_theory = ctx.theories.get(&nested_theory_name).ok_or_else(|| { + ElabError::UnknownTheory(format!( + "nested instance theory: {} (from field type: {})", + nested_theory_name, instance_field.theory_type + )) + })?; + + // 4. 
Create a new instance declaration with the resolved type + // Build the type expression from the resolved string + let nested_instance_decl = ast::InstanceDecl { + theory: parse_type_expr_from_string(&resolved_theory_type)?, + name: format!("{}_{}", instance.name, field_name), + body: nested_decl.body.clone(), + needs_chase: false, // Nested instances don't separately chase + }; + + // 5. Recursively elaborate the nested instance + let nested_result = elaborate_instance_ctx(ctx, &nested_instance_decl)?; + + // 6. Store the nested structure using the field name as the key + structure.nested.insert(field_name.clone(), nested_result.structure.clone()); + + // 7. Add this nested instance to siblings for cross-referencing by subsequent nested instances + // e.g., after elaborating `trace = {...}`, make it available so `initial_iso` can reference `trace/it` + let sibling_entry = InstanceEntry { + structure: nested_result.structure.clone(), + theory_name: nested_theory_name.clone(), + theory_type: resolved_theory_type.clone(), + element_names: nested_result.name_to_slid.clone(), + slid_to_name: nested_result.slid_to_name.clone(), + nested_meta: nested_result.nested_meta.clone(), + }; + ctx.siblings.insert(field_name.clone(), sibling_entry); + + // 8. Record nested metadata for inclusion in elaboration result + nested_meta.insert(field_name.clone(), NestedInstanceMeta { + theory_name: nested_theory_name.clone(), + name_to_slid: nested_result.name_to_slid, + slid_to_name: nested_result.slid_to_name, + }); + + // Suppress unused variable warning + let _ = nested_theory; // Used for type checking (could add validation later) + } + } + + // 6. Validate totality: all functions must be defined on all elements of their domain + // Skip this check when creating instances for chase (which will fill in missing values) + if !skip_totality { + validate_totality(&structure, &theory.theory.signature, &slid_to_name)?; + } + + // 7. 
Run chase if requested (fills in missing values according to axioms) + if instance.needs_chase { + const MAX_CHASE_ITERATIONS: usize = 1000; + // Chase now uses tensor system for premise evaluation - handles existentials, etc. + chase_fixpoint( + &theory.theory.axioms, + &mut structure, + ctx.universe, + &theory.theory.signature, + MAX_CHASE_ITERATIONS, + ) + .map_err(|e| ElabError::ChaseFailed(e.to_string()))?; + } + + // 8. Check axioms - all instances must satisfy the theory's axioms + let axioms: Vec<_> = theory.theory.axioms.clone(); + let violations = check_theory_axioms(&axioms, &structure, &theory.theory.signature); + + if !violations.is_empty() { + // Report the first violation with detailed counterexamples + let (axiom_idx, violation_list) = &violations[0]; + + // Get the actual axiom name from the theory + let axiom_name = theory + .theory + .axiom_names + .get(*axiom_idx) + .cloned(); + + // Build counterexamples with element names (limit to 5 for readability) + let axiom = &theory.theory.axioms[*axiom_idx]; + let counterexamples: Vec = violation_list + .iter() + .take(5) + .map(|v| { + let bindings: Vec<(String, String)> = v + .variable_names + .iter() + .zip(&v.assignment) + .map(|(var_name, &idx)| { + // Look up the variable's sort from the axiom context by name + let elem_name = axiom + .context + .vars + .iter() + .find(|(name, _)| name == var_name) + .and_then(|(_, sort)| { + // Get the sort id (assuming DerivedSort::Base for now) + if let DerivedSort::Base(sort_id) = sort { + // Get the Slid at index idx from the carrier (RoaringTreemap) + structure.carriers.get(*sort_id).and_then(|carrier| { + // Iterate to the idx-th element + carrier.iter().nth(idx).map(|slid_u64| { + let slid = Slid::from_usize(slid_u64 as usize); + slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("#{}", idx)) + }) + }) + } else { + None + } + }) + .unwrap_or_else(|| format!("#{}", idx)); + (var_name.clone(), elem_name) + }) + .collect(); + CounterExample { 
bindings } + }) + .collect(); + + return Err(ElabError::AxiomViolation { + axiom_index: *axiom_idx, + axiom_name, + num_violations: violation_list.len(), + counterexamples, + }); + } + + Ok(InstanceElaborationResult { + structure, + slid_to_name, + name_to_slid, + nested_meta, + }) +} + +// ============================================================================ +// HELPER TYPES AND FUNCTIONS +// ============================================================================ + +/// Result of resolving a (possibly parameterized) instance type. +/// +/// For `ExampleNet ReachabilityProblem`: +/// - theory_name = "ReachabilityProblem" +/// - arguments = vec![("N", "ExampleNet")] +/// +/// For simple `PetriNet`: +/// - theory_name = "PetriNet" +/// - arguments = vec![] +struct ResolvedInstanceType { + theory_name: String, + /// (param_name, instance_name) pairs + arguments: Vec<(String, String)>, +} + +/// Resolve a type expression to a theory name and its arguments. +/// +/// In curried application syntax, the theory is at the end: +/// - Simple: `PetriNet` -> ("PetriNet", []) +/// - Single param: `ExampleNet Marking` -> ("Marking", [("N", "ExampleNet")]) +/// - Multiple params: `ExampleNet problem0 ReachabilityProblem/Solution` -> ("ReachabilityProblem/Solution", [("N", "ExampleNet"), ("RP", "problem0")]) +/// +/// With concatenative parsing, tokens are in order: [arg1, arg2, ..., theory_name] +/// The last path token is the theory name, earlier ones are arguments. 
+fn resolve_instance_type(env: &Env, ty: &ast::TypeExpr) -> ElabResult { + use crate::ast::TypeToken; + + // Collect all path tokens (the theory and its arguments) + let paths: Vec = ty + .tokens + .iter() + .filter_map(|t| match t { + TypeToken::Path(p) => Some(p.to_string()), + _ => None, + }) + .collect(); + + if paths.is_empty() { + return Err(ElabError::TypeExprError( + "no theory name in type expression".to_string(), + )); + } + + // Last path is the theory name + let theory_name = paths.last().unwrap().clone(); + + // Earlier paths are arguments (in order) + let args: Vec = paths[..paths.len() - 1].to_vec(); + + // Look up the theory to get parameter names + let theory = env + .theories + .get(&theory_name) + .ok_or_else(|| ElabError::UnknownTheory(theory_name.clone()))?; + + // Match up arguments with parameters + if args.len() != theory.params.len() { + return Err(ElabError::NotEnoughArgs { + name: theory_name, + expected: theory.params.len(), + got: args.len(), + }); + } + + let arguments: Vec<(String, String)> = theory + .params + .iter() + .zip(args.iter()) + .map(|(param, arg)| (param.name.clone(), arg.clone())) + .collect(); + + Ok(ResolvedInstanceType { + theory_name, + arguments, + }) +} + +/// Resolve a sort expression within an instance (using the theory's signature) +fn resolve_instance_sort(sig: &Signature, sort_expr: &ast::TypeExpr) -> ElabResult { + use crate::ast::TypeToken; + + // For sort expressions, we expect a single path token + if let Some(path) = sort_expr.as_single_path() { + let name = path.to_string(); + sig.lookup_sort(&name) + .ok_or(ElabError::UnknownSort(name)) + } else { + // Check if there's any path token at all + for token in &sort_expr.tokens { + if let TypeToken::Path(path) = token { + let name = path.to_string(); + return sig + .lookup_sort(&name) + .ok_or(ElabError::UnknownSort(name)); + } + } + Err(ElabError::TypeExprError(format!( + "no path in sort expression: {:?}", + sort_expr + ))) + } +} + +/// Result of 
decomposing a function application's LHS +enum DecomposedFuncApp { + /// Base domain: `element func` → single element + Base { elem: Slid, func_id: FuncId }, + /// Product domain: `[x: a, y: b] func` → tuple of elements + Product { tuple: Vec, func_id: FuncId }, +} + +/// Decompose a function application term like `ab_in in/src` or `[x: u, y: u] mul` +/// Returns either Base (single element) or Product (tuple of elements) with func_id +fn decompose_func_app( + term: &ast::Term, + name_to_slid: &HashMap, + sig: &Signature, +) -> ElabResult { + match term { + ast::Term::App(base, func) => { + // func should be a function path + let func_id = match func.as_ref() { + ast::Term::Path(path) => { + let func_name = path.to_string(); + sig.lookup_func(&func_name) + .ok_or(ElabError::UnknownFunction(func_name)) + } + _ => Err(ElabError::NotAFunction(format!("{:?}", func))), + }?; + + // base can be either: + // - a single element name (base domain) + // - a record like [x: a, y: b] (product domain) + match base.as_ref() { + ast::Term::Record(fields) => { + // Product domain: [x: a, y: b] func + let tuple: Vec = fields + .iter() + .map(|(_, term)| resolve_instance_element(term, name_to_slid)) + .collect::>>()?; + Ok(DecomposedFuncApp::Product { tuple, func_id }) + } + _ => { + // Base domain: element func + let elem_slid = resolve_instance_element(base, name_to_slid)?; + Ok(DecomposedFuncApp::Base { + elem: elem_slid, + func_id, + }) + } + } + } + _ => Err(ElabError::NotAFunction(format!( + "expected function application, got {:?}", + term + ))), + } +} + +/// Resolve a term to an element Slid +/// +/// Handles both simple names ("v1") and qualified paths ("ExampleNet/t1"). +/// For multi-segment paths, joins with "/" and looks up in name_to_slid. 
+fn resolve_instance_element( + term: &ast::Term, + name_to_slid: &HashMap, +) -> ElabResult { + match term { + ast::Term::Path(path) => { + // Join all segments with "/" for lookup + // This handles both "v1" and "ExampleNet/t1" + let name = path.segments.join("/"); + name_to_slid + .get(&name) + .copied() + .ok_or(ElabError::UnknownVariable(name)) + } + _ => Err(ElabError::UnsupportedFeature(format!( + "complex element reference: {:?}", + term + ))), + } +} + +/// Check that all functions in the structure are total (defined on every element of their domain) +fn validate_totality( + structure: &Structure, + sig: &Signature, + slid_to_name: &HashMap, +) -> ElabResult<()> { + use crate::core::FunctionColumn; + + for (func_id, func_sym) in sig.functions.iter().enumerate() { + let mut missing = Vec::new(); + let func_col = &structure.functions[func_id]; + + match (&func_sym.domain, func_col) { + // Base domain with local codomain + (DerivedSort::Base(domain_sort_id), FunctionColumn::Local(col)) => { + for (sort_slid, opt_slid) in col.iter().enumerate() { + if opt_slid.is_none() { + let slid = Slid::from_usize( + structure.carriers[*domain_sort_id] + .select(sort_slid as u64) + .expect("sort_slid should be valid") as usize, + ); + let name = slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("element#{}", slid)); + missing.push(name); + } + } + } + + // Base domain with external codomain + (DerivedSort::Base(domain_sort_id), FunctionColumn::External(col)) => { + for (sort_slid, opt_luid) in col.iter().enumerate() { + if opt_luid.is_none() { + let slid = Slid::from_usize( + structure.carriers[*domain_sort_id] + .select(sort_slid as u64) + .expect("sort_slid should be valid") as usize, + ); + let name = slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("element#{}", slid)); + missing.push(name); + } + } + } + + // Product domain: check all tuples in the cartesian product + (DerivedSort::Product(fields), FunctionColumn::ProductLocal { 
storage, .. }) => { + // Collect carriers for each field + let field_carriers: Vec> = fields + .iter() + .map(|(_, ds)| match ds { + DerivedSort::Base(sort_id) => structure.carriers[*sort_id] + .iter() + .map(|s| Slid::from_usize(s as usize)) + .collect(), + DerivedSort::Product(_) => { + // Nested products not yet supported + vec![] + } + }) + .collect(); + + // Enumerate all tuples via cartesian product + for tuple in cartesian_product(&field_carriers) { + // Convert Slids to sort-local indices for storage lookup + let local_indices: Vec = tuple + .iter() + .map(|slid| structure.sort_local_id(*slid).index()) + .collect(); + + if storage.get(&local_indices).is_none() { + // Format the missing tuple nicely + let tuple_str: Vec = tuple + .iter() + .zip(fields.iter()) + .map(|(slid, (field_name, _))| { + let elem_name = slid_to_name + .get(slid) + .cloned() + .unwrap_or_else(|| format!("#{}", slid)); + format!("{}: {}", field_name, elem_name) + }) + .collect(); + missing.push(format!("[{}]", tuple_str.join(", "))); + } + } + } + + // Base domain with product codomain: check all field columns + (DerivedSort::Base(domain_sort_id), FunctionColumn::ProductCodomain { field_columns, field_names, .. 
}) => { + // For product codomains, a value is defined if ALL fields are defined + let carrier_size = structure.carrier_size(*domain_sort_id); + for sort_slid in 0..carrier_size { + // Check if any field is undefined for this element + let all_defined = field_columns.iter().all(|col| { + col.get(sort_slid) + .and_then(|opt| crate::id::get_slid(*opt)) + .is_some() + }); + if !all_defined { + let slid = Slid::from_usize( + structure.carriers[*domain_sort_id] + .select(sort_slid as u64) + .expect("sort_slid should be valid") as usize, + ); + let name = slid_to_name + .get(&slid) + .cloned() + .unwrap_or_else(|| format!("element#{}", slid)); + // Find which fields are missing + let missing_fields: Vec<_> = field_columns.iter() + .zip(field_names.iter()) + .filter(|(col, _)| { + col.get(sort_slid) + .and_then(|opt| crate::id::get_slid(*opt)) + .is_none() + }) + .map(|(_, name)| name.as_str()) + .collect(); + missing.push(format!("{} (fields: {:?})", name, missing_fields)); + } + } + } + + // Mismatched domain/column types (shouldn't happen if init is correct) + (DerivedSort::Base(_), FunctionColumn::ProductLocal { .. }) => { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has base domain but product storage", + func_sym.name + ))); + } + (DerivedSort::Product(_), FunctionColumn::Local(_) | FunctionColumn::External(_)) => { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has product domain but columnar storage", + func_sym.name + ))); + } + (DerivedSort::Product(_), FunctionColumn::ProductCodomain { .. 
}) => { + return Err(ElabError::UnsupportedFeature(format!( + "function {} has product domain with product codomain (not yet supported)", + func_sym.name + ))); + } + } + + if !missing.is_empty() { + return Err(ElabError::PartialFunction { + func_name: func_sym.name.clone(), + missing_elements: missing, + }); + } + } + + Ok(()) +} + +/// Generate cartesian product of vectors +fn cartesian_product(sets: &[Vec]) -> Vec> { + if sets.is_empty() { + return vec![vec![]]; // Single empty tuple for nullary product + } + + let mut result = vec![vec![]]; + for set in sets { + let mut new_result = Vec::new(); + for tuple in &result { + for &elem in set { + let mut new_tuple = tuple.clone(); + new_tuple.push(elem); + new_result.push(new_tuple); + } + } + result = new_result; + } + result +} + +/// Parse a simple type expression from a string like "ExampleNet Marking" +/// +/// With concatenative parsing, this just creates a flat list of path tokens. +fn parse_type_expr_from_string(s: &str) -> ElabResult { + use crate::ast::TypeToken; + + let tokens: Vec<&str> = s.split_whitespace().collect(); + + if tokens.is_empty() { + return Err(ElabError::TypeExprError( + "empty type expression".to_string(), + )); + } + + // Simply create a TypeToken::Path for each token + let type_tokens: Vec = tokens + .iter() + .map(|&t| TypeToken::Path(ast::Path::single(t.to_string()))) + .collect(); + + Ok(ast::TypeExpr { tokens: type_tokens }) +} diff --git a/src/elaborate/mod.rs b/src/elaborate/mod.rs new file mode 100644 index 0000000..f8a5976 --- /dev/null +++ b/src/elaborate/mod.rs @@ -0,0 +1,17 @@ +//! Elaboration: surface syntax → typed core representation +//! +//! This module transforms the untyped AST into the typed core representation, +//! performing name resolution and type checking along the way. 
+ +mod env; +mod error; +mod instance; +mod theory; +pub mod types; + +// Re-export main types and functions +pub use env::{elaborate_formula, elaborate_term, elaborate_type, Env}; +pub use error::{ElabError, ElabResult}; +pub use instance::{ElaborationContext, InstanceElaborationResult, InstanceEntry, elaborate_instance_ctx, elaborate_instance_ctx_partial}; +pub use theory::elaborate_theory; +pub use types::{eval_type_expr, TypeValue}; diff --git a/src/elaborate/theory.rs b/src/elaborate/theory.rs new file mode 100644 index 0000000..fde2549 --- /dev/null +++ b/src/elaborate/theory.rs @@ -0,0 +1,739 @@ +//! Theory elaboration. + +use std::collections::HashMap; + +use crate::ast; +use crate::core::*; + +use super::env::{elaborate_formula, elaborate_type, remap_derived_sort, Env}; +use super::error::{ElabError, ElabResult}; + +/// Elaborate a theory declaration +pub fn elaborate_theory(env: &mut Env, theory: &ast::TheoryDecl) -> ElabResult { + // Set up the environment for this theory + let mut local_env = env.clone(); + local_env.current_theory = Some(theory.name.clone()); + local_env.signature = Signature::new(); + + // Track extended theories for transitive closure semantics + let mut extends_chain: Vec = Vec::new(); + + // Process extends clause (if any) + // This is like a parameter, but: + // 1. Uses the parent theory name as the qualifier (e.g., GeologMeta/Srt) + // 2. 
Establishes an "is-a" relationship with transitive closure + // + // For transitive extends (A extends B extends C), we use "requalified" semantics: + // - Sorts/funcs already qualified (from grandparents) keep their original qualifier + // - Only unqualified items (parent's own) get the parent prefix + // This gives A: { C/X, C/Y, B/Foo } rather than { B/C/X, B/C/Y, B/Foo } + if let Some(ref parent_path) = theory.extends { + let parent_name = parent_path.segments.join("/"); + if let Some(parent_theory) = env.theories.get(&parent_name) { + // Record the extends relationship (including transitive parents) + extends_chain.push(parent_name.clone()); + + // Helper: check if a name is already qualified from a grandparent + // A name like "Grandparent/X" is grandparent-qualified if "Grandparent" is NOT + // a sort in the parent theory (i.e., it's a theory name, not a naming convention). + // Names like "Func/dom" where "Func" IS a sort use '/' as naming convention. + let is_grandparent_qualified = |name: &str| -> bool { + if let Some((prefix, _)) = name.split_once('/') { + // If the prefix is a sort in parent, it's naming convention, not grandparent + parent_theory.theory.signature.lookup_sort(prefix).is_none() + } else { + false + } + }; + + // Helper: qualify a name - only prefix if not already qualified from grandparent + let qualify = |name: &str| -> String { + if is_grandparent_qualified(name) { + // Already qualified from grandparent - keep as-is + name.to_string() + } else { + // Parent's own item (possibly with naming convention '/') - add parent prefix + format!("{}/{}", parent_name, name) + } + }; + + // Copy all sorts with requalified names + for sort_name in &parent_theory.theory.signature.sorts { + let qualified_name = qualify(sort_name); + local_env.signature.add_sort(qualified_name); + } + + // Copy all functions with requalified names + for func in &parent_theory.theory.signature.functions { + let qualified_name = qualify(&func.name); + // For 
domain/codomain remapping, always use parent_name because + // the source signature uses the parent's namespace. The + // preserve_existing_prefix flag handles grandparent-qualified sorts. + let domain = remap_derived_sort( + &func.domain, + &parent_theory.theory.signature, + &local_env.signature, + &parent_name, + true, // preserve_existing_prefix for extends + ); + let codomain = remap_derived_sort( + &func.codomain, + &parent_theory.theory.signature, + &local_env.signature, + &parent_name, + true, // preserve_existing_prefix for extends + ); + local_env + .signature + .add_function(qualified_name, domain, codomain); + } + + // Copy all relations with requalified names + for rel in &parent_theory.theory.signature.relations { + let qualified_name = qualify(&rel.name); + // Same as functions: always use parent_name for remapping + let domain = remap_derived_sort( + &rel.domain, + &parent_theory.theory.signature, + &local_env.signature, + &parent_name, + true, // preserve_existing_prefix for extends + ); + local_env.signature.add_relation(qualified_name, domain); + } + + // Note: axioms are inherited but we don't copy them yet + // (they reference the parent's sort/func IDs which need remapping) + } else { + return Err(ElabError::UnknownTheory(parent_name)); + } + } + + // Process parameters + // When we have `theory (N : PetriNet instance) Trace { ... }`, we need to: + // 1. Copy all sorts from PetriNet into local signature with qualified names (N/P, N/T, etc.) + // 2. Copy all functions with qualified names (N/in/src, etc.) + // This ensures all sort/func IDs are in a single namespace. 
+ let mut params = Vec::new(); + for param in &theory.params { + // "T instance" parameters — the theory depends on an instance of another theory + if param.ty.is_instance() { + let inner = param.ty.instance_inner().unwrap(); + // Handle both simple (PetriNet instance) and parameterized (N ReachabilityProblem instance) cases + let theory_name = extract_theory_name(&inner)?; + if let Some(base_theory) = env.theories.get(&theory_name) { + // Build mapping from base_theory's instance params to our type args + // For `RP : N ReachabilityProblem instance`: + // - collect_type_args returns ["N"] (all paths except the theory name) + // - base_theory.params = [("N", "PetriNet")] + // - mapping = {"N" -> "N"} + let mut type_args = Vec::new(); + collect_type_args(&inner, &mut type_args); + + // Build param substitution map: base_theory param name -> our type arg value + let mut param_subst: HashMap = HashMap::new(); + for (bp, arg) in base_theory.params.iter().zip(type_args.iter()) { + if bp.theory_name != "Sort" { + // Instance param - map its name to the type arg + param_subst.insert(bp.name.clone(), arg.clone()); + } + } + + // Copy all sorts from param theory into local signature + // But for sorts that come from a param that we're binding to an outer param, + // reuse the outer param's sort instead of creating a duplicate. 
+ for sort_name in &base_theory.theory.signature.sorts { + // Check if this sort starts with a param name that we're substituting + let qualified_name = if let Some((prefix, suffix)) = sort_name.split_once('/') { + if let Some(subst) = param_subst.get(prefix) { + // This sort is from a param we're binding - use the substituted prefix + let substituted_name = format!("{}/{}", subst, suffix); + // If this sort already exists (from an outer param), don't add it again + if local_env.signature.lookup_sort(&substituted_name).is_some() { + continue; + } + substituted_name + } else { + // Not from a substituted param - prefix with our param name + format!("{}/{}", param.name, sort_name) + } + } else { + // Unqualified sort (the theory's own sort) - prefix with our param name + format!("{}/{}", param.name, sort_name) + }; + local_env.signature.add_sort(qualified_name); + } + + // Copy all functions from param theory with qualified names + for func in &base_theory.theory.signature.functions { + // Check if this function starts with a param name that we're substituting + let qualified_name = if let Some((prefix, suffix)) = func.name.split_once('/') { + if let Some(subst) = param_subst.get(prefix) { + // This func is from a param we're binding - use the substituted prefix + let substituted_name = format!("{}/{}", subst, suffix); + // If this function already exists (from an outer param), don't add it again + if local_env.signature.lookup_func(&substituted_name).is_some() { + continue; + } + substituted_name + } else { + // Not from a substituted param - prefix with our param name + format!("{}/{}", param.name, func.name) + } + } else { + // Unqualified func - prefix with our param name + format!("{}/{}", param.name, func.name) + }; + // Remap domain and codomain to use local signature's sort IDs + // We need to handle substitution for sorts too + let domain = remap_derived_sort_with_subst( + &func.domain, + &base_theory.theory.signature, + &local_env.signature, + ¶m.name, + 
¶m_subst, + ); + let codomain = remap_derived_sort_with_subst( + &func.codomain, + &base_theory.theory.signature, + &local_env.signature, + ¶m.name, + ¶m_subst, + ); + local_env + .signature + .add_function(qualified_name, domain, codomain); + } + + // Copy all relations from param theory with qualified names + for rel in &base_theory.theory.signature.relations { + // Check if this relation starts with a param name that we're substituting + let qualified_name = if let Some((prefix, suffix)) = rel.name.split_once('/') { + if let Some(subst) = param_subst.get(prefix) { + let substituted_name = format!("{}/{}", subst, suffix); + if local_env.signature.lookup_rel(&substituted_name).is_some() { + continue; + } + substituted_name + } else { + format!("{}/{}", param.name, rel.name) + } + } else { + format!("{}/{}", param.name, rel.name) + }; + let domain = remap_derived_sort_with_subst( + &rel.domain, + &base_theory.theory.signature, + &local_env.signature, + ¶m.name, + ¶m_subst, + ); + local_env.signature.add_relation(qualified_name, domain); + } + + // NOTE: Instance field content (sorts/functions) is already included in + // base_theory.theory.signature because it was added when that theory + // was elaborated. We don't need to process instance fields again here. 
+ + params.push(TheoryParam { + name: param.name.clone(), + theory_name: theory_name.clone(), + }); + local_env + .params + .push((param.name.clone(), base_theory.clone())); + } else { + return Err(ElabError::UnknownTheory(theory_name)); + } + } else if param.ty.is_sort() { + // "Sort" parameters — the theory is parameterized over a sort + // Add the parameter as a sort in the local signature + local_env.signature.add_sort(param.name.clone()); + // Also record it as a "sort parameter" for the theory + params.push(TheoryParam { + name: param.name.clone(), + theory_name: "Sort".to_string(), // Special marker + }); + } else { + return Err(ElabError::UnsupportedFeature(format!( + "parameter type {:?}", + param.ty + ))); + } + } + + // First pass: collect all sorts + for item in &theory.body { + if let ast::TheoryItem::Sort(name) = &item.node { + local_env.signature.add_sort(name.clone()); + } + } + + // Second pass: collect all functions and relations + for item in &theory.body { + match &item.node { + ast::TheoryItem::Function(f) => { + // Check if codomain is Prop — if so, this is a relation declaration + if f.codomain.is_prop() { + let domain = elaborate_type(&local_env, &f.domain)?; + local_env + .signature + .add_relation(f.name.to_string(), domain); + } else { + let domain = elaborate_type(&local_env, &f.domain)?; + let codomain = elaborate_type(&local_env, &f.codomain)?; + local_env + .signature + .add_function(f.name.to_string(), domain, codomain); + } + } + // Legacy: A Field with a Record type is a relation declaration + // (kept for backwards compatibility, may remove later) + ast::TheoryItem::Field(name, ty) if ty.as_record().is_some() => { + let domain = elaborate_type(&local_env, ty)?; + local_env.signature.add_relation(name.clone(), domain); + } + // Instance-typed field declarations (nested instances) + // e.g., `initial_marking : N Marking instance;` + ast::TheoryItem::Field(name, ty) if ty.is_instance() => { + let inner = ty.instance_inner().unwrap(); 
+ // Store the theory type expression as a string + let theory_type_str = format_type_expr(&inner); + local_env + .signature + .add_instance_field(name.clone(), theory_type_str.clone()); + + // Also add the content (sorts, functions) from the field's theory + // This enables accessing things like iso/fwd when we have `iso : X Y Iso instance` + if let Ok(field_theory_name) = extract_theory_name(&inner) + && let Some(field_theory) = env.theories.get(&field_theory_name) { + let field_prefix = name.clone(); + + // Build a mapping from source sort names to target sort names + // - Sort parameters get substituted from type expression args + // - Instance param sorts (e.g., "N/P") map to local sorts with same name + // - Local sorts (e.g., "Token") get prefixed with field name + let sort_param_map = collect_sort_params(&inner, field_theory); + + // First, add any non-param sorts from the field's theory with prefix + for sort_name in &field_theory.theory.signature.sorts { + // Skip sorts that came from instance params (already qualified) + if sort_name.contains('/') { + continue; + } + // Skip Sort parameters (will be substituted) + let is_sort_param = field_theory + .params + .iter() + .any(|p| p.theory_name == "Sort" && p.name == *sort_name); + if is_sort_param { + continue; + } + // Add as prefixed sort + let qualified_name = format!("{}/{}", field_prefix, sort_name); + local_env.signature.add_sort(qualified_name); + } + + // Add functions from the field's theory + for func in &field_theory.theory.signature.functions { + // Skip functions that came from instance params (prefix matches param name) + // But keep naming-convention functions like "input_terminal/of" + let is_from_param = if let Some(prefix) = func.name.split('/').next() { + field_theory.params.iter().any(|p| p.name == prefix) + } else { + false + }; + if is_from_param { + continue; + } + let qualified_name = format!("{}/{}", field_prefix, func.name); + let domain = remap_for_instance_field( + &func.domain, 
+ &field_theory.theory.signature, + &local_env.signature, + &sort_param_map, + &field_prefix, + ); + let codomain = remap_for_instance_field( + &func.codomain, + &field_theory.theory.signature, + &local_env.signature, + &sort_param_map, + &field_prefix, + ); + if let (Some(d), Some(c)) = (domain, codomain) { + local_env.signature.add_function(qualified_name, d, c); + } + } + } + } + _ => {} + } + } + + // Third pass: elaborate axioms + let mut axioms = Vec::new(); + let mut axiom_names = Vec::new(); + for item in &theory.body { + if let ast::TheoryItem::Axiom(ax) = &item.node { + // Build context from quantified variables + let mut ctx = Context::new(); + for qv in &ax.quantified { + let sort = elaborate_type(&local_env, &qv.ty)?; + for name in &qv.names { + ctx = ctx.extend(name.clone(), sort.clone()); + } + } + + // Elaborate hypothesis (conjunction of all hypotheses) + let premise = if ax.hypotheses.is_empty() { + Formula::True + } else { + let hyps: Result, _> = ax + .hypotheses + .iter() + .map(|h| elaborate_formula(&local_env, &ctx, h)) + .collect(); + Formula::Conj(hyps?) + }; + + // Elaborate conclusion + let conclusion = elaborate_formula(&local_env, &ctx, &ax.conclusion)?; + + // Collect axiom name (e.g., "ax/input_complete") + axiom_names.push(ax.name.to_string()); + + axioms.push(Sequent { + context: ctx, + premise, + conclusion, + }); + } + } + + Ok(ElaboratedTheory { + params, + theory: Theory { + name: theory.name.clone(), + signature: local_env.signature, + axioms, + axiom_names, + }, + }) +} + +/// Remap a DerivedSort for an instance-typed field in a theory body. +/// Handles both Sort parameters (substituted from type args) and instance param sorts. 
+fn remap_for_instance_field( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + sort_param_map: &HashMap, + field_prefix: &str, +) -> Option { + match sort { + DerivedSort::Base(source_id) => { + let sort_name = &source_sig.sorts[*source_id]; + + // Check Sort parameter substitution (e.g., X -> RP/initial/Token) + if let Some(replacement) = sort_param_map.get(sort_name) + && let Some(target_id) = target_sig.lookup_sort(replacement) { + return Some(DerivedSort::Base(target_id)); + } + + // Check if it's an instance param sort (already qualified, e.g., N/P) + if sort_name.contains('/') + && let Some(target_id) = target_sig.lookup_sort(sort_name) { + return Some(DerivedSort::Base(target_id)); + } + + // Check if it's a local sort (needs prefix, e.g., Token -> initial/Token) + let prefixed = format!("{}/{}", field_prefix, sort_name); + if let Some(target_id) = target_sig.lookup_sort(&prefixed) { + return Some(DerivedSort::Base(target_id)); + } + + None + } + DerivedSort::Product(fields) => { + let remapped: Option> = fields + .iter() + .map(|(n, s)| { + remap_for_instance_field(s, source_sig, target_sig, sort_param_map, field_prefix) + .map(|r| (n.clone(), r)) + }) + .collect(); + remapped.map(DerivedSort::Product) + } + } +} + +/// Collect sort parameter mappings from a type expression. +/// E.g., `RP/initial/Token RP/target/Token Iso` returns {"X" -> "RP/initial/Token", "Y" -> "RP/target/Token"} +fn collect_sort_params( + ty: &ast::TypeExpr, + field_theory: &std::rc::Rc, +) -> HashMap { + let mut args = Vec::new(); + collect_type_args(ty, &mut args); + + // Match args with sort parameters in order + let mut map = HashMap::new(); + for (param, arg) in field_theory.params.iter().zip(args.iter()) { + if param.theory_name == "Sort" { + map.insert(param.name.clone(), arg.clone()); + } + } + map +} + +/// Recursively collect type arguments from an App chain. +/// For `A B C Foo`, this returns ["A", "B", "C"] (Foo is the theory name). 
+/// +/// With concatenative parsing, tokens are in order: [arg1, arg2, ..., theory_name] +/// All path tokens except the last one are type arguments. +pub fn collect_type_args(ty: &ast::TypeExpr, args: &mut Vec) { + use crate::ast::TypeToken; + + // Collect all path tokens + let paths: Vec = ty + .tokens + .iter() + .filter_map(|t| match t { + TypeToken::Path(p) => Some(p.to_string()), + _ => None, + }) + .collect(); + + // All but the last one are type arguments + if paths.len() > 1 { + args.extend(paths[..paths.len() - 1].iter().cloned()); + } +} + +/// Substitute sort parameters in a DerivedSort using a mapping. +/// Returns None if the sort cannot be resolved in the target signature. +#[allow(dead_code)] +fn substitute_sort_params( + sort: &DerivedSort, + source_sig: &Signature, + target_sig: &Signature, + param_map: &HashMap, +) -> Option { + match sort { + DerivedSort::Base(source_id) => { + let sort_name = &source_sig.sorts[*source_id]; + // Check if this sort is a parameter that should be substituted + if let Some(replacement) = param_map.get(sort_name) { + // Look up the replacement sort in the target signature + if let Some(target_id) = target_sig.lookup_sort(replacement) { + return Some(DerivedSort::Base(target_id)); + } + // Couldn't find the replacement - this is an error case + eprintln!( + "Warning: sort param substitution failed for {} -> {}", + sort_name, replacement + ); + return None; + } + // Not a parameter - try to find in target as-is + target_sig.lookup_sort(sort_name).map(DerivedSort::Base) + } + DerivedSort::Product(fields) => { + let remapped_fields: Option> = fields + .iter() + .map(|(name, s)| { + substitute_sort_params(s, source_sig, target_sig, param_map) + .map(|remapped| (name.clone(), remapped)) + }) + .collect(); + remapped_fields.map(DerivedSort::Product) + } + } +} + +/// Remap a DerivedSort with instance parameter substitution. 
/// For sorts like "N/P" where N is being substituted for an outer param,
/// we look up the substituted name instead of prefixing.
///
/// Unlike the Option-returning remappers above, this function PANICS when a
/// base sort cannot be resolved either as `param_name/sort` or bare — callers
/// rely on the target signature already containing all needed sorts.
fn remap_derived_sort_with_subst(
    sort: &DerivedSort,
    source_sig: &Signature,
    target_sig: &Signature,
    param_name: &str,
    param_subst: &HashMap,
) -> DerivedSort {
    match sort {
        DerivedSort::Base(source_id) => {
            let sort_name = &source_sig.sorts[*source_id];

            // Check if this sort starts with a param name that we're substituting
            if let Some((prefix, suffix)) = sort_name.split_once('/')
                && let Some(subst) = param_subst.get(prefix) {
                // Substitute the prefix
                let substituted_name = format!("{}/{}", subst, suffix);
                if let Some(target_id) = target_sig.lookup_sort(&substituted_name) {
                    return DerivedSort::Base(target_id);
                }
                // Lookup miss falls through to the default prefixing below.
            }

            // Otherwise, use the default prefixing behavior
            let qualified_name = format!("{}/{}", param_name, sort_name);
            if let Some(target_id) = target_sig.lookup_sort(&qualified_name) {
                DerivedSort::Base(target_id)
            } else if let Some(target_id) = target_sig.lookup_sort(sort_name) {
                // Fallback: try without prefix (for sorts that weren't duplicated)
                DerivedSort::Base(target_id)
            } else {
                panic!(
                    "remap_derived_sort_with_subst: could not find sort {} or {}",
                    qualified_name, sort_name
                );
            }
        }
        DerivedSort::Product(fields) => {
            // Products remap field-by-field; a failing field panics inside the
            // recursive call rather than propagating an error.
            let remapped_fields: Vec<_> = fields
                .iter()
                .map(|(name, s)| {
                    (
                        name.clone(),
                        remap_derived_sort_with_subst(
                            s,
                            source_sig,
                            target_sig,
                            param_name,
                            param_subst,
                        ),
                    )
                })
                .collect();
            DerivedSort::Product(remapped_fields)
        }
    }
}

/// Extract the base theory name from a type expression.
///
/// With concatenative parsing, tokens are in order: [arg1, arg2, ..., theory_name]
/// The last path token is the theory name.
+fn extract_theory_name(ty: &ast::TypeExpr) -> ElabResult { + use crate::ast::TypeToken; + + // Find the last path token - that's the theory name + for token in ty.tokens.iter().rev() { + if let TypeToken::Path(path) = token { + return Ok(path.to_string()); + } + } + + Err(ElabError::TypeExprError(format!( + "cannot extract theory name from {:?}", + ty + ))) +} + +/// Collect type arguments from a theory type string like "ExampleNet ReachabilityProblem". +/// Returns the arguments (everything except the final theory name). +pub fn collect_type_args_from_theory_type(theory_type: &str) -> Vec { + let tokens: Vec<&str> = theory_type.split_whitespace().collect(); + if tokens.len() <= 1 { + vec![] + } else { + // All but the last token are arguments + tokens[..tokens.len()-1].iter().map(|s| s.to_string()).collect() + } +} + +/// Build a parameter substitution map for importing elements from a parameterized instance. +/// +/// Given a param instance with a certain theory type (e.g., "ExampleNet ReachabilityProblem"), +/// this builds a mapping from that theory's param names to the actual bindings. +/// +/// For example, if: +/// - `param_theory_type` = "ExampleNet ReachabilityProblem" +/// - ReachabilityProblem has param `(N : PetriNet instance)` +/// - The type args are ["ExampleNet"] +/// +/// Returns: {"N" -> "ExampleNet"} +pub fn build_param_subst( + param_theory: &ElaboratedTheory, + type_args: &[String], +) -> HashMap { + let mut param_subst = HashMap::new(); + for (bp, arg) in param_theory.params.iter().zip(type_args.iter()) { + if bp.theory_name != "Sort" { + // Instance param - map its name to the type arg + param_subst.insert(bp.name.clone(), arg.clone()); + } + } + param_subst +} + +/// Remap a sort name from a param instance to the local theory's sort namespace. +/// +/// This handles the case where a param instance has sorts from its own params, +/// and we need to figure out which local sorts they correspond to. 
+/// +/// For example, when importing from `problem0` (an `ExampleNet ReachabilityProblem`) +/// into `solution0` (an `ExampleNet problem0 Solution`): +/// - problem0 has sort "N/P" where N = ExampleNet +/// - solution0 has sort "N/P" where N = ExampleNet (from outer param) +/// - So "N/P" from problem0 maps to "N/P" in solution0 (not "RP/N/P") +/// +/// Arguments: +/// - `sort_name`: The sort name in the param instance's signature (e.g., "N/P") +/// - `param_name`: The local param name (e.g., "RP") +/// - `param_subst`: Mapping from param instance's param names to their bindings (e.g., {"N" -> "ExampleNet"}) +/// - `local_arguments`: The local instance's param bindings (e.g., [("N", "ExampleNet"), ("RP", "problem0")]) +/// +/// Returns the sort name to use in the local signature. +pub fn remap_sort_for_param_import( + sort_name: &str, + param_name: &str, + param_subst: &HashMap, + local_arguments: &[(String, String)], +) -> String { + // Check if this sort starts with a param name that we're substituting + if let Some((prefix, suffix)) = sort_name.split_once('/') + && let Some(bound_instance) = param_subst.get(prefix) { + // This sort is from a param in the param instance. + // Find which local param is bound to the same instance. + for (local_param_name, local_instance) in local_arguments { + if local_instance == bound_instance { + // Found it! Use the local param's prefix instead. 
+ return format!("{}/{}", local_param_name, suffix); + } + } + // Fallback: the instance isn't directly a local param, + // just use param_name prefix + return format!("{}/{}", param_name, sort_name); + } + + // Unqualified sort or no substitution applicable - prefix with param_name + format!("{}/{}", param_name, sort_name) +} + +/// Format a type expression as a string (for storing instance field types) +fn format_type_expr(ty: &ast::TypeExpr) -> String { + use crate::ast::TypeToken; + + let mut parts = Vec::new(); + + for token in &ty.tokens { + match token { + TypeToken::Path(path) => parts.push(path.to_string()), + TypeToken::Sort => parts.push("Sort".to_string()), + TypeToken::Prop => parts.push("Prop".to_string()), + TypeToken::Instance => parts.push("instance".to_string()), + TypeToken::Arrow => parts.push("->".to_string()), + TypeToken::Record(fields) => { + let field_strs: Vec = fields + .iter() + .map(|(name, field_ty)| format!("{}: {}", name, format_type_expr(field_ty))) + .collect(); + parts.push(format!("[{}]", field_strs.join(", "))); + } + } + } + + parts.join(" ") +} diff --git a/src/elaborate/types.rs b/src/elaborate/types.rs new file mode 100644 index 0000000..16640d7 --- /dev/null +++ b/src/elaborate/types.rs @@ -0,0 +1,265 @@ +//! Type expression evaluation (concatenative stack-based) +//! +//! Evaluates flat TypeExpr token sequences into resolved types, +//! using the symbol table to determine theory arities. 

use crate::ast::{Path, TypeExpr, TypeToken};
use crate::core::DerivedSort;
use crate::elaborate::error::{ElabError, ElabResult};
use crate::elaborate::Env;

/// A value on the type evaluation stack
///
/// NOTE(review): several payload types below appear stripped by extraction
/// (`args: Vec,`, `Instance(Box)`, `Box` in Arrow) — presumably all
/// `TypeValue`; confirm against the original file.
#[derive(Clone, Debug)]
pub enum TypeValue {
    /// The Sort kind (for parameter declarations like `X : Sort`)
    SortKind,

    /// The Prop kind (for relation codomains)
    PropKind,

    /// A resolved base sort (index into signature)
    Sort(DerivedSort),

    /// An unresolved path (instance ref, sort path, or theory name)
    /// Will be resolved based on context
    Path(Path),

    /// A theory applied to arguments
    AppliedTheory {
        theory_name: String,
        args: Vec,
    },

    /// Instance type: wraps another type value
    Instance(Box),

    /// Function/arrow type
    Arrow {
        domain: Box,
        codomain: Box,
    },

    /// Record/product type
    Record(Vec<(String, TypeValue)>),
}

impl TypeValue {
    /// Try to convert this type value to a DerivedSort.
    ///
    /// Only `Sort`, resolvable `Path`, and `Record` values are sorts; every
    /// other variant yields `ElabError::NotASort`.
    pub fn as_derived_sort(&self, env: &Env) -> ElabResult {
        match self {
            TypeValue::Sort(s) => Ok(s.clone()),

            TypeValue::Path(path) => {
                // Try to resolve as a sort path
                env.resolve_sort_path(path)
            }

            TypeValue::Record(fields) => {
                // Convert each field; `collect` into Result short-circuits on
                // the first field that is not a sort.
                let resolved: Result, _> = fields
                    .iter()
                    .map(|(name, val)| val.as_derived_sort(env).map(|s| (name.clone(), s)))
                    .collect();
                Ok(DerivedSort::Product(resolved?))
            }

            TypeValue::SortKind => Err(ElabError::NotASort(
                "Sort is a kind, not a type".to_string(),
            )),

            TypeValue::PropKind => Err(ElabError::NotASort(
                "Prop is a kind, not a type".to_string(),
            )),

            TypeValue::AppliedTheory { theory_name, .. } => Err(ElabError::NotASort(format!(
                "applied theory '{}' is not a sort",
                theory_name
            ))),

            TypeValue::Instance(_) => Err(ElabError::NotASort(
                "instance type is not a sort".to_string(),
            )),

            TypeValue::Arrow { .. } => Err(ElabError::NotASort(
                "arrow type is not a sort".to_string(),
            )),
        }
    }

    /// Check if this is the Sort kind
    pub fn is_sort_kind(&self) -> bool {
        matches!(self, TypeValue::SortKind)
    }

    /// Check if this is an instance type
    pub fn is_instance(&self) -> bool {
        matches!(self, TypeValue::Instance(_))
    }

    /// Get the inner type if this is an instance type
    pub fn instance_inner(&self) -> Option<&TypeValue> {
        match self {
            TypeValue::Instance(inner) => Some(inner),
            _ => None,
        }
    }

    /// Get the theory name and args if this is an applied theory
    pub fn as_applied_theory(&self) -> Option<(&str, &[TypeValue])> {
        match self {
            TypeValue::AppliedTheory { theory_name, args } => Some((theory_name, args)),
            _ => None,
        }
    }
}

/// Evaluate a type expression using the environment
///
/// This is the core stack-based evaluator. It processes tokens left-to-right,
/// using the symbol table to determine theory arities.
///
/// Errors: `NotEnoughArgs` when a theory's arity exceeds the stack depth;
/// `TypeExprError` for under/overflowing the stack or an empty expression.
pub fn eval_type_expr(expr: &TypeExpr, env: &Env) -> ElabResult {
    let mut stack: Vec = Vec::new();

    for token in &expr.tokens {
        match token {
            TypeToken::Sort => {
                stack.push(TypeValue::SortKind);
            }

            TypeToken::Prop => {
                stack.push(TypeValue::PropKind);
            }

            TypeToken::Path(path) => {
                // Check if this is a theory name with known arity
                let path_str = path.to_string();

                if let Some(theory) = env.theories.get(&path_str) {
                    let arity = theory.params.len();
                    if arity > 0 {
                        // Theory takes arguments - pop them from stack
                        if stack.len() < arity {
                            return Err(ElabError::NotEnoughArgs {
                                name: path_str,
                                expected: arity,
                                got: stack.len(),
                            });
                        }
                        // split_off keeps argument order: earliest-pushed first
                        let args = stack.split_off(stack.len() - arity);
                        stack.push(TypeValue::AppliedTheory {
                            theory_name: path_str,
                            args,
                        });
                    } else {
                        // Zero-arity theory
                        stack.push(TypeValue::AppliedTheory {
                            theory_name: path_str,
                            args: vec![],
                        });
                    }
                } else {
                    // Not a theory - could be a sort path or instance reference
                    // Push as unresolved path
                    stack.push(TypeValue::Path(path.clone()));
                }
            }

            TypeToken::Instance => {
                let top = stack.pop().ok_or_else(|| {
                    ElabError::TypeExprError("'instance' with empty stack".to_string())
                })?;
                stack.push(TypeValue::Instance(Box::new(top)));
            }

            TypeToken::Arrow => {
                // Pop codomain first (right-associative)
                let codomain = stack.pop().ok_or_else(|| {
                    ElabError::TypeExprError("'->' missing codomain".to_string())
                })?;
                let domain = stack.pop().ok_or_else(|| {
                    ElabError::TypeExprError("'->' missing domain".to_string())
                })?;
                stack.push(TypeValue::Arrow {
                    domain: Box::new(domain),
                    codomain: Box::new(codomain),
                });
            }

            TypeToken::Record(fields) => {
                // Evaluate each field's type expression recursively
                let mut resolved_fields = Vec::new();
                for (name, field_expr) in fields {
                    let field_val = eval_type_expr(field_expr, env)?;
                    resolved_fields.push((name.clone(), field_val));
                }
                stack.push(TypeValue::Record(resolved_fields));
            }
        }
    }

    // Stack should have exactly one element
    if stack.is_empty() {
        return Err(ElabError::TypeExprError("empty type expression".to_string()));
    }
    if stack.len() > 1 {
        return Err(ElabError::TypeExprError(format!(
            "type expression left {} values on stack (expected 1)",
            stack.len()
        )));
    }

    Ok(stack.pop().unwrap())
}

/// Convenience: evaluate a type expression and convert to DerivedSort
pub fn eval_as_sort(expr: &TypeExpr, env: &Env) -> ElabResult {
    let val = eval_type_expr(expr, env)?;
    val.as_derived_sort(env)
}

/// Extract the theory name from a type expression (for simple cases)
///
/// This is used when we just need the theory name without full evaluation.
/// Returns None if the expression is more complex than a simple path or applied theory.
// NOTE(review): return type garbled by extraction — presumably Option<String>.
pub fn extract_theory_name(expr: &TypeExpr) -> Option {
    // Look for the last path token that isn't followed by Instance
    let mut last_theory_candidate: Option<&Path> = None;

    for token in &expr.tokens {
        match token {
            TypeToken::Path(p) => {
                last_theory_candidate = Some(p);
            }
            TypeToken::Instance => {
                // The previous path was the theory name
                if let Some(p) = last_theory_candidate {
                    return Some(p.to_string());
                }
            }
            _ => {}
        }
    }

    // If no Instance token, the last path is the theory name
    last_theory_candidate.map(|p| p.to_string())
}

/// Check if a type expression represents the Sort kind
/// (exactly one token, and that token is `Sort`)
pub fn is_sort_kind(expr: &TypeExpr) -> bool {
    expr.tokens.len() == 1 && matches!(expr.tokens[0], TypeToken::Sort)
}

/// Check if a type expression ends with `instance`
pub fn is_instance_type(expr: &TypeExpr) -> bool {
    expr.tokens.last() == Some(&TypeToken::Instance)
}

/// Get all path tokens from a type expression (useful for parameter extraction)
pub fn get_paths(expr: &TypeExpr) -> Vec<&Path> {
    expr.tokens
        .iter()
        .filter_map(|t| match t {
            TypeToken::Path(p) => Some(p),
            _ => None,
        })
        .collect()
}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..fa02ecd
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,211 @@
//! Error formatting for Geolog
//!
//! Provides user-friendly error messages using ariadne for nice formatting.

use ariadne::{Color, Label, Report, ReportKind, Source};
use chumsky::prelude::Simple;
use std::ops::Range;

use crate::lexer::Token;

/// Format lexer errors into a user-friendly string
///
/// Renders each error as an ariadne report against `source` and concatenates
/// the reports into one String.
/// NOTE(review): `errors: Vec>` garbled — presumably Vec<Simple<char>>.
pub fn format_lexer_errors(source: &str, errors: Vec>) -> String {
    let mut output = Vec::new();

    for error in errors {
        let span = error.span();
        let report = Report::build(ReportKind::Error, (), span.start)
            .with_message("Lexical error")
            .with_label(
                Label::new(span.clone())
                    .with_message(format_lexer_error(&error))
                    .with_color(Color::Red),
            );

        report
            .finish()
            .write(Source::from(source), &mut output)
            .expect("Failed to write error report");
    }

    // Reports are written as bytes; fall back to a fixed message on bad UTF-8.
    String::from_utf8(output).unwrap_or_else(|_| "Error formatting failed".to_string())
}

/// Format a single lexer error into a readable message
fn format_lexer_error(error: &Simple) -> String {
    let found = error
        .found()
        .map(|c| format!("'{}'", c))
        .unwrap_or_else(|| "end of input".to_string())

    if let Some(_expected) = error.expected().next() {
        format!(
            "Unexpected {}, expected {}",
            found,
            format_char_set(error.expected())
        )
    } else {
        format!("Unexpected character {}", found)
    }
}

/// Format parser errors into a user-friendly string
///
/// `token_spans` maps each lexed token to its character range in `source`,
/// so token-index spans from chumsky can be translated back to characters.
pub fn format_parser_errors(
    source: &str,
    errors: Vec>,
    token_spans: &[(Token, Range)],
) -> String {
    let mut output = Vec::new();

    for error in errors {
        let span = error.span();

        // Map token span to character span
        // The span could be either:
        // 1. A token index (0, 1, 2, ..., n-1 for n tokens) - look up in token_spans
        // 2. Already a character position (from custom errors that captured spans)
        //
        // Best heuristic: check if the span matches a token's character range.
        // If so, it's a character position. Otherwise, treat as token index.
        let is_char_position = token_spans
            .iter()
            .any(|(_, char_range)| char_range.start == span.start && char_range.end == span.end);

        let char_span = if is_char_position {
            // Span exactly matches a token's character range - use as-is
            span.clone()
        } else if span.start < token_spans.len() {
            // Span.start is a valid token index - use token's character range
            token_spans[span.start].1.clone()
        } else if span.start == token_spans.len() {
            // End of input marker - use the end of the last token
            if let Some((_, last_range)) = token_spans.last() {
                last_range.end..last_range.end
            } else {
                0..0
            }
        } else {
            // Fallback: treat as character position (clamped to source length)
            let start = span.start.min(source.len());
            let end = span.end.min(source.len());
            start..end
        };

        let report = Report::build(ReportKind::Error, (), char_span.start)
            .with_message("Parse error")
            .with_label(
                Label::new(char_span.clone())
                    .with_message(format_parser_error(&error))
                    .with_color(Color::Red),
            );

        report
            .finish()
            .write(Source::from(source), &mut output)
            .expect("Failed to write error report");
    }

    String::from_utf8(output).unwrap_or_else(|_| "Error formatting failed".to_string())
}

/// Format a single parser error into a readable message
fn format_parser_error(error: &Simple) -> String {
    use chumsky::error::SimpleReason;

    let found = error
        .found()
        .map(|t| format!("'{}'", format_token(t)))
        .unwrap_or_else(|| "end of input".to_string());

    // Check for custom error messages first (from Simple::custom())
    if let SimpleReason::Custom(msg) = error.reason() {
        return msg.clone();
    }

    let expected = format_token_set(error.expected());

    if !expected.is_empty() {
        // Check for common patterns and provide helpful messages
        let expected_str = expected.join(", ");

        // Detect common mistakes
        if expected.contains(&"';'".to_string()) && error.found() == Some(&Token::Colon) {
            return format!(
                "Expected semicolon ';' to end declaration, found '{}'",
                format_token(error.found().unwrap())
            );
        }

        if expected.contains(&"':'".to_string()) && error.found() == Some(&Token::Semicolon) {
            return format!(
                "Expected colon ':' before type, found '{}'",
                format_token(error.found().unwrap())
            );
        }

        format!("Unexpected {}, expected one of: {}", found, expected_str)
    } else if let Some(label) = error.label() {
        label.to_string()
    } else {
        format!("Unexpected token {}", found)
    }
}

/// Format a token for display
/// (mirrors `Token`'s `Display` impl in lexer.rs; keep the two in sync)
fn format_token(token: &Token) -> String {
    match token {
        Token::Namespace => "namespace".to_string(),
        Token::Theory => "theory".to_string(),
        Token::Instance => "instance".to_string(),
        Token::Query => "query".to_string(),
        Token::Sort => "Sort".to_string(),
        Token::Prop => "Prop".to_string(),
        Token::Forall => "forall".to_string(),
        Token::Exists => "exists".to_string(),
        Token::True => "true".to_string(),
        Token::False => "false".to_string(),
        Token::Ident(s) => s.clone(),
        Token::LBrace => "{".to_string(),
        Token::RBrace => "}".to_string(),
        Token::LParen => "(".to_string(),
        Token::RParen => ")".to_string(),
        Token::LBracket => "[".to_string(),
        Token::RBracket => "]".to_string(),
        Token::Colon => ":".to_string(),
        Token::Semicolon => ";".to_string(),
        Token::Comma => ",".to_string(),
        Token::Dot => ".".to_string(),
        Token::Slash => "/".to_string(),
        Token::Arrow => "->".to_string(),
        Token::Eq => "=".to_string(),
        Token::Turnstile => "|-".to_string(),
        Token::And => r"/\".to_string(),
        Token::Or => r"\/".to_string(),
        Token::Question => "?".to_string(),
        Token::Chase => "chase".to_string(),
    }
}

/// Format a set of expected tokens
fn format_token_set<'a>(expected: impl Iterator>) -> Vec {
    expected
        .filter_map(|opt| opt.as_ref())
        .map(|t| format!("'{}'", format_token(t)))
        .collect()
}

/// Format a set of expected characters
fn format_char_set<'a>(expected: impl Iterator>) -> String {
    let chars: Vec = expected
        .filter_map(|opt| opt.as_ref())
        .map(|c| format!("'{}'", c))
        .collect();

    if chars.is_empty() {
        "valid character".to_string()
    } else if chars.len() == 1 {
        chars[0].clone()
    } else {
        chars.join(" or ")
    }
}
diff --git a/src/id.rs b/src/id.rs
new file mode 100644
index 0000000..253be8b
--- /dev/null
+++ b/src/id.rs
@@ -0,0 +1,114 @@
//! ID types for geolog, following chit's multi-level ID design
//!
//! The key insight is that different operations benefit from different ID granularities:
//! - UUIDs for global identity (persistence, version control, cross-structure references)
//! - Luids for installation-wide identity (stable across structures, persisted)
//! - Slids for structure-local computation (cache-friendly, compact)
//!
//! We use egglog's `define_id!` macro to create newtype wrappers around usize,
//! giving us type safety (can't mix up Slid with Luid) and nice Debug output.

// Re-export NumericId trait and IdVec for typed indexing
pub use egglog_numeric_id::{define_id, IdVec, NumericId};
pub use nonminmax::NonMaxUsize;
pub use uuid::Uuid;

// We define our own macro that wraps egglog's define_id! and adds rkyv derives
macro_rules! define_id_with_rkyv {
    ($v:vis $name:ident, $repr:ty, $doc:tt) => {
        #[doc = $doc]
        #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
        #[archive(check_bytes)]
        #[repr(transparent)]
        $v struct $name {
            /// The underlying representation (public for zero-copy archived access)
            pub rep: $repr,
        }

        impl NumericId for $name {
            type Rep = $repr;
            type Atomic = std::sync::atomic::AtomicUsize;

            fn new(val: $repr) -> Self {
                Self { rep: val }
            }

            fn from_usize(index: usize) -> Self {
                Self { rep: index as $repr }
            }

            fn index(self) -> usize {
                self.rep as usize
            }

            fn rep(self) -> $repr {
                self.rep
            }
        }

        impl std::fmt::Debug for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                write!(f, "{}({})", stringify!($name), self.rep)
            }
        }

        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                write!(f, "{}", self.rep)
            }
        }
    };
}

define_id_with_rkyv!(
    pub Luid,
    usize,
    "Locally Universal ID: index into the global universe of UUIDs. Stable across installation, persisted."
);

define_id_with_rkyv!(
    pub Slid,
    usize,
    "Structure-Local ID: index within a structure's element universe. Primary working ID."
);

define_id_with_rkyv!(
    pub SortSlid,
    usize,
    "Sort-Local ID: index within a particular sort's carrier. Computed on-demand."
);

/// A Slid that can be stored in Option without doubling size.
/// Uses `NonMaxUsize` so that `Option` is the same size as `usize`,
/// with `usize::MAX` serving as the niche for `None`.
// NOTE(review): type parameter garbled — presumably Option<NonMaxUsize>.
pub type OptSlid = Option;

/// Convert a Slid to OptSlid.
/// Returns None if slid == usize::MAX (which would be an astronomically large structure).
#[inline]
pub fn some_slid(slid: Slid) -> OptSlid {
    NonMaxUsize::new(slid.index())
}

/// Extract a Slid from OptSlid.
#[inline]
pub fn get_slid(opt: OptSlid) -> Option {
    opt.map(|n| Slid::from_usize(n.get()))
}

/// A Luid that can be stored in Option without doubling size.
/// Analogous to OptSlid but for cross-instance references.
// NOTE(review): type parameter garbled — presumably Option<NonMaxUsize>.
pub type OptLuid = Option;

/// Convert a Luid to OptLuid.
#[inline]
pub fn some_luid(luid: Luid) -> OptLuid {
    NonMaxUsize::new(luid.index())
}

/// Extract a Luid from OptLuid.
#[inline]
pub fn get_luid(opt: OptLuid) -> Option {
    opt.map(|n| Luid::from_usize(n.get()))
}
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..117b1ff
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,143 @@
//! Lexer for Geolog
//!
//! Tokenizes source into a stream for the parser.

use chumsky::prelude::*;
use std::ops::Range;

/// Token types for Geolog
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Token {
    // Keywords
    Namespace,
    Theory,
    Instance,
    Query,
    Sort,
    Prop,
    Forall,
    Exists,
    True,
    False,
    Chase,

    // Identifiers
    Ident(String),

    // Punctuation
    LBrace,    // {
    RBrace,    // }
    LParen,    // (
    RParen,    // )
    LBracket,  // [
    RBracket,  // ]
    Colon,     // :
    Semicolon, // ;
    Comma,     // ,
    Dot,       // .
    Slash,     // /
    Arrow,     // ->
    Eq,        // =
    Turnstile, // |-
    And,       // /\
    Or,        // \/
    Question,  // ?
}

impl std::fmt::Display for Token {
    // Renders each token as its surface syntax
    // (kept in sync with error.rs's format_token).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Namespace => write!(f, "namespace"),
            Token::Theory => write!(f, "theory"),
            Token::Instance => write!(f, "instance"),
            Token::Query => write!(f, "query"),
            Token::Sort => write!(f, "Sort"),
            Token::Prop => write!(f, "Prop"),
            Token::Forall => write!(f, "forall"),
            Token::Exists => write!(f, "exists"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::Chase => write!(f, "chase"),
            Token::Ident(s) => write!(f, "{}", s),
            Token::LBrace => write!(f, "{{"),
            Token::RBrace => write!(f, "}}"),
            Token::LParen => write!(f, "("),
            Token::RParen => write!(f, ")"),
            Token::LBracket => write!(f, "["),
            Token::RBracket => write!(f, "]"),
            Token::Colon => write!(f, ":"),
            Token::Semicolon => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::Dot => write!(f, "."),
            Token::Slash => write!(f, "/"),
            Token::Arrow => write!(f, "->"),
            Token::Eq => write!(f, "="),
            Token::Turnstile => write!(f, "|-"),
            Token::And => write!(f, r"/\"),
            Token::Or => write!(f, r"\/"),
            Token::Question => write!(f, "?"),
        }
    }
}

/// Type alias for spans
// NOTE(review): type parameter garbled — presumably Range<usize>.
pub type Span = Range;

/// Create a lexer for Geolog
///
/// Produces (Token, Span) pairs; comments and whitespace are skipped.
// NOTE(review): return type garbled — presumably
// impl Parser<char, Vec<(Token, Span)>, Error = Simple<char>>.
pub fn lexer() -> impl Parser, Error = Simple> {
    let keyword_or_ident = text::ident().map(|s: String| match s.as_str() {
        "namespace" => Token::Namespace,
        "theory" => Token::Theory,
        "instance" => Token::Instance,
        "query" => Token::Query,
        "Sort" => Token::Sort,
        "Prop" => Token::Prop,
        "forall" => Token::Forall,
        "exists" => Token::Exists,
        "true" => Token::True,
        "false" => Token::False,
        "chase" => Token::Chase,
        _ => Token::Ident(s),
    });

    // Multi-character operators are listed before the single characters they
    // start with so `choice` matches them first.
    let punctuation = choice((
        just("->").to(Token::Arrow),
        just("|-").to(Token::Turnstile),
        just(r"/\").to(Token::And),
        just(r"\/").to(Token::Or),
        just('{').to(Token::LBrace),
        just('}').to(Token::RBrace),
        just('(').to(Token::LParen),
        just(')').to(Token::RParen),
        just('[').to(Token::LBracket),
        just(']').to(Token::RBracket),
        just(':').to(Token::Colon),
        just(';').to(Token::Semicolon),
        just(',').to(Token::Comma),
        just('.').to(Token::Dot),
        just('/').to(Token::Slash),
        just('=').to(Token::Eq),
        just('?').to(Token::Question),
    ));

    // Comments: // to end of line (handles both mid-file and end-of-file)
    // IMPORTANT: Must check for // BEFORE single / to avoid tokenizing as two Slash tokens
    let line_comment = just("//")
        .then(none_of('\n').repeated())
        .then(just('\n').or_not()) // Either newline or EOF
        .ignored();

    // Token OR comment - comments produce None, tokens produce Some
    let token_or_skip = line_comment
        .to(None)
        .or(keyword_or_ident.or(punctuation).map(Some));

    token_or_skip
        .map_with_span(|opt_tok, span| opt_tok.map(|tok| (tok, span)))
        .padded()
        .repeated()
        .then_ignore(end())
        // flatten drops the None entries produced by comments
        .map(|items| items.into_iter().flatten().collect())
}

// Unit tests moved to tests/unit_parsing.rs
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..e5f8c89
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,51 @@
//! Geolog: A language for geometric logic
//!
//! Geolog is a type theory with semantics in topoi and geometric morphisms,
//! designed as a unified language for database schemas, queries, and migrations.

pub mod ast;
pub mod cc;
pub mod core;
pub mod elaborate;
pub mod error;
pub mod id;
pub mod lexer;
pub mod meta;
pub mod naming;
pub mod overlay;
pub mod parser;
pub mod patch;
pub mod pretty;
pub mod query;
pub mod repl;
pub mod serialize;
pub mod solver;
pub mod store;
pub mod tensor;
pub mod universe;
pub mod version;
pub mod zerocopy;

pub use ast::*;
pub use lexer::lexer;
pub use parser::parser;
pub use pretty::pretty_print;

/// Parse a Geolog source string into an AST
///
/// Lexes first, then parses the token stream; either stage's errors are
/// returned pre-formatted (via the error module) as the Err value.
// NOTE(review): return type garbled — Ok side is the parser's AST root
// (a type with a `declarations` field, per meta.rs); confirm original.
pub fn parse(input: &str) -> Result {
    use chumsky::prelude::*;

    let tokens = lexer::lexer()
        .parse(input)
        .map_err(|errs| error::format_lexer_errors(input, errs))?;

    let token_stream: Vec<_> = tokens.iter().map(|(t, s)| (t.clone(), s.clone())).collect();
    let len = input.len();

    // The eoi span (len..len + 1) lets the parser report errors at end of input.
    parser::parser()
        .parse(chumsky::Stream::from_iter(
            len..len + 1,
            token_stream.into_iter(),
        ))
        .map_err(|errs| error::format_parser_errors(input, errs, &tokens))
}
diff --git a/src/meta.rs b/src/meta.rs
new file mode 100644
index 0000000..dac4499
--- /dev/null
+++ b/src/meta.rs
@@ -0,0 +1,1106 @@
//! Conversion between ElaboratedTheory and GeologMeta instances
//!
//! This module provides homoiconic serialization: geolog theories can be
//! represented as instances of the GeologMeta theory, enabling persistence
//! and meta-programming.
//!
//! Note: Human-readable names are stored separately in a NamingIndex (keyed by UUID),
//! not in the Structure itself. This module populates both the Structure and NamingIndex.

use std::collections::HashMap;
use std::sync::{Arc, OnceLock};

use crate::core::{
    Context, DerivedSort, ElaboratedTheory, Formula, FuncId, RelId, Sequent, Signature, SortId,
    Structure, Term, TheoryParam,
};
use crate::elaborate::{Env, elaborate_theory};
use crate::id::{NumericId, Slid};
use crate::naming::NamingIndex;
use crate::universe::Universe;

/// GeologMeta source, embedded at compile time
const GEOLOG_META_SOURCE: &str = include_str!("../theories/GeologMeta.geolog");

/// Cached elaborated GeologMeta theory
// NOTE(review): type parameter garbled — presumably OnceLock<Arc<ElaboratedTheory>>.
static GEOLOG_META: OnceLock> = OnceLock::new();

/// Get the elaborated GeologMeta theory, parsing and elaborating on first access
///
/// Panics if the embedded source fails to parse/elaborate or contains no
/// theory declaration — these are compile-time-shipped invariants.
pub fn geolog_meta() -> Arc {
    GEOLOG_META
        .get_or_init(|| {
            let file = crate::parse(GEOLOG_META_SOURCE).expect("GeologMeta.geolog should parse");

            let mut env = Env::new();
            // Elaborate the first (and only expected) theory declaration.
            for decl in &file.declarations {
                if let crate::ast::Declaration::Theory(t) = &decl.node {
                    let elab = elaborate_theory(&mut env, t).expect("GeologMeta should elaborate");
                    return Arc::new(elab);
                }
            }
            panic!("GeologMeta.geolog should contain a theory declaration");
        })
        .clone()
}

/// A builder for constructing GeologMeta instances
///
/// This manages the mapping from theory components to element IDs (Slids)
/// in the target structure.
// NOTE(review): HashMap type parameters below garbled by extraction — from the
// usage in convert_signature/convert_dsort they are presumably
// HashMap<usize, u32> (sort_map), HashMap<String, u32> (func/rel/field maps),
// HashMap<String, Vec<(String, u32)>> (elements), and
// HashMap<String, Vec<(u32, u32)>> (functions); confirm against the original.
pub struct MetaBuilder {
    /// The theory element (there's exactly one per ElaboratedTheory)
    pub theory_slid: u32,

    /// Maps SortId -> Srt element slid
    pub sort_map: HashMap,

    /// Maps function name -> Func element slid
    pub func_map: HashMap,

    /// Maps relation name -> Rel element slid
    pub rel_map: HashMap,

    /// Maps field name -> Field element slid (for RecEntry/field and ProjT/field)
    /// Note: This is a simplification; properly would need to track by product type
    pub field_map: HashMap,

    /// Counter for generating fresh element IDs
    next_slid: u32,

    /// Accumulated elements by sort (sort_name -> [(elem_name, slid)])
    /// Names are stored here for NamingIndex population, not in Structure
    elements: HashMap>,

    /// Accumulated function values (func_name -> [(domain_slid, codomain_slid)])
    functions: HashMap>,
}

impl Default for MetaBuilder {
    fn default() -> Self {
        Self::new()
    }
}

impl MetaBuilder {
    /// Create an empty builder; slids start at 0.
    pub fn new() -> Self {
        Self {
            theory_slid: 0,
            sort_map: HashMap::new(),
            func_map: HashMap::new(),
            rel_map: HashMap::new(),
            field_map: HashMap::new(),
            next_slid: 0,
            elements: HashMap::new(),
            functions: HashMap::new(),
        }
    }

    /// Allocate a fresh element in a given sort
    /// (slids are globally sequential across all sorts, not per-sort)
    fn alloc(&mut self, sort: &str, name: String) -> u32 {
        let slid = self.next_slid;
        self.next_slid += 1;
        self.elements
            .entry(sort.to_string())
            .or_default()
            .push((name, slid));
        slid
    }

    /// Record a function value: domain_slid maps to codomain_slid via func_name
    fn set_func(&mut self, func_name: &str, domain_slid: u32, codomain_slid: u32) {
        self.functions
            .entry(func_name.to_string())
            .or_default()
            .push((domain_slid, codomain_slid));
    }
}

/// Convert an ElaboratedTheory to a GeologMeta instance description
///
/// Returns a MetaBuilder containing all the elements and function values
/// needed to construct the Structure.
pub fn theory_to_meta(theory: &ElaboratedTheory, _universe: &mut Universe) -> MetaBuilder {
    let mut builder = MetaBuilder::new();

    // Create the Theory element
    let theory_name = &theory.theory.name;
    builder.theory_slid = builder.alloc("Theory", theory_name.clone());

    // Convert signature
    convert_signature(&mut builder, &theory.theory.signature);

    // Convert params
    for param in &theory.params {
        convert_param(&mut builder, param);
    }

    // Convert axioms (using axiom_names if available, otherwise fallback to ax_N)
    for (i, axiom) in theory.theory.axioms.iter().enumerate() {
        let axiom_name = theory
            .theory
            .axiom_names
            .get(i)
            .cloned()
            .unwrap_or_else(|| format!("ax_{}", i));
        convert_sequent(&mut builder, axiom, &axiom_name);
    }

    builder
}

/// Populate the builder with Srt/Func/Rel elements for every signature item.
/// Must run before convert_dsort is used, since Base sorts resolve via sort_map.
fn convert_signature(builder: &mut MetaBuilder, sig: &Signature) {
    // Convert sorts
    // Note: Human-readable names are stored in NamingIndex, not here
    for (sort_id, sort_name) in sig.sorts.iter().enumerate() {
        let srt_slid = builder.alloc("Srt", sort_name.clone());
        builder.sort_map.insert(sort_id, srt_slid);

        // Srt/theory points to the theory
        builder.set_func("Srt/theory", srt_slid, builder.theory_slid);
    }

    // Convert functions
    for func in &sig.functions {
        let func_slid = builder.alloc("Func", func.name.clone());
        builder.func_map.insert(func.name.clone(), func_slid);

        // Func/theory
        builder.set_func("Func/theory", func_slid, builder.theory_slid);

        // Func/dom and Func/cod
        let dom_slid = convert_dsort(builder, &func.domain);
        let cod_slid = convert_dsort(builder, &func.codomain);
        builder.set_func("Func/dom", func_slid, dom_slid);
        builder.set_func("Func/cod", func_slid, cod_slid);
    }

    // Convert relations
    for rel in &sig.relations {
        let rel_slid = builder.alloc("Rel", rel.name.clone());
        builder.rel_map.insert(rel.name.clone(), rel_slid);

        // Rel/theory
        builder.set_func("Rel/theory", rel_slid, builder.theory_slid);

        // Rel/dom (relations have no codomain element; Prop is implicit)
        let dom_slid = convert_dsort(builder, &rel.domain);
        builder.set_func("Rel/dom", rel_slid, dom_slid);
    }
}

/// Convert a DerivedSort to meta elements, returning the DSort element's slid.
/// Each call allocates FRESH elements — structurally equal sorts are not shared.
fn convert_dsort(builder: &mut MetaBuilder, dsort: &DerivedSort) -> u32 {
    match dsort {
        DerivedSort::Base(sort_id) => {
            // Create a BaseDS element
            let base_slid = builder.alloc("BaseDS", format!("base_{}", sort_id));

            // BaseDS/dsort - embed into DSort
            let dsort_slid = builder.alloc("DSort", format!("dsort_base_{}", sort_id));
            builder.set_func("BaseDS/dsort", base_slid, dsort_slid);

            // BaseDS/srt - point to the Srt element
            // NOTE(review): a miss here silently omits BaseDS/srt; relies on
            // convert_signature having registered every sort_id beforehand.
            if let Some(&srt_slid) = builder.sort_map.get(sort_id) {
                builder.set_func("BaseDS/srt", base_slid, srt_slid);
            }

            dsort_slid
        }
        DerivedSort::Product(fields) => {
            // Create a ProdDS element
            let prod_slid = builder.alloc("ProdDS", "prod".to_string());

            // ProdDS/dsort - embed into DSort
            let dsort_slid = builder.alloc("DSort", "dsort_prod".to_string());
            builder.set_func("ProdDS/dsort", prod_slid, dsort_slid);

            // Create Field elements for each field
            for (field_name, field_type) in fields {
                let field_slid = builder.alloc("Field", field_name.clone());

                // Track field for later use in RecEntry/field and ProjT/field
                builder.field_map.insert(field_name.clone(), field_slid);

                // Field/prod
                builder.set_func("Field/prod", field_slid, prod_slid);

                // Field/type (recursive)
                let type_slid = convert_dsort(builder, field_type);
                builder.set_func("Field/type", field_slid, type_slid);
            }

            dsort_slid
        }
    }
}

/// Convert a theory parameter to a Param element.
fn convert_param(builder: &mut MetaBuilder, param: &TheoryParam) {
    let param_slid = builder.alloc("Param", param.name.clone());

    // Param/theory (which theory has this param)
    builder.set_func("Param/theory", param_slid, builder.theory_slid);

    // Param/type - we'd need to look up the theory by name
    // For now, create a placeholder Theory element
    let type_theory_slid = builder.alloc("Theory", param.theory_name.clone());
    builder.set_func("Param/type", param_slid, type_theory_slid);
}

/// Convert one axiom (a sequent) and its binder context; returns the Sequent slid.
fn convert_sequent(builder: &mut MetaBuilder, sequent: &Sequent, name: &str) -> u32 {
    let seq_slid = builder.alloc("Sequent", name.to_string());

    // Sequent/theory
    builder.set_func("Sequent/theory", seq_slid, builder.theory_slid);

    // Create binders for context variables
    let mut binder_map = HashMap::new();
    for (var_name, var_type) in &sequent.context.vars {
        let binder_slid = builder.alloc("Binder", var_name.clone());

        // Binder/type
        let type_slid = convert_dsort(builder, var_type);
        builder.set_func("Binder/type", binder_slid, type_slid);

        // CtxVar linking binder to sequent
        let ctxvar_slid = builder.alloc("CtxVar", format!("cv_{}", var_name));
        builder.set_func("CtxVar/sequent", ctxvar_slid, seq_slid);
        builder.set_func("CtxVar/binder", ctxvar_slid, binder_slid);

        binder_map.insert(var_name.clone(), binder_slid);
    }

    // Sequent/premise
    let premise_slid = convert_formula(builder, &sequent.premise, &binder_map);
    builder.set_func("Sequent/premise", seq_slid, premise_slid);

    // Sequent/conclusion
    let conclusion_slid = convert_formula(builder, &sequent.conclusion, &binder_map);
    builder.set_func("Sequent/conclusion", seq_slid, conclusion_slid);

    seq_slid
}

// NOTE(review): this function is truncated at the edge of the reviewed chunk;
// only the True/False/Eq-prefix arms are visible here.
fn convert_formula(
    builder: &mut MetaBuilder,
    formula: &Formula,
    binder_map: &HashMap,
) -> u32 {
    match formula {
        Formula::True => {
            let truef_slid = builder.alloc("TrueF", "true".to_string());
            let formula_slid = builder.alloc("Formula", "true".to_string());
            builder.set_func("TrueF/formula", truef_slid, formula_slid);
            formula_slid
        }
        Formula::False => {
            let falsef_slid = builder.alloc("FalseF", "false".to_string());
            let formula_slid = builder.alloc("Formula", "false".to_string());
            builder.set_func("FalseF/formula", falsef_slid, formula_slid);
            formula_slid
        }
        Formula::Eq(lhs, rhs) => {
            let eqf_slid = builder.alloc("EqF", "eq".to_string());
            let formula_slid = builder.alloc("Formula", "eq".to_string());
builder.set_func("EqF/formula", eqf_slid, formula_slid); + + let lhs_slid = convert_term(builder, lhs, binder_map); + let rhs_slid = convert_term(builder, rhs, binder_map); + builder.set_func("EqF/lhs", eqf_slid, lhs_slid); + builder.set_func("EqF/rhs", eqf_slid, rhs_slid); + + formula_slid + } + Formula::Conj(conjuncts) => { + let conjf_slid = builder.alloc("ConjF", "conj".to_string()); + let formula_slid = builder.alloc("Formula", "conj".to_string()); + builder.set_func("ConjF/formula", conjf_slid, formula_slid); + + for (i, conjunct) in conjuncts.iter().enumerate() { + let arm_slid = builder.alloc("ConjArm", format!("arm_{}", i)); + builder.set_func("ConjArm/conj", arm_slid, conjf_slid); + + let child_slid = convert_formula(builder, conjunct, binder_map); + builder.set_func("ConjArm/child", arm_slid, child_slid); + } + + formula_slid + } + Formula::Disj(disjuncts) => { + let disjf_slid = builder.alloc("DisjF", "disj".to_string()); + let formula_slid = builder.alloc("Formula", "disj".to_string()); + builder.set_func("DisjF/formula", disjf_slid, formula_slid); + + for (i, disjunct) in disjuncts.iter().enumerate() { + let arm_slid = builder.alloc("DisjArm", format!("arm_{}", i)); + builder.set_func("DisjArm/disj", arm_slid, disjf_slid); + + let child_slid = convert_formula(builder, disjunct, binder_map); + builder.set_func("DisjArm/child", arm_slid, child_slid); + } + + formula_slid + } + Formula::Exists(var_name, var_type, body) => { + let existsf_slid = builder.alloc("ExistsF", format!("exists_{}", var_name)); + let formula_slid = builder.alloc("Formula", format!("exists_{}", var_name)); + builder.set_func("ExistsF/formula", existsf_slid, formula_slid); + + // Create a new binder for this existential + let binder_slid = builder.alloc("Binder", var_name.clone()); + + let type_slid = convert_dsort(builder, var_type); + builder.set_func("Binder/type", binder_slid, type_slid); + + builder.set_func("ExistsF/binder", existsf_slid, binder_slid); + + // Extend binder map 
for body + let mut extended_map = binder_map.clone(); + extended_map.insert(var_name.clone(), binder_slid); + + let body_slid = convert_formula(builder, body, &extended_map); + builder.set_func("ExistsF/body", existsf_slid, body_slid); + + formula_slid + } + Formula::Rel(rel_id, arg) => { + let relf_slid = builder.alloc("RelF", format!("rel_{}", rel_id)); + let formula_slid = builder.alloc("Formula", format!("rel_{}", rel_id)); + builder.set_func("RelF/formula", relf_slid, formula_slid); + + // RelF/rel - need to look up the Rel element by ID + // For now, just use the ID directly (we'd need the rel_map to be indexed by ID) + // This is a simplification - in practice we'd track rel_id -> slid + + let arg_slid = convert_term(builder, arg, binder_map); + builder.set_func("RelF/arg", relf_slid, arg_slid); + + formula_slid + } + } +} + +fn convert_term(builder: &mut MetaBuilder, term: &Term, binder_map: &HashMap) -> u32 { + match term { + Term::Var(name, _sort) => { + let vart_slid = builder.alloc("VarT", name.clone()); + let term_slid = builder.alloc("Term", name.clone()); + builder.set_func("VarT/term", vart_slid, term_slid); + + // VarT/binder - look up in binder map + if let Some(&binder_slid) = binder_map.get(name) { + builder.set_func("VarT/binder", vart_slid, binder_slid); + } + + term_slid + } + Term::App(func_id, arg) => { + let appt_slid = builder.alloc("AppT", format!("app_{}", func_id)); + let term_slid = builder.alloc("Term", format!("app_{}", func_id)); + builder.set_func("AppT/term", appt_slid, term_slid); + + // AppT/func - need to look up Func element by ID + // Similar simplification as with relations + + let arg_slid = convert_term(builder, arg, binder_map); + builder.set_func("AppT/arg", appt_slid, arg_slid); + + term_slid + } + Term::Record(fields) => { + let recordt_slid = builder.alloc("RecordT", "record".to_string()); + let term_slid = builder.alloc("Term", "record".to_string()); + builder.set_func("RecordT/term", recordt_slid, term_slid); + + 
for (field_name, field_val) in fields { + let entry_slid = builder.alloc("RecEntry", field_name.clone()); + builder.set_func("RecEntry/record", entry_slid, recordt_slid); + + // RecEntry/field points to the Field element (if known) + if let Some(&field_slid) = builder.field_map.get(field_name) { + builder.set_func("RecEntry/field", entry_slid, field_slid); + } + + let val_slid = convert_term(builder, field_val, binder_map); + builder.set_func("RecEntry/val", entry_slid, val_slid); + } + + term_slid + } + Term::Project(base, field) => { + let projt_slid = builder.alloc("ProjT", format!("proj_{}", field)); + let term_slid = builder.alloc("Term", format!("proj_{}", field)); + builder.set_func("ProjT/term", projt_slid, term_slid); + + let base_slid = convert_term(builder, base, binder_map); + builder.set_func("ProjT/base", projt_slid, base_slid); + + // ProjT/field points to the Field element (if known) + if let Some(&field_slid) = builder.field_map.get(field) { + builder.set_func("ProjT/field", projt_slid, field_slid); + } + + term_slid + } + } +} + +/// Convert a MetaBuilder into an actual Structure (GeologMeta instance) +/// +/// This is the final step in theory serialization: +/// ElaboratedTheory → MetaBuilder → Structure +/// +/// Also populates the NamingIndex with human-readable names for all elements. 
/// Materialize a `MetaBuilder` into a `Structure`, adding elements to the
/// Universe and registering each element's qualified name
/// `[theory_name, elem_name]` in `naming`.
///
/// # Panics
/// Panics if the builder mentions a sort or function name missing from the
/// GeologMeta signature, if a recorded function value references an
/// unallocated slid, or if `define_function` rejects a value — all of which
/// indicate a bug in the conversion, not bad user input.
pub fn builder_to_structure(
    builder: &MetaBuilder,
    universe: &mut Universe,
    naming: &mut NamingIndex,
    theory_name: &str,
) -> Structure {
    let meta_theory = geolog_meta();
    let sig = &meta_theory.theory.signature;

    let num_sorts = sig.sorts.len();
    let mut structure = Structure::new(num_sorts);

    // Map MetaBuilder internal slids → Structure Slids
    let mut slid_map: HashMap<u32, Slid> = HashMap::new();

    // Phase 1: Add all elements
    // Iterate through MetaBuilder's elements by sort, adding them to Structure
    for (sort_name, elems) in &builder.elements {
        let sort_id = sig
            .lookup_sort(sort_name)
            .unwrap_or_else(|| panic!("Sort '{}' not found in GeologMeta", sort_name));

        for (elem_name, internal_slid) in elems {
            let (struct_slid, luid) = structure.add_element(universe, sort_id);
            slid_map.insert(*internal_slid, struct_slid);

            // Register name in NamingIndex (qualified by theory name)
            let uuid = universe
                .get(luid)
                .expect("freshly created luid should have uuid");
            naming.insert(uuid, vec![theory_name.to_string(), elem_name.clone()]);
        }
    }

    // Phase 2: Initialize function storage
    // Build domain sort mapping for each function
    let domain_sort_ids: Vec<Option<SortId>> = sig
        .functions
        .iter()
        .map(|f| {
            match &f.domain {
                DerivedSort::Base(sort_id) => Some(*sort_id),
                DerivedSort::Product(_) => None, // Product domains deferred
            }
        })
        .collect();

    structure.init_functions(&domain_sort_ids);

    // Phase 3: Define function values
    // Must run after phase 1 so every internal slid has a Structure Slid.
    for (func_name, values) in &builder.functions {
        let func_id = sig
            .lookup_func(func_name)
            .unwrap_or_else(|| panic!("Function '{}' not found in GeologMeta", func_name));

        for (internal_dom, internal_cod) in values {
            let dom_slid = slid_map
                .get(internal_dom)
                .unwrap_or_else(|| panic!("Domain slid {} not mapped", internal_dom));
            let cod_slid = slid_map
                .get(internal_cod)
                .unwrap_or_else(|| panic!("Codomain slid {} not mapped", internal_cod));

            structure
                .define_function(func_id, *dom_slid, *cod_slid)
                .unwrap_or_else(|e| panic!("Function definition failed: {}", e));
        }
    }

    structure
}

/// Full conversion: ElaboratedTheory → Structure (GeologMeta instance)
///
/// This is the main entry point for theory serialization.
/// Names are registered in the provided NamingIndex.
pub fn theory_to_structure(
    theory: &ElaboratedTheory,
    universe: &mut Universe,
    naming: &mut NamingIndex,
) -> Structure {
    let builder = theory_to_meta(theory, universe);
    builder_to_structure(&builder, universe, naming, &theory.theory.name)
}

// ============================================================================
// REVERSE CONVERSION: Structure → ElaboratedTheory
// ============================================================================

/// A reader for navigating GeologMeta structures
///
/// Provides convenient access to follow function pointers and collect elements.
/// Uses NamingIndex and Universe to look up human-readable names.
pub struct MetaReader<'a> {
    structure: &'a Structure,
    universe: &'a Universe,
    naming: &'a NamingIndex,
    /// The GeologMeta theory (Arc keeps signature alive)
    meta: Arc<ElaboratedTheory>,
    // Cached function IDs for quick lookup
    func_ids: HashMap<&'static str, FuncId>,
    // Cached sort IDs
    sort_ids: HashMap<&'static str, SortId>,
}

impl<'a> MetaReader<'a> {
    /// Build a reader over `structure`, pre-caching the IDs of every
    /// GeologMeta function and sort the reconstruction code touches.
    /// Names missing from the signature are silently skipped; lookups on
    /// them later just return nothing.
    pub fn new(structure: &'a Structure, universe: &'a Universe, naming: &'a NamingIndex) -> Self {
        let meta = geolog_meta();
        let sig = &meta.theory.signature;

        // Pre-cache commonly used function IDs
        // Note: No */name functions - names are in NamingIndex
        let func_names = [
            "Srt/theory",
            "Func/theory",
            "Func/dom",
            "Func/cod",
            "Rel/theory",
            "Rel/dom",
            "Param/theory",
            "Param/type",
            "BaseDS/dsort",
            "BaseDS/srt",
            "ProdDS/dsort",
            "Field/prod",
            "Field/type",
            "Sequent/theory",
            "Sequent/premise",
            "Sequent/conclusion",
            "CtxVar/sequent",
            "CtxVar/binder",
            "Binder/type",
            "VarT/term",
            "VarT/binder",
            "AppT/term",
            "AppT/func",
            "AppT/arg",
            "RecordT/term",
            "RecEntry/record",
            "RecEntry/val",
            "RecEntry/field",
            "ProjT/term",
            "ProjT/base",
            "ProjT/field",
            "TrueF/formula",
            "FalseF/formula",
            "EqF/formula",
            "EqF/lhs",
            "EqF/rhs",
            "ConjF/formula",
            "ConjArm/conj",
            "ConjArm/child",
            "DisjF/formula",
            "DisjArm/disj",
            "DisjArm/child",
            "ExistsF/formula",
            "ExistsF/binder",
            "ExistsF/body",
            "RelF/formula",
            "RelF/rel",
            "RelF/arg",
            "Term/node",
            "Formula/node",
        ];

        let mut func_ids = HashMap::new();
        for name in func_names {
            if let Some(id) = sig.lookup_func(name) {
                func_ids.insert(name, id);
            }
        }

        // Note: No "Name" sort - names are in NamingIndex
        let sort_names = [
            "Theory", "Param", "Srt", "DSort", "BaseDS", "ProdDS", "Field", "Func", "Rel",
            "Binder", "Term", "VarT", "AppT", "RecordT", "RecEntry", "ProjT", "Formula", "RelF",
            "TrueF", "FalseF", "EqF", "ConjF", "ConjArm", "DisjF", "DisjArm", "ExistsF", "Sequent",
            "CtxVar", "Node",
        ];

        let mut sort_ids = HashMap::new();
        for name in sort_names {
            if let Some(id) = sig.lookup_sort(name) {
                sort_ids.insert(name, id);
            }
        }

        Self {
            structure,
            universe,
            naming,
            meta,
            func_ids,
            sort_ids,
        }
    }

    /// Get all elements of a given sort.
    /// Returns an empty vec for sort names not present in the cached map.
    fn elements_of_sort(&self, sort_name: &str) -> Vec<Slid> {
        // usize::MAX is a "not found" sentinel for uncached sort names
        let sort_id = self.sort_ids.get(sort_name).copied().unwrap_or(usize::MAX);
        if sort_id == usize::MAX {
            return vec![];
        }
        self.structure.carriers[sort_id]
            .iter()
            .map(|x| Slid::from_usize(x as usize))
            .collect()
    }

    /// Follow a function from an element, returning the target Slid if defined
    fn follow(&self, func_name: &str, elem: Slid) -> Option<Slid> {
        let func_id = *self.func_ids.get(func_name)?;
        let sort_slid = self.structure.sort_local_id(elem);
        self.structure.get_function(func_id, sort_slid)
    }

    /// Get the name of an element (from NamingIndex via UUID lookup).
    /// Falls back to "slid_N" when the element's luid has no UUID.
    fn name(&self, elem: Slid) -> String {
        let luid = self.structure.get_luid(elem);
        if let Some(uuid) = self.universe.get(luid) {
            self.naming.display_name(&uuid)
        } else {
            format!("slid_{}", elem)
        }
    }

    /// Find elements where a given function points to target.
    /// Linear scan over the function's domain carrier — O(|carrier|) per call.
    fn find_by_func(&self, func_name: &str, target: Slid) -> Vec<Slid> {
        let Some(&func_id) = self.func_ids.get(func_name) else {
            return vec![];
        };

        // Get the domain sort for this function
        let func = &self.meta.theory.signature.functions[func_id];
        let DerivedSort::Base(domain_sort) = &func.domain else {
            return vec![]; // Product domains not supported yet
        };

        // Iterate through all elements of the domain sort
        let mut results = vec![];
        for elem in self.structure.carriers[*domain_sort].iter() {
            let elem = Slid::from_usize(elem as usize);
            if self.follow(func_name, elem) == Some(target) {
                results.push(elem);
            }
        }
        results
    }
}

/// Reconstruct a DerivedSort from its GeologMeta representation
///
/// Probes for a BaseDS variant first, then ProdDS; an unrecognized DSort
/// element falls back to the unit sort.
fn reconstruct_dsort(
    reader: &MetaReader,
    dsort_elem: Slid,
    slid_to_sort_id: &HashMap<Slid, SortId>,
) -> DerivedSort {
    // Check if it's a BaseDS (find BaseDS where BaseDS/dsort = dsort_elem)
    let base_elems = reader.find_by_func("BaseDS/dsort", dsort_elem);
    if !base_elems.is_empty() {
        let base_elem = base_elems[0];
        if let Some(srt_elem) = reader.follow("BaseDS/srt", base_elem)
            && let Some(&sort_id) = slid_to_sort_id.get(&srt_elem)
        {
            return DerivedSort::Base(sort_id);
        }
    }

    // Check if it's a ProdDS
    let prod_elems = reader.find_by_func("ProdDS/dsort", dsort_elem);
    if !prod_elems.is_empty() {
        let prod_elem = prod_elems[0];
        let field_elems = reader.find_by_func("Field/prod", prod_elem);

        let mut fields = vec![];
        for field_elem in field_elems {
            let field_name = reader.name(field_elem);
            // Recursively reconstruct field type; fields with no Field/type
            // edge are dropped
            if let Some(type_dsort) = reader.follow("Field/type", field_elem) {
                let field_type = reconstruct_dsort(reader, type_dsort, slid_to_sort_id);
                fields.push((field_name, field_type));
            }
        }
        return DerivedSort::Product(fields);
    }

    // Default to unit
    DerivedSort::unit()
}

/// Recursively reconstruct a Term from its GeologMeta representation
///
/// Probes the Term element for each variant (VarT, AppT, ProjT, RecordT) in
/// turn. Returns None when the element matches no variant or a required edge
/// (binder, func, base, …) is missing.
fn reconstruct_term_inner(
    reader: &MetaReader,
    term_elem: Slid,
    binder_map: &HashMap<Slid, (String, DerivedSort)>,
    slid_to_func_id: &HashMap<Slid, FuncId>,
) -> Option<Term> {
    // Check VarT
    let var_elems = reader.find_by_func("VarT/term", term_elem);
    if !var_elems.is_empty() {
        let var_t = var_elems[0];
        if let Some(binder) = reader.follow("VarT/binder", var_t)
            && let Some((name, sort)) = binder_map.get(&binder) {
            return Some(Term::Var(name.clone(), sort.clone()));
        }
        return None;
    }

    // Check AppT
    let app_elems = reader.find_by_func("AppT/term", term_elem);
    if !app_elems.is_empty() {
        let app_t = app_elems[0];
        if let Some(func_elem) = reader.follow("AppT/func", app_t)
            && let Some(&func_id) = slid_to_func_id.get(&func_elem)
            && let Some(arg_term) = reader.follow("AppT/arg", app_t)
        {
            // Recursively reconstruct argument term
            if let Some(arg) = reconstruct_term_inner(reader, arg_term, binder_map, slid_to_func_id) {
                return Some(Term::App(func_id, Box::new(arg)));
            }
        }
        return None;
    }

    // Check ProjT
    let proj_elems = reader.find_by_func("ProjT/term", term_elem);
    if !proj_elems.is_empty() {
        let proj_t = proj_elems[0];
        // Missing ProjT/field yields an empty field name rather than failing
        let field_name = reader
            .follow("ProjT/field", proj_t)
            .map(|f| reader.name(f))
            .unwrap_or_default();
        if let Some(base_term) = reader.follow("ProjT/base", proj_t) {
            // Recursively reconstruct base term
            if let Some(base) = reconstruct_term_inner(reader, base_term, binder_map, slid_to_func_id) {
                return Some(Term::Project(Box::new(base), field_name));
            }
        }
        return None;
    }

    // Check RecordT
    let rec_elems = reader.find_by_func("RecordT/term", term_elem);
    if !rec_elems.is_empty() {
        let rec_t = rec_elems[0];
        let entry_elems = reader.find_by_func("RecEntry/record", rec_t);
        let mut fields = vec![];
        for entry_elem in entry_elems {
            let field_name = reader
                .follow("RecEntry/field", entry_elem)
                .map(|f| reader.name(f))
                .unwrap_or_default();
            if let Some(val_term) = reader.follow("RecEntry/val", entry_elem) {
                // Recursively reconstruct value term; entries whose value
                // fails to reconstruct are skipped
                if let Some(val) = reconstruct_term_inner(reader, val_term, binder_map, slid_to_func_id) {
                    fields.push((field_name, val));
                }
            }
        }
        return Some(Term::Record(fields));
    }

    None
}

/// Recursively reconstruct a Formula from its GeologMeta representation
///
/// Probes each formula variant in turn (TrueF, FalseF, EqF, RelF, ConjF,
/// DisjF, ExistsF). Returns None for unrecognized elements or missing edges.
/// `binder_map` maps Binder slids to (name, sort) for in-scope variables and
/// is extended locally when descending under an ExistsF.
fn reconstruct_formula_inner(
    reader: &MetaReader,
    formula_elem: Slid,
    binder_map: &HashMap<Slid, (String, DerivedSort)>,
    slid_to_sort_id: &HashMap<Slid, SortId>,
    slid_to_func_id: &HashMap<Slid, FuncId>,
    slid_to_rel_id: &HashMap<Slid, RelId>,
) -> Option<Formula> {
    // Check TrueF
    let true_elems = reader.find_by_func("TrueF/formula", formula_elem);
    if !true_elems.is_empty() {
        return Some(Formula::True);
    }

    // Check FalseF
    let false_elems = reader.find_by_func("FalseF/formula", formula_elem);
    if !false_elems.is_empty() {
        return Some(Formula::False);
    }

    // Check EqF
    let eq_elems = reader.find_by_func("EqF/formula", formula_elem);
    if !eq_elems.is_empty() {
        let eq_f = eq_elems[0];
        if let Some(lhs_term) = reader.follow("EqF/lhs", eq_f)
            && let Some(rhs_term) = reader.follow("EqF/rhs", eq_f)
            && let Some(lhs) = reconstruct_term_inner(reader, lhs_term, binder_map, slid_to_func_id)
            && let Some(rhs) = reconstruct_term_inner(reader, rhs_term, binder_map, slid_to_func_id)
        {
            return Some(Formula::Eq(lhs, rhs));
        }
        return None;
    }

    // Check RelF
    let rel_elems = reader.find_by_func("RelF/formula", formula_elem);
    if !rel_elems.is_empty() {
        let rel_f = rel_elems[0];
        if let Some(rel_elem) = reader.follow("RelF/rel", rel_f)
            && let Some(&rel_id) = slid_to_rel_id.get(&rel_elem)
            && let Some(arg_term) = reader.follow("RelF/arg", rel_f)
            && let Some(arg) = reconstruct_term_inner(reader, arg_term, binder_map, slid_to_func_id)
        {
            return Some(Formula::Rel(rel_id, arg));
        }
        return None;
    }

    // Check ConjF
    let conj_elems = reader.find_by_func("ConjF/formula", formula_elem);
    if !conj_elems.is_empty() {
        let conj_f = conj_elems[0];
        let arm_elems = reader.find_by_func("ConjArm/conj", conj_f);
        let mut children = vec![];
        for arm_elem in arm_elems {
            // Arms whose child fails to reconstruct are dropped silently
            if let Some(child_formula) = reader.follow("ConjArm/child", arm_elem)
                && let Some(child) = reconstruct_formula_inner(
                    reader,
                    child_formula,
                    binder_map,
                    slid_to_sort_id,
                    slid_to_func_id,
                    slid_to_rel_id,
                ) {
                children.push(child);
            }
        }
        return Some(Formula::Conj(children));
    }

    // Check DisjF
    let disj_elems = reader.find_by_func("DisjF/formula", formula_elem);
    if !disj_elems.is_empty() {
        let disj_f = disj_elems[0];
        let arm_elems = reader.find_by_func("DisjArm/disj", disj_f);
        let mut children = vec![];
        for arm_elem in arm_elems {
            if let Some(child_formula) = reader.follow("DisjArm/child", arm_elem)
                && let Some(child) = reconstruct_formula_inner(
                    reader,
                    child_formula,
                    binder_map,
                    slid_to_sort_id,
                    slid_to_func_id,
                    slid_to_rel_id,
                ) {
                children.push(child);
            }
        }
        return Some(Formula::Disj(children));
    }

    // Check ExistsF
    let exists_elems = reader.find_by_func("ExistsF/formula", formula_elem);
    if !exists_elems.is_empty() {
        let exists_f = exists_elems[0];
        // Get the binder for this existential
        if let Some(binder_elem) = reader.follow("ExistsF/binder", exists_f) {
            let var_name = reader.name(binder_elem);
            let var_sort = reader
                .follow("Binder/type", binder_elem)
                .map(|d| reconstruct_dsort(reader, d, slid_to_sort_id))
                .unwrap_or_else(DerivedSort::unit);

            // Create new binder map with this binder
            let mut new_binder_map = binder_map.clone();
            new_binder_map.insert(binder_elem, (var_name.clone(), var_sort.clone()));

            // Recursively reconstruct body
            if let Some(body_formula) = reader.follow("ExistsF/body", exists_f)
                && let Some(body) = reconstruct_formula_inner(
                    reader,
                    body_formula,
                    &new_binder_map,
                    slid_to_sort_id,
                    slid_to_func_id,
                    slid_to_rel_id,
                ) {
                return Some(Formula::Exists(var_name, var_sort, Box::new(body)));
            }
        }
        return None;
    }

    None
}

/// Convert a GeologMeta Structure back to an ElaboratedTheory
///
/// This is the reverse of theory_to_structure, used for loading saved theories.
/// Requires Universe and NamingIndex to look up human-readable names.
+pub fn structure_to_theory( + structure: &Structure, + universe: &Universe, + naming: &NamingIndex, +) -> Result { + let reader = MetaReader::new(structure, universe, naming); + + // Find the Theory element (assume exactly one for now) + let theory_elems = reader.elements_of_sort("Theory"); + if theory_elems.is_empty() { + return Err("No Theory element found".to_string()); + } + let theory_elem = theory_elems[0]; + let theory_name = reader.name(theory_elem); + + // Build signature + let mut sig = Signature::new(); + + // Reconstruct sorts: find all Srt elements pointing to this theory + let srt_elems = reader.find_by_func("Srt/theory", theory_elem); + let mut slid_to_sort_id: HashMap = HashMap::new(); + + for srt_elem in &srt_elems { + let name = reader.name(*srt_elem); + let sort_id = sig.add_sort(name); + slid_to_sort_id.insert(*srt_elem, sort_id); + } + + // Reconstruct functions (using standalone reconstruct_dsort helper) + let func_elems = reader.find_by_func("Func/theory", theory_elem); + let mut slid_to_func_id: HashMap = HashMap::new(); + + for func_elem in &func_elems { + let name = reader.name(*func_elem); + + let domain = reader + .follow("Func/dom", *func_elem) + .map(|d| reconstruct_dsort(&reader, d, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + + let codomain = reader + .follow("Func/cod", *func_elem) + .map(|c| reconstruct_dsort(&reader, c, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + + let func_id = sig.add_function(name, domain, codomain); + slid_to_func_id.insert(*func_elem, func_id); + } + + // Reconstruct relations + let rel_elems = reader.find_by_func("Rel/theory", theory_elem); + let mut slid_to_rel_id: HashMap = HashMap::new(); + + for rel_elem in &rel_elems { + let name = reader.name(*rel_elem); + + let domain = reader + .follow("Rel/dom", *rel_elem) + .map(|d| reconstruct_dsort(&reader, d, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + + let rel_id = sig.add_relation(name, domain); + 
slid_to_rel_id.insert(*rel_elem, rel_id); + } + + // Reconstruct params + let param_elems = reader.find_by_func("Param/theory", theory_elem); + let mut params = vec![]; + + for param_elem in param_elems { + let name = reader.name(param_elem); + let type_theory = reader + .follow("Param/type", param_elem) + .map(|t| reader.name(t)) + .unwrap_or_default(); + + params.push(TheoryParam { + name, + theory_name: type_theory, + }); + } + + // Reconstruct axioms (sequents) + let sequent_elems = reader.find_by_func("Sequent/theory", theory_elem); + let mut axioms = vec![]; + let mut axiom_names = vec![]; + + for sequent_elem in sequent_elems { + // Collect the axiom name from the sequent element + axiom_names.push(reader.name(sequent_elem)); + // Build binder map: Slid -> (name, DerivedSort) + let mut binder_map: HashMap = HashMap::new(); + + // Get context variables (CtxVar elements for this sequent) + let ctx_var_elems = reader.find_by_func("CtxVar/sequent", sequent_elem); + let mut context_vars = vec![]; + + for ctx_var_elem in ctx_var_elems { + let var_name = reader.name(ctx_var_elem); + if let Some(binder_elem) = reader.follow("CtxVar/binder", ctx_var_elem) { + let var_sort = reader + .follow("Binder/type", binder_elem) + .map(|d| reconstruct_dsort(&reader, d, &slid_to_sort_id)) + .unwrap_or_else(DerivedSort::unit); + binder_map.insert(binder_elem, (var_name.clone(), var_sort.clone())); + context_vars.push((var_name, var_sort)); + } + } + + let context = Context { vars: context_vars }; + + // Get premise and conclusion using standalone recursive helpers + let premise = reader + .follow("Sequent/premise", sequent_elem) + .and_then(|f| { + reconstruct_formula_inner( + &reader, + f, + &binder_map, + &slid_to_sort_id, + &slid_to_func_id, + &slid_to_rel_id, + ) + }) + .unwrap_or(Formula::True); + + let conclusion = reader + .follow("Sequent/conclusion", sequent_elem) + .and_then(|f| { + reconstruct_formula_inner( + &reader, + f, + &binder_map, + &slid_to_sort_id, + 
&slid_to_func_id, + &slid_to_rel_id, + ) + }) + .unwrap_or(Formula::True); + + axioms.push(Sequent { + context, + premise, + conclusion, + }); + } + + Ok(ElaboratedTheory { + params, + theory: crate::core::Theory { + name: theory_name, + signature: sig, + axioms, + axiom_names, + }, + }) +} + +// Unit tests moved to tests/unit_meta.rs diff --git a/src/naming.rs b/src/naming.rs new file mode 100644 index 0000000..880e4a2 --- /dev/null +++ b/src/naming.rs @@ -0,0 +1,355 @@ +//! Global naming index for human-readable names +//! +//! Names are purely a UI concern - all data in structures is identified by UUID. +//! This index maps UUIDs to human-readable names for display and provides +//! reverse lookup for parsing. +//! +//! Following chit's design: "namings are purely a user interface (input/output +//! for humans and large language models)" +//! +//! ## Suffix-based lookup via ReversedPath +//! +//! To efficiently look up names by suffix (e.g., find all `*/A` when given just `A`), +//! we store paths reversed in a BTreeMap. For example: +//! - `["PetriNet", "P"]` is stored as `ReversedPath(["P", "PetriNet"])` +//! - A prefix scan for `["A"]` finds all paths ending in `A` +//! +//! This enables O(log n + k) suffix lookups where k is the number of matches. + +use crate::id::Uuid; +use indexmap::IndexMap; +use memmap2::Mmap; +use rkyv::ser::Serializer; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::{Archive, Deserialize, Serialize, check_archived_root}; +use std::collections::BTreeMap; +use std::fs::{self, File}; +use std::io::Write; +use std::path::PathBuf; + +/// A qualified name path (e.g., ["PetriNet", "P"] for sort P in theory PetriNet) +pub type QualifiedName = Vec; + +/// A path stored with segments reversed for efficient suffix-based lookup. +/// +/// `["PetriNet", "P"]` becomes `ReversedPath(["P", "PetriNet"])`. +/// This allows BTreeMap range queries to find all paths with a given suffix. 
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ReversedPath(Vec); + +impl ReversedPath { + /// Create a reversed path from a qualified name. + pub fn from_qualified(segments: &[String]) -> Self { + Self(segments.iter().rev().cloned().collect()) + } + + /// Convert back to a qualified name (forward order). + pub fn to_qualified(&self) -> QualifiedName { + self.0.iter().rev().cloned().collect() + } + + /// Create a prefix for range queries (just the suffix segments, reversed). + /// For looking up all paths ending in `["A"]`, create `ReversedPath(["A"])`. + pub fn from_suffix(suffix: &[String]) -> Self { + // Suffix is already in forward order, just reverse it + Self(suffix.iter().rev().cloned().collect()) + } + + /// Check if this path starts with the given prefix (for range iteration). + pub fn starts_with(&self, prefix: &ReversedPath) -> bool { + self.0.len() >= prefix.0.len() && self.0[..prefix.0.len()] == prefix.0[..] + } + + /// Get the inner segments (reversed order). + pub fn segments(&self) -> &[String] { + &self.0 + } +} + +/// Serializable form of the naming index +#[derive(Archive, Deserialize, Serialize, Default)] +#[archive(check_bytes)] +struct NamingData { + /// UUID → qualified name mapping + entries: Vec<(Uuid, QualifiedName)>, +} + +/// Global naming index +/// +/// Provides bidirectional mapping between UUIDs and human-readable names. +/// Names are qualified paths like ["PetriNet", "P"] for sort P in theory PetriNet. +/// +/// ## Lookup modes +/// - **By UUID**: O(1) via `uuid_to_name` +/// - **By exact path**: O(log n) via `path_to_uuid` +/// - **By suffix**: O(log n + k) via BTreeMap range query on reversed paths +#[derive(Debug, Default)] +pub struct NamingIndex { + /// UUID → qualified name (for display) + uuid_to_name: IndexMap, + /// Reversed path → UUIDs (for suffix-based lookup) + /// Paths are stored reversed so that suffix queries become prefix scans. 
+ /// Multiple UUIDs can share the same path (ambiguous names). + path_to_uuid: BTreeMap>, + /// Persistence path + path: Option, + /// Dirty flag + dirty: bool, +} + +impl NamingIndex { + /// Create a new empty naming index + pub fn new() -> Self { + Self::default() + } + + /// Create a naming index with a persistence path + pub fn with_path(path: impl Into) -> Self { + Self { + uuid_to_name: IndexMap::new(), + path_to_uuid: BTreeMap::new(), + path: Some(path.into()), + dirty: false, + } + } + + /// Load a naming index from disk + pub fn load(path: impl Into) -> Result { + let path = path.into(); + + if !path.exists() { + return Ok(Self::with_path(path)); + } + + let file = File::open(&path).map_err(|e| format!("Failed to open naming index: {}", e))?; + + let mmap = unsafe { Mmap::map(&file) } + .map_err(|e| format!("Failed to mmap naming index: {}", e))?; + + if mmap.is_empty() { + return Ok(Self::with_path(path)); + } + + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Failed to validate naming index: {}", e))?; + + let data: NamingData = archived + .deserialize(&mut rkyv::Infallible) + .map_err(|_| "Failed to deserialize naming index")?; + + let mut index = Self::with_path(path); + for (uuid, name) in data.entries { + index.insert_internal(uuid, name); + } + + Ok(index) + } + + /// Save the naming index to disk + pub fn save(&mut self) -> Result<(), String> { + let path = self + .path + .as_ref() + .ok_or("Naming index has no persistence path")?; + + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create naming directory: {}", e))?; + } + + let data = NamingData { + entries: self + .uuid_to_name + .iter() + .map(|(k, v)| (*k, v.clone())) + .collect(), + }; + + let mut serializer = AllocSerializer::<4096>::default(); + serializer + .serialize_value(&data) + .map_err(|e| format!("Failed to serialize naming index: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + let 
temp_path = path.with_extension("tmp"); + { + let mut file = File::create(&temp_path) + .map_err(|e| format!("Failed to create temp file: {}", e))?; + file.write_all(&bytes) + .map_err(|e| format!("Failed to write naming index: {}", e))?; + file.sync_all() + .map_err(|e| format!("Failed to sync naming index: {}", e))?; + } + + fs::rename(&temp_path, path) + .map_err(|e| format!("Failed to rename naming index: {}", e))?; + + self.dirty = false; + Ok(()) + } + + /// Internal insert without setting dirty flag + fn insert_internal(&mut self, uuid: Uuid, name: QualifiedName) { + // Add to reverse index (reversed path → UUIDs) + let reversed = ReversedPath::from_qualified(&name); + self.path_to_uuid + .entry(reversed) + .or_default() + .push(uuid); + self.uuid_to_name.insert(uuid, name); + } + + /// Register a name for a UUID + pub fn insert(&mut self, uuid: Uuid, name: QualifiedName) { + self.insert_internal(uuid, name); + self.dirty = true; + } + + /// Register a simple (unqualified) name for a UUID + pub fn insert_simple(&mut self, uuid: Uuid, name: String) { + self.insert(uuid, vec![name]); + } + + /// Get the qualified name for a UUID + pub fn get(&self, uuid: &Uuid) -> Option<&QualifiedName> { + self.uuid_to_name.get(uuid) + } + + /// Get the simple (last component) name for a UUID + pub fn get_simple(&self, uuid: &Uuid) -> Option<&str> { + self.uuid_to_name + .get(uuid) + .and_then(|name| name.last()) + .map(|s| s.as_str()) + } + + /// Get the display name for a UUID (simple name, or UUID if unnamed) + pub fn display_name(&self, uuid: &Uuid) -> String { + self.get_simple(uuid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("{}", uuid)) + } + + /// Look up all UUIDs whose qualified name ends with the given suffix. + /// + /// Examples: + /// - `lookup_suffix(&["A"])` returns UUIDs for "ExampleNet/A", "OtherNet/A", etc. + /// - `lookup_suffix(&["ExampleNet", "A"])` returns just "ExampleNet/A" + /// + /// Returns an iterator over matching UUIDs. 
+ pub fn lookup_suffix<'a>(&'a self, suffix: &[String]) -> impl Iterator + 'a { + let prefix = ReversedPath::from_suffix(suffix); + self.path_to_uuid + .range(prefix.clone()..) + .take_while(move |(k, _)| k.starts_with(&prefix)) + .flat_map(|(_, uuids)| uuids.iter().copied()) + } + + /// Look up UUID by exact qualified path. + /// Returns None if ambiguous (multiple UUIDs share the exact path). + pub fn lookup_exact(&self, path: &[String]) -> Option { + let reversed = ReversedPath::from_qualified(path); + match self.path_to_uuid.get(&reversed) { + Some(uuids) if uuids.len() == 1 => Some(uuids[0]), + _ => None, + } + } + + /// Resolve a path to a UUID. + /// - If exact match exists, return it. + /// - If suffix matches exactly one UUID, return it. + /// - Otherwise return Err with all candidates (empty if not found, multiple if ambiguous). + pub fn resolve(&self, path: &[String]) -> Result> { + // First try exact match + if let Some(uuid) = self.lookup_exact(path) { + return Ok(uuid); + } + + // Fall back to suffix match + let candidates: Vec = self.lookup_suffix(path).collect(); + match candidates.len() { + 1 => Ok(candidates[0]), + _ => Err(candidates), + } + } + + /// Look up UUIDs by simple (single-segment) name. + /// This is a convenience wrapper around `lookup_suffix` for single names. 
+ pub fn lookup(&self, name: &str) -> Vec { + self.lookup_suffix(&[name.to_string()]).collect() + } + + /// Look up a unique UUID by simple name (returns None if ambiguous or not found) + pub fn lookup_unique(&self, name: &str) -> Option { + let results: Vec = self.lookup_suffix(&[name.to_string()]).collect(); + if results.len() == 1 { + Some(results[0]) + } else { + None + } + } + + /// Check if dirty + pub fn is_dirty(&self) -> bool { + self.dirty + } + + /// Number of entries + pub fn len(&self) -> usize { + self.uuid_to_name.len() + } + + /// Check if empty + pub fn is_empty(&self) -> bool { + self.uuid_to_name.is_empty() + } + + /// Iterate over all (UUID, name) pairs + pub fn iter(&self) -> impl Iterator { + self.uuid_to_name.iter() + } +} + +impl Drop for NamingIndex { + fn drop(&mut self) { + if self.dirty && self.path.is_some() { + let _ = self.save(); + } + } +} + +/// Get the global naming index path +pub fn global_naming_path() -> Option { + #[cfg(unix)] + { + std::env::var("HOME").ok().map(|h| { + let mut p = PathBuf::from(h); + p.push(".config"); + p.push("geolog"); + p.push("names.bin"); + p + }) + } + #[cfg(windows)] + { + std::env::var("APPDATA").ok().map(|mut p| { + p.push("geolog"); + p.push("names.bin"); + p + }) + } + #[cfg(not(any(unix, windows)))] + { + None + } +} + +/// Load or create the global naming index +pub fn global_naming_index() -> NamingIndex { + match global_naming_path() { + Some(path) => NamingIndex::load(&path).unwrap_or_else(|_| NamingIndex::with_path(path)), + None => NamingIndex::new(), + } +} + +// Unit tests moved to tests/proptest_naming.rs diff --git a/src/overlay.rs b/src/overlay.rs new file mode 100644 index 0000000..8c6eae7 --- /dev/null +++ b/src/overlay.rs @@ -0,0 +1,574 @@ +//! Overlay structures: patch-on-write semantics for efficient mutations. +//! +//! Instead of copying a structure to mutate it, we layer changes on top of an +//! immutable base. 
The base is memory-mapped (zero-copy), and mutations accumulate +//! in a thin delta layer. Cost of mutation is O(Δ), never O(base). +//! +//! # Architecture +//! +//! ```text +//! ┌────────────────────────────────────────────────────────┐ +//! │ MappedStructure (immutable, mmap'd, potentially huge) │ +//! └────────────────────────────────────────────────────────┘ +//! ↑ read fallthrough +//! ┌────────────────────────────────────────────────────────┐ +//! │ StructureDelta (tiny: just the changes) │ +//! └────────────────────────────────────────────────────────┘ +//! ``` +//! +//! # Slid Addressing +//! +//! Base elements have Slids `0..base_len`. New overlay elements get Slids +//! `base_len..base_len+delta_len`, so the address space is contiguous. +//! +//! # Usage +//! +//! ```ignore +//! // Load base (fast, zero-copy) +//! let base = MappedStructure::open(path)?; +//! +//! // Create overlay for mutations +//! let mut overlay = OverlayStructure::new(Arc::new(base)); +//! +//! // Mutate (changes go to delta) +//! let elem = overlay.add_element(luid, sort_id); +//! overlay.assert_relation(rel_id, vec![elem, other]); +//! +//! // Read (checks delta first, falls back to base) +//! let sort = overlay.get_sort(elem); +//! +//! // Commit (materialize to new immutable structure) +//! let new_base = overlay.commit(new_path)?; +//! +//! // Or rollback (instant - just clears delta) +//! overlay.rollback(); +//! ``` + +use std::collections::{BTreeSet, HashMap}; +use std::path::Path; +use std::sync::Arc; + +use crate::core::{SortId, Structure}; +use crate::id::{Luid, NumericId, Slid}; +use crate::serialize::save_structure; +use crate::zerocopy::{MappedRelation, MappedStructure}; + +// ============================================================================ +// DELTA TYPES +// ============================================================================ + +/// A delta/patch representing changes to a structure. 
+/// +/// This is the runtime-efficient analog of `Patch` (which uses UUIDs for +/// persistence). `StructureDelta` uses Slids for fast in-memory operations. +#[derive(Clone, Debug, Default)] +pub struct StructureDelta { + /// New elements: (Luid, SortId). Slids start at base.len(). + pub new_elements: Vec<(Luid, SortId)>, + + /// Per-relation deltas (indexed by rel_id) + pub relations: Vec, + + /// Per-function deltas (indexed by func_id) + pub functions: Vec, +} + +impl StructureDelta { + /// Create a new empty delta with the given number of relations and functions. + pub fn new(num_relations: usize, num_functions: usize) -> Self { + Self { + new_elements: Vec::new(), + relations: vec![RelationDelta::default(); num_relations], + functions: vec![FunctionDelta::default(); num_functions], + } + } + + /// Check if the delta is empty (no changes). + pub fn is_empty(&self) -> bool { + self.new_elements.is_empty() + && self.relations.iter().all(|r| r.is_empty()) + && self.functions.iter().all(|f| f.is_empty()) + } +} + +/// Delta for a single relation: assertions and retractions. +#[derive(Clone, Debug, Default)] +pub struct RelationDelta { + /// New tuples to assert (by content) + pub assertions: BTreeSet>, + + /// Tuples to retract (by content, not by ID) + pub retractions: BTreeSet>, +} + +impl RelationDelta { + /// Check if empty. + pub fn is_empty(&self) -> bool { + self.assertions.is_empty() && self.retractions.is_empty() + } +} + +/// Delta for a single function: updated mappings. +#[derive(Clone, Debug, Default)] +pub struct FunctionDelta { + /// Updated mappings: domain Slid -> codomain Slid. + /// Only supports local functions in this version. + pub updates: HashMap, +} + +impl FunctionDelta { + /// Check if empty. 
+ pub fn is_empty(&self) -> bool { + self.updates.is_empty() + } +} + +// ============================================================================ +// OVERLAY STRUCTURE +// ============================================================================ + +/// A mutable overlay on top of an immutable base structure. +/// +/// All reads check the delta first, then fall back to the base. +/// All writes go to the delta. The base is never modified. +pub struct OverlayStructure { + /// The immutable base (memory-mapped, zero-copy) + base: Arc, + + /// Accumulated changes + delta: StructureDelta, +} + +impl OverlayStructure { + /// Create a new overlay on top of a base structure. + pub fn new(base: Arc) -> Self { + let num_relations = base.num_relations(); + let num_functions = base.num_functions(); + Self { + base, + delta: StructureDelta::new(num_relations, num_functions), + } + } + + /// Get the immutable base. + pub fn base(&self) -> &MappedStructure { + &self.base + } + + /// Get the accumulated delta. + pub fn delta(&self) -> &StructureDelta { + &self.delta + } + + /// Check if clean (no changes from base). + pub fn is_clean(&self) -> bool { + self.delta.is_empty() + } + + /// Discard all changes, returning to base state. + pub fn rollback(&mut self) { + self.delta = StructureDelta::new( + self.base.num_relations(), + self.base.num_functions(), + ); + } + + // ======================================================================== + // ELEMENT OPERATIONS + // ======================================================================== + + /// Total number of elements (base + overlay). + pub fn len(&self) -> usize { + self.base.len() + self.delta.new_elements.len() + } + + /// Check if empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Number of sorts. + pub fn num_sorts(&self) -> usize { + self.base.num_sorts() + } + + /// Number of relations. + pub fn num_relations(&self) -> usize { + self.base.num_relations() + } + + /// Number of functions. 
+ pub fn num_functions(&self) -> usize { + self.base.num_functions() + } + + /// Add a new element. Returns its Slid (starts at base.len()). + pub fn add_element(&mut self, luid: Luid, sort_id: SortId) -> Slid { + let slid = Slid::from_usize(self.base.len() + self.delta.new_elements.len()); + self.delta.new_elements.push((luid, sort_id)); + slid + } + + /// Get the Luid for an element. + pub fn get_luid(&self, slid: Slid) -> Option { + let idx = slid.index(); + let base_len = self.base.len(); + if idx < base_len { + self.base.get_luid(slid) + } else { + self.delta + .new_elements + .get(idx - base_len) + .map(|(luid, _)| *luid) + } + } + + /// Get the sort for an element. + pub fn get_sort(&self, slid: Slid) -> Option { + let idx = slid.index(); + let base_len = self.base.len(); + if idx < base_len { + self.base.get_sort(slid) + } else { + self.delta + .new_elements + .get(idx - base_len) + .map(|(_, sort)| *sort) + } + } + + /// Iterate over all elements (base + overlay). + pub fn elements(&self) -> impl Iterator + '_ { + let base_iter = self.base.elements(); + let base_len = self.base.len(); + let overlay_iter = self + .delta + .new_elements + .iter() + .enumerate() + .map(move |(i, (luid, sort))| { + (Slid::from_usize(base_len + i), *luid, *sort) + }); + base_iter.chain(overlay_iter) + } + + /// Iterate over elements of a specific sort. + pub fn elements_of_sort(&self, sort_id: SortId) -> impl Iterator + '_ { + let base_iter = self.base.elements_of_sort(sort_id); + let base_len = self.base.len(); + let overlay_iter = self + .delta + .new_elements + .iter() + .enumerate() + .filter(move |(_, (_, s))| *s == sort_id) + .map(move |(i, _)| Slid::from_usize(base_len + i)); + base_iter.chain(overlay_iter) + } + + // ======================================================================== + // RELATION OPERATIONS + // ======================================================================== + + /// Assert a relation tuple. 
+ pub fn assert_relation(&mut self, rel_id: usize, tuple: Vec) { + // If this tuple was previously retracted, un-retract it + self.delta.relations[rel_id].retractions.remove(&tuple); + // Add to assertions + self.delta.relations[rel_id].assertions.insert(tuple); + } + + /// Retract a relation tuple (by content). + pub fn retract_relation(&mut self, rel_id: usize, tuple: Vec) { + // If this tuple was asserted in the overlay, just remove it + if self.delta.relations[rel_id].assertions.remove(&tuple) { + return; + } + // Otherwise, mark it as retracted from base + self.delta.relations[rel_id].retractions.insert(tuple); + } + + /// Get an overlay view of a relation. + pub fn relation(&self, rel_id: usize) -> Option> { + let base_rel = self.base.relation(rel_id)?; + let delta = self.delta.relations.get(rel_id)?; + Some(OverlayRelation { + base: base_rel, + delta, + }) + } + + // ======================================================================== + // FUNCTION OPERATIONS + // ======================================================================== + + /// Set a function value. + pub fn set_function(&mut self, func_id: usize, domain: Slid, value: Slid) { + self.delta.functions[func_id].updates.insert(domain, value); + } + + /// Get a function value. 
+ pub fn get_function(&self, func_id: usize, domain: Slid) -> Option { + // Check delta first + if let Some(&value) = self.delta.functions[func_id].updates.get(&domain) { + return Some(value); + } + // Fall back to base (only for base elements) + if domain.index() < self.base.len() { + // Need to convert Slid to sort-local index for base lookup + // This requires knowing the sort of the domain element + if let Some(sort_id) = self.base.get_sort(domain) { + // Count how many elements of this sort come before this one + let sort_local_idx = self + .base + .elements_of_sort(sort_id) + .take_while(|&s| s.index() < domain.index()) + .count(); + return self.base.function(func_id)?.get_local(sort_local_idx); + } + } + None + } + + // ======================================================================== + // COMMIT / MATERIALIZE + // ======================================================================== + + /// Materialize the overlay into an owned Structure. + /// + /// This combines the base and delta into a single Structure that can be + /// saved to disk. + pub fn materialize(&self) -> Structure { + // Start with a fresh structure + let mut structure = Structure::new(self.num_sorts()); + + // Copy base elements (we need to create them fresh since Structure wants to own them) + // For now, we'll iterate and add. In production, we'd want a more efficient bulk copy. + let mut slid_map: HashMap = HashMap::new(); + + // We need a universe to add elements, but we're materializing so we'll + // reuse the Luids from the overlay. Create elements with existing Luids. 
+ for (old_slid, luid, sort_id) in self.elements() { + let new_slid = structure.add_element_with_luid(luid, sort_id); + slid_map.insert(old_slid, new_slid); + } + + // Initialize relations with correct arities + let arities: Vec = (0..self.num_relations()) + .map(|rel_id| { + self.base + .relation(rel_id) + .map(|r| r.arity()) + .unwrap_or(0) + }) + .collect(); + structure.init_relations(&arities); + + // Copy relation tuples (applying the slid remapping) + for rel_id in 0..self.num_relations() { + if let Some(rel) = self.relation(rel_id) { + for tuple in rel.live_tuples() { + let remapped: Vec = tuple + .iter() + .map(|&old_slid| slid_map.get(&old_slid).copied().unwrap_or(old_slid)) + .collect(); + structure.assert_relation(rel_id, remapped); + } + } + } + + // TODO: Copy functions (more complex, skip for now) + + structure + } + + /// Commit the overlay: materialize and save to a new file, returning the new MappedStructure. + pub fn commit(&self, path: &Path) -> Result { + let structure = self.materialize(); + save_structure(&structure, path)?; + MappedStructure::open(path) + } +} + +// ============================================================================ +// OVERLAY RELATION VIEW +// ============================================================================ + +/// A read-only view of a relation through an overlay. +pub struct OverlayRelation<'a> { + base: MappedRelation<'a>, + delta: &'a RelationDelta, +} + +impl<'a> OverlayRelation<'a> { + /// Relation arity. + pub fn arity(&self) -> usize { + self.base.arity() + } + + /// Approximate count of live tuples. + /// + /// This is approximate because checking retractions against base tuples + /// would require iterating. For exact count, iterate `live_tuples()`. 
+ pub fn live_count_approx(&self) -> usize { + // Base count + assertions - retractions (approximate) + self.base.live_count() + self.delta.assertions.len() + - self.delta.retractions.len().min(self.base.live_count()) + } + + /// Check if a tuple is live (in base or assertions, not retracted). + pub fn contains(&self, tuple: &[Slid]) -> bool { + // Check if retracted + if self.delta.retractions.contains(tuple) { + return false; + } + // Check assertions + if self.delta.assertions.contains(tuple) { + return true; + } + // Check base - need to iterate base tuples to check + // This is O(n) which is unfortunate, but we don't have a hash index + for base_tuple in self.base.live_tuples() { + let base_vec: Vec = base_tuple.collect(); + if base_vec.as_slice() == tuple { + return true; + } + } + false + } + + /// Iterate over live tuples (base filtered by retractions, plus assertions). + /// + /// Returns tuples as `Vec` for simplicity. Each vec is one tuple. + pub fn live_tuples(&self) -> impl Iterator> + '_ { + // Collect base tuples, filtering out retracted ones + let base_filtered = self + .base + .live_tuples() + .map(|t| t.collect::>()) + .filter(|tuple| !self.delta.retractions.contains(tuple)); + + // Chain with assertions + let assertions = self.delta.assertions.iter().cloned(); + + base_filtered.chain(assertions) + } +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::universe::Universe; + use crate::serialize::save_structure; + use tempfile::tempdir; + + #[test] + fn test_overlay_add_elements() { + let dir = tempdir().unwrap(); + let path = dir.path().join("base.structure"); + + // Create and save a base structure + let mut universe = Universe::new(); + let mut base_structure = Structure::new(2); + let (a, _) = base_structure.add_element(&mut universe, 0); + let (b, _) = 
base_structure.add_element(&mut universe, 1); + save_structure(&base_structure, &path).unwrap(); + + // Load as mapped and create overlay + let mapped = MappedStructure::open(&path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + assert_eq!(overlay.len(), 2); + assert!(overlay.is_clean()); + + // Add elements through overlay + let luid_c = universe.intern(crate::id::Uuid::now_v7()); + let c = overlay.add_element(luid_c, 0); + + assert_eq!(overlay.len(), 3); + assert!(!overlay.is_clean()); + assert_eq!(c.index(), 2); // New element gets Slid after base + + // Check element lookups + assert_eq!(overlay.get_sort(a), Some(0)); + assert_eq!(overlay.get_sort(b), Some(1)); + assert_eq!(overlay.get_sort(c), Some(0)); + + // Rollback + overlay.rollback(); + assert_eq!(overlay.len(), 2); + assert!(overlay.is_clean()); + } + + #[test] + fn test_overlay_relations() { + let dir = tempdir().unwrap(); + let path = dir.path().join("base.structure"); + + // Create base with a relation + let mut universe = Universe::new(); + let mut base_structure = Structure::new(1); + let (a, _) = base_structure.add_element(&mut universe, 0); + let (b, _) = base_structure.add_element(&mut universe, 0); + base_structure.init_relations(&[2]); // binary relation + base_structure.assert_relation(0, vec![a, b]); + save_structure(&base_structure, &path).unwrap(); + + // Load and overlay + let mapped = MappedStructure::open(&path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Check base relation + let rel = overlay.relation(0).unwrap(); + assert_eq!(rel.arity(), 2); + assert!(rel.contains(&[a, b])); + assert!(!rel.contains(&[b, a])); + + // Assert new tuple + overlay.assert_relation(0, vec![b, a]); + let rel = overlay.relation(0).unwrap(); + assert!(rel.contains(&[a, b])); + assert!(rel.contains(&[b, a])); + + // Retract original tuple + overlay.retract_relation(0, vec![a, b]); + let rel = overlay.relation(0).unwrap(); + 
assert!(!rel.contains(&[a, b])); + assert!(rel.contains(&[b, a])); + } + + #[test] + fn test_overlay_materialize() { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let new_path = dir.path().join("new.structure"); + + // Create base + let mut universe = Universe::new(); + let mut base_structure = Structure::new(1); + let (a, _) = base_structure.add_element(&mut universe, 0); + base_structure.init_relations(&[1]); // unary relation + base_structure.assert_relation(0, vec![a]); + save_structure(&base_structure, &base_path).unwrap(); + + // Load, modify, commit + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + let luid_b = universe.intern(crate::id::Uuid::now_v7()); + let b = overlay.add_element(luid_b, 0); + overlay.assert_relation(0, vec![b]); + + let new_mapped = overlay.commit(&new_path).unwrap(); + + // Verify new structure + assert_eq!(new_mapped.len(), 2); + assert_eq!(new_mapped.num_relations(), 1); + let rel = new_mapped.relation(0).unwrap(); + assert_eq!(rel.live_count(), 2); + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..b540ff2 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,761 @@ +//! Parser for Geolog +//! +//! Parses token streams into AST. + +use chumsky::prelude::*; + +use crate::ast::*; +use crate::lexer::{Span, Token}; + +/// Create a parser for a complete Geolog file +pub fn parser() -> impl Parser> + Clone { + declaration() + .map_with_span(|decl, span| Spanned::new(decl, to_span(span))) + .repeated() + .then_ignore(end()) + .map(|declarations| File { declarations }) +} + +fn to_span(span: Span) -> crate::ast::Span { + crate::ast::Span::new(span.start, span.end) +} + +/// Assign positional names ("0", "1", ...) 
to unnamed fields in a record +/// Only unnamed fields consume positional indices, so named fields can be reordered freely: +/// `[a, on: b, c]` → `[("0", a), ("on", b), ("1", c)]` +/// `[on: b, a, c]` → `[("on", b), ("0", a), ("1", c)]` +/// +/// Returns Err with the duplicate field name if duplicates are found. +fn assign_positional_names_checked( + fields: Vec<(Option, T)>, +) -> Result, String> { + let mut positional_idx = 0usize; + let mut seen = std::collections::HashSet::new(); + let mut result = Vec::with_capacity(fields.len()); + + for (name, val) in fields { + let field_name = match name { + Some(n) => n, + None => { + let n = positional_idx.to_string(); + positional_idx += 1; + n + } + }; + + if !seen.insert(field_name.clone()) { + return Err(field_name); + } + result.push((field_name, val)); + } + + Ok(result) +} + +// ============================================================================ +// Helpers +// ============================================================================ + +fn ident() -> impl Parser> + Clone { + select! 
{ + Token::Ident(s) => s, + // Allow keywords to be used as identifiers (e.g., in paths like ax/child/exists) + Token::Namespace => "namespace".to_string(), + Token::Theory => "theory".to_string(), + Token::Instance => "instance".to_string(), + Token::Query => "query".to_string(), + Token::Sort => "Sort".to_string(), + Token::Prop => "Prop".to_string(), + Token::Forall => "forall".to_string(), + Token::Exists => "exists".to_string(), + } +} + +/// Parse a path: `foo` or `foo/bar/baz` +/// Uses `/` for namespace qualification +fn path() -> impl Parser> + Clone { + ident() + .separated_by(just(Token::Slash)) + .at_least(1) + .map(|segments| Path { segments }) +} + +// ============================================================================ +// Types (Concatenative Stack-Based Parsing) +// ============================================================================ + +/// Parse a full type expression with arrows (concatenative style) +/// +/// `A B -> C D -> E` becomes tokens: [A, B, C, D, E, Arrow, Arrow] +/// which evaluates right-to-left: A B -> (C D -> E) +/// +/// Uses a single recursive() to handle mutual recursion between type expressions +/// (for parentheses and record fields) and atomic type tokens. +fn type_expr_impl() -> impl Parser> + Clone { + recursive(|type_expr_rec| { + // === Atomic type tokens (non-recursive) === + let sort = just(Token::Sort).to(TypeToken::Sort); + let prop = just(Token::Prop).to(TypeToken::Prop); + let instance = just(Token::Instance).to(TypeToken::Instance); + let path_tok = path().map(TypeToken::Path); + + // Record type: [field: Type, ...] or [Type, ...] 
or mixed + // Named field: `name: Type` + let named_type_field = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr_rec.clone()) + .map(|(name, ty)| (Some(name), ty)); + // Positional field: `Type` + let positional_type_field = type_expr_rec.clone().map(|ty| (None, ty)); + let record_field = choice((named_type_field, positional_type_field)); + + let record = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(TypeToken::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }); + + // Single atomic token + let single_token = choice((sort, prop, instance, record, path_tok)).map(|t| vec![t]); + + // Parenthesized expression - flatten tokens into parent sequence + let paren_expr = type_expr_rec + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(|expr: TypeExpr| expr.tokens); + + // A "chunk item" is either a paren group or a single token + let chunk_item = choice((paren_expr, single_token)); + + // A "chunk" is one or more items (before an arrow or end) + let chunk = chunk_item + .repeated() + .at_least(1) + .map(|items: Vec>| items.into_iter().flatten().collect::>()); + + // Full type expression: chunks separated by arrows + chunk + .separated_by(just(Token::Arrow)) + .at_least(1) + .map(|chunks: Vec>| { + // For right-associative arrows: + // chunks: [[A, B], [C, D], [E]] + // result: [A, B, C, D, E, Arrow, Arrow] + // + // The evaluator processes Arrow tokens right-to-left: + // Stack after all tokens pushed: [A, B, C, D, E] + // Arrow 1: pop C,D -> push Arrow{C,D} -> [A, B, Arrow{C,D}, E] + // Wait, that's not right either... 
+ // + // Actually the order should be: + // [A, B, Arrow, C, D, Arrow, E] for left-to-right application + // But we want (A B) -> ((C D) -> E) for right-associative + // + // For postfix arrows: + // [A, B, C, D, E, Arrow, Arrow] means: + // - Push A, B, C, D, E + // - Arrow: pop E, pop D -> push Arrow{D,E} + // - Arrow: pop Arrow{D,E}, pop C -> push Arrow{C, Arrow{D,E}} + // Hmm, this also doesn't work well for multi-token chunks. + // + // Actually, let's just flatten all and append arrows. + // The evaluator will be responsible for parsing chunks correctly. + + let num_arrows = chunks.len() - 1; + let mut tokens: Vec = chunks.into_iter().flatten().collect(); + + // Add Arrow tokens at end + for _ in 0..num_arrows { + tokens.push(TypeToken::Arrow); + } + + TypeExpr { tokens } + }) + }) +} + +/// Parse a type expression (full, with arrows) +fn type_expr() -> impl Parser> + Clone { + type_expr_impl() +} + +/// Parse a type expression without top-level arrows (for function domain position) +/// +/// This parses a single "chunk" - type tokens without arrows at the top level. +/// Used for places like function domain where we don't want `A -> B` to be ambiguous. +fn type_expr_no_arrow() -> impl Parser> + Clone { + recursive(|_type_expr_rec| { + // Atomic type tokens + let sort = just(Token::Sort).to(TypeToken::Sort); + let prop = just(Token::Prop).to(TypeToken::Prop); + let instance = just(Token::Instance).to(TypeToken::Instance); + let path_tok = path().map(TypeToken::Path); + + // Record type: [field: Type, ...] or [Type, ...] 
or mixed + // Named field: `name: Type` + let named_type_field = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr_impl()) + .map(|(name, ty)| (Some(name), ty)); + // Positional field: `Type` + let positional_type_field = type_expr_impl().map(|ty| (None, ty)); + let record_field = choice((named_type_field, positional_type_field)); + + let record = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(TypeToken::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }); + + // Single atomic token + let single_token = choice((sort, prop, instance, record, path_tok)).map(|t| vec![t]); + + // Parenthesized expression - can contain full type expr with arrows + let paren_expr = type_expr_impl() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(|expr: TypeExpr| expr.tokens); + + // A "chunk item" is either a paren group or a single token + let chunk_item = choice((paren_expr, single_token)); + + // One or more items, no arrows + chunk_item + .repeated() + .at_least(1) + .map(|items: Vec>| { + TypeExpr { + tokens: items.into_iter().flatten().collect(), + } + }) + }) +} + +// ============================================================================ +// Terms +// ============================================================================ + +fn term() -> impl Parser> + Clone { + recursive(|term| { + let path_term = path().map(Term::Path); + + // Record literal: [field: term, ...] or [term, ...] or mixed + // Named field: `name: value` + // Positional field: `value` (gets name "0", "1", etc.) 
+ let named_field = ident() + .then_ignore(just(Token::Colon)) + .then(term.clone()) + .map(|(name, val)| (Some(name), val)); + let positional_field = term.clone().map(|val| (None, val)); + let record_field = choice((named_field, positional_field)); + + let record_term = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(Term::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }); + + // Parenthesized term + let paren_term = term + .clone() + .delimited_by(just(Token::LParen), just(Token::RParen)); + + let atom = choice((record_term, paren_term, path_term)); + + // Postfix operations: + // - Application (juxtaposition): `w W/src` means "apply W/src to w" + // - Field projection: `.field` projects a field from a record + atom.clone() + .then( + choice(( + // Field projection: .field + just(Token::Dot) + .ignore_then(ident()) + .map(TermPostfix::Project), + // Application: another atom + atom.clone().map(TermPostfix::App), + )) + .repeated(), + ) + .foldl(|acc, op| match op { + TermPostfix::Project(field) => Term::Project(Box::new(acc), field), + TermPostfix::App(arg) => Term::App(Box::new(acc), Box::new(arg)), + }) + }) +} + +#[derive(Clone)] +enum TermPostfix { + Project(String), + App(Term), +} + +/// Parse a record term specifically: [field: term, ...] or [term, ...] or mixed +/// Used for relation assertions where we need a standalone record parser. 
+fn record_term() -> impl Parser> + Clone { + recursive(|rec_term| { + let path_term = path().map(Term::Path); + let inner_term = choice((rec_term.clone(), path_term.clone())); + + // Named field: `name: value` + let named_field = ident() + .then_ignore(just(Token::Colon)) + .then(inner_term.clone()) + .map(|(name, val)| (Some(name), val)); + // Positional field: `value` + let positional_field = inner_term.map(|val| (None, val)); + let record_field = choice((named_field, positional_field)); + + record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .try_map(|fields, span| { + assign_positional_names_checked(fields) + .map(Term::Record) + .map_err(|dup| Simple::custom(span, format!("duplicate field name: {}", dup))) + }) + }) +} + +// ============================================================================ +// Formulas +// ============================================================================ + +fn formula() -> impl Parser> + Clone { + recursive(|formula| { + let quantified_var = ident() + .separated_by(just(Token::Comma)) + .at_least(1) + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .map(|(names, ty)| QuantifiedVar { names, ty }); + + // Existential: exists x : T. phi1, phi2, ... + // The body is a conjunction of formulas (comma-separated). + // An empty body (exists x : X.) is interpreted as True. + // This is standard geometric logic syntax. 
+ let exists = just(Token::Exists) + .ignore_then( + quantified_var + .clone() + .separated_by(just(Token::Comma)) + .at_least(1), + ) + .then_ignore(just(Token::Dot)) + .then(formula.clone().separated_by(just(Token::Comma))) + .map(|(vars, body_conjuncts)| { + let body = match body_conjuncts.len() { + 0 => Formula::True, + 1 => body_conjuncts.into_iter().next().unwrap(), + _ => Formula::And(body_conjuncts), + }; + Formula::Exists(vars, Box::new(body)) + }); + + // Parenthesized formula + let paren_formula = formula + .clone() + .delimited_by(just(Token::LParen), just(Token::RParen)); + + // Term-based formulas: either equality (term = term) or relation application (term rel) + // Since term() greedily parses `base rel` as App(base, Path(rel)), + // we detect that pattern when not followed by `=` and convert to RelApp + let term_based = term() + .then(just(Token::Eq).ignore_then(term()).or_not()) + .try_map(|(t, opt_rhs), span| { + match opt_rhs { + Some(rhs) => Ok(Formula::Eq(t, rhs)), + None => { + // Not equality - check for relation application pattern: term rel + match t { + Term::App(base, rel_term) => { + match *rel_term { + Term::Path(path) if path.segments.len() == 1 => { + Ok(Formula::RelApp(path.segments[0].clone(), *base)) + } + _ => Err(Simple::custom(span, "expected relation name (single identifier)")) + } + } + _ => Err(Simple::custom(span, "expected relation application (term rel) or equality (term = term)")) + } + } + } + }); + + // Literals + let true_lit = just(Token::True).to(Formula::True); + let false_lit = just(Token::False).to(Formula::False); + + let atom = choice((true_lit, false_lit, exists, paren_formula, term_based)); + + // Conjunction: phi /\ psi (binds tighter than disjunction) + let conjunction = atom + .clone() + .then(just(Token::And).ignore_then(atom.clone()).repeated()) + .foldl(|a, b| { + // Flatten into a single And with multiple conjuncts + match a { + Formula::And(mut conjuncts) => { + conjuncts.push(b); + 
Formula::And(conjuncts) + } + _ => Formula::And(vec![a, b]), + } + }); + + // Disjunction: phi \/ psi + conjunction + .clone() + .then(just(Token::Or).ignore_then(conjunction.clone()).repeated()) + .foldl(|a, b| { + // Flatten into a single Or with multiple disjuncts + match a { + Formula::Or(mut disjuncts) => { + disjuncts.push(b); + Formula::Or(disjuncts) + } + _ => Formula::Or(vec![a, b]), + } + }) + }) +} + +// ============================================================================ +// Axioms +// ============================================================================ + +fn axiom_decl() -> impl Parser> + Clone { + let quantified_var = ident() + .separated_by(just(Token::Comma)) + .at_least(1) + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .map(|(names, ty)| QuantifiedVar { names, ty }); + + // Allow empty quantifier list: `forall .` means no universally quantified variables + // This is useful for "unconditional" axioms like `forall . |- exists x : X. ...` + let quantified_vars = just(Token::Forall) + .ignore_then(quantified_var.separated_by(just(Token::Comma))) + .then_ignore(just(Token::Dot)); + + // Hypotheses before |- (optional, comma separated) + let hypotheses = formula() + .separated_by(just(Token::Comma)) + .then_ignore(just(Token::Turnstile)); + + // name : forall vars. 
hyps |- conclusion + // Name can be a path like `ax/anc/base` + path() + .then_ignore(just(Token::Colon)) + .then(quantified_vars) + .then(hypotheses) + .then(formula()) + .map(|(((name, quantified), hypotheses), conclusion)| AxiomDecl { + name, + quantified, + hypotheses, + conclusion, + }) +} + +// ============================================================================ +// Theory items +// ============================================================================ + +fn theory_item() -> impl Parser> + Clone { + // Sort declaration: P : Sort; + let sort_decl = ident() + .then_ignore(just(Token::Colon)) + .then_ignore(just(Token::Sort)) + .then_ignore(just(Token::Semicolon)) + .map(TheoryItem::Sort); + + // Function declaration: name : domain -> codomain; + // Name can be a path like `in.src` + // Domain is parsed without arrows to avoid ambiguity + let function_decl = path() + .then_ignore(just(Token::Colon)) + .then(type_expr_no_arrow()) + .then_ignore(just(Token::Arrow)) + .then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .map(|((name, domain), codomain)| { + TheoryItem::Function(FunctionDecl { + name, + domain, + codomain, + }) + }); + + // Axiom: name : forall ... 
|- ...; + let axiom = axiom_decl() + .then_ignore(just(Token::Semicolon)) + .map(TheoryItem::Axiom); + + // Field declaration (catch-all for parameterized theories): name : type; + let field_decl = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .map(|(name, ty)| TheoryItem::Field(name, ty)); + + // Order matters: try more specific patterns first + // axiom starts with "ident : forall" + // function has "ident : type ->" + // sort has "ident : Sort" + // field is catch-all "ident : type" + choice((axiom, function_decl, sort_decl, field_decl)) +} + +// ============================================================================ +// Declarations +// ============================================================================ + +fn param() -> impl Parser> + Clone { + ident() + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .map(|(name, ty)| Param { name, ty }) +} + +fn theory_decl() -> impl Parser> + Clone { + // Optional `extends ParentTheory` + let extends_clause = ident() + .try_map(|s, span| { + if s == "extends" { + Ok(()) + } else { + Err(Simple::custom(span, "expected 'extends'")) + } + }) + .ignore_then(path()) + .or_not(); + + // A param group in parens: (X : Type, Y : Type) + let param_group = param() + .separated_by(just(Token::Comma)) + .at_least(1) + .delimited_by(just(Token::LParen), just(Token::RParen)); + + // After 'theory', we may have: + // 1. One or more param groups followed by an identifier: (X:T) (Y:U) Name + // 2. Just an identifier (no params): Name + // 3. 
Just '{' (missing name - ERROR) + // + // Strategy: Parse by looking at the first token after 'theory': + // - If '(' -> parse params, then expect name + // - If identifier -> that's the name, no params + // - If '{' -> error: missing name + + // Helper to parse params then name + let params_then_name = param_group + .repeated() + .at_least(1) + .map(|groups: Vec>| groups.into_iter().flatten().collect::>()) + .then(ident()) + .map(|(params, name)| (params, name)); + + // No params, just a name + let just_name = ident().map(|name| (Vec::::new(), name)); + + // Error case: '{' with no name - emit error at the '{' token's location + // Use `just` to peek at '{' and capture its position, then emit a helpful error + // We DON'T consume the '{' because we need it for the body parser + let missing_name = just(Token::LBrace) + .map_with_span(|_, span: Span| span) // Capture '{' token's span + .rewind() // Rewind to not consume '{' - we need it for the body + .validate(|brace_span, _, emit| { + emit(Simple::custom( + brace_span, + "expected theory name - anonymous theories are not allowed. \ + Use: theory MyTheoryName { ... }", + )); + // Return dummy values for error recovery + (Vec::::new(), "_anonymous_".to_string()) + }); + + // Parse theory keyword, then params+name in one of the three ways + // Order matters: try params first (if '('), then name (if ident), then error (if '{') + just(Token::Theory) + .ignore_then(choice((params_then_name, just_name, missing_name))) + .then(extends_clause) + .then( + theory_item() + .map_with_span(|item, span| Spanned::new(item, to_span(span))) + .repeated() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .map(|(((params, name), extends), body)| TheoryDecl { + params, + name, + extends, + body, + }) +} + +fn instance_item() -> impl Parser> + Clone { + recursive(|instance_item| { + // Nested instance: name = { ... 
}; + // Type is inferred from the field declaration in the theory + let nested = ident() + .then_ignore(just(Token::Eq)) + .then( + instance_item + .map_with_span(|item, span| Spanned::new(item, to_span(span))) + .repeated() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .then_ignore(just(Token::Semicolon)) + .map(|(name, body)| { + InstanceItem::NestedInstance( + name, + InstanceDecl { + // Type will be inferred during elaboration + theory: TypeExpr::single_path(Path::single("_inferred".to_string())), + name: String::new(), + body, + needs_chase: false, + }, + ) + }); + + // Element declaration: A : P; or a, b, c : P; + let element = ident() + .separated_by(just(Token::Comma)) + .at_least(1) + .then_ignore(just(Token::Colon)) + .then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .map(|(names, ty)| InstanceItem::Element(names, ty)); + + // Equation: term = term; + let equation = term() + .then_ignore(just(Token::Eq)) + .then(term()) + .then_ignore(just(Token::Semicolon)) + .map(|(l, r)| InstanceItem::Equation(l, r)); + + // Relation assertion: [field: value, ...] 
relation_name; (multi-ary) + // or: element relation_name; (unary) + // Multi-ary with explicit record + let relation_assertion_record = record_term() + .then(ident()) + .then_ignore(just(Token::Semicolon)) + .map(|(term, rel)| InstanceItem::RelationAssertion(term, rel)); + + // Unary relation: element relation_name; + // This parses as: path followed by another ident, then semicolon + // We wrap the element in a single-field record for uniform handling + let relation_assertion_unary = path() + .map(Term::Path) + .then(ident()) + .then_ignore(just(Token::Semicolon)) + .map(|(elem, rel)| InstanceItem::RelationAssertion(elem, rel)); + + // Try nested first (ident = {), then element (ident :), then record relation ([ ...), + // then unary relation (ident ident ;), then equation (fallback with =) + choice((nested, element, relation_assertion_record, relation_assertion_unary, equation)) + }) +} + +/// Parse a single type token without 'instance' (for instance declaration headers) +fn type_token_no_instance() -> impl Parser> + Clone { + let sort = just(Token::Sort).to(TypeToken::Sort); + let prop = just(Token::Prop).to(TypeToken::Prop); + // No instance token here! 
+ + let path_tok = path().map(TypeToken::Path); + + // Record type with full type expressions inside + let record_field = ident() + .then_ignore(just(Token::Colon)) + .then(type_expr_impl()); + + let record = record_field + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::LBracket), just(Token::RBracket)) + .map(TypeToken::Record); + + choice((sort, prop, record, path_tok)) +} + +/// Parse a type expression without the `instance` suffix (for instance declaration headers) +fn type_expr_no_instance() -> impl Parser> + Clone { + // Parenthesized type - parse inner full type expr + let paren_expr = type_expr_impl() + .delimited_by(just(Token::LParen), just(Token::RParen)) + .map(|expr| expr.tokens); + + // Single token (no instance allowed) + let single = type_token_no_instance().map(|t| vec![t]); + + // Either paren group or single token + let item = choice((paren_expr, single)); + + // Collect all tokens + item.repeated() + .at_least(1) + .map(|items| TypeExpr { + tokens: items.into_iter().flatten().collect(), + }) +} + +fn instance_decl() -> impl Parser> + Clone { + // Syntax: instance Name : Type = { ... } + // or: instance Name : Type = chase { ... 
} + just(Token::Instance) + .ignore_then(ident()) + .then_ignore(just(Token::Colon)) + .then(type_expr_no_instance()) + .then_ignore(just(Token::Eq)) + .then(just(Token::Chase).or_not()) + .then( + instance_item() + .map_with_span(|item, span| Spanned::new(item, to_span(span))) + .repeated() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .map(|(((name, theory), needs_chase), body)| InstanceDecl { + theory, + name, + body, + needs_chase: needs_chase.is_some(), + }) +} + +fn query_decl() -> impl Parser> + Clone { + just(Token::Query) + .ignore_then(ident()) + .then( + just(Token::Question) + .ignore_then(just(Token::Colon)) + .ignore_then(type_expr()) + .then_ignore(just(Token::Semicolon)) + .delimited_by(just(Token::LBrace), just(Token::RBrace)), + ) + .map(|(name, goal)| QueryDecl { name, goal }) +} + +fn namespace_decl() -> impl Parser> + Clone { + just(Token::Namespace) + .ignore_then(ident()) + .then_ignore(just(Token::Semicolon)) +} + +fn declaration() -> impl Parser> + Clone { + choice(( + namespace_decl().map(Declaration::Namespace), + theory_decl().map(Declaration::Theory), + instance_decl().map(Declaration::Instance), + query_decl().map(Declaration::Query), + )) +} + +// Unit tests moved to tests/unit_parsing.rs diff --git a/src/patch.rs b/src/patch.rs new file mode 100644 index 0000000..5e99620 --- /dev/null +++ b/src/patch.rs @@ -0,0 +1,688 @@ +//! Patch types for version control of geolog structures +//! +//! A Patch represents the changes between two versions of a Structure. +//! Patches are the fundamental unit of version history - each commit +//! creates a new patch that can be applied to recreate the structure. + +use crate::core::SortId; +use crate::id::{NumericId, Slid, Uuid}; +use rkyv::{Archive, Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; + +/// Changes to the element universe (additions and deletions) +/// +/// Note: Element names are tracked separately in NamingPatch. 
+#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct ElementPatch { + /// Elements removed from structure (by UUID) + pub deletions: BTreeSet, + /// Elements added: Uuid → sort_id + pub additions: BTreeMap, +} + +impl ElementPatch { + pub fn is_empty(&self) -> bool { + self.deletions.is_empty() && self.additions.is_empty() + } +} + +/// Changes to element names (separate from structural changes) +/// +/// Names can change independently of structure (renames), and new elements +/// need names. This keeps patches self-contained for version control. +#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct NamingPatch { + /// Names removed (by UUID) - typically when element is deleted + pub deletions: BTreeSet, + /// Names added or changed: UUID → qualified_name path + pub additions: BTreeMap>, +} + +impl NamingPatch { + pub fn is_empty(&self) -> bool { + self.deletions.is_empty() && self.additions.is_empty() + } +} + +/// Changes to function definitions +/// +/// We track both old and new values to support inversion (for undo). +/// The structure uses UUIDs rather than Slids since Slids are unstable +/// across different structure versions. +#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct FunctionPatch { + /// func_id → (domain_uuid → old_codomain_uuid) + /// None means was undefined before + pub old_values: BTreeMap>>, + /// func_id → (domain_uuid → new_codomain_uuid) + pub new_values: BTreeMap>, +} + +impl FunctionPatch { + pub fn is_empty(&self) -> bool { + self.new_values.is_empty() + } +} + +/// Changes to relation assertions (tuples added/removed) +/// +/// Tuples are stored as `Vec` since element Slids are unstable across versions. +/// We track both assertions and retractions to support inversion. 
+#[derive(Default, Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct RelationPatch { + /// rel_id → set of tuples retracted (as UUID vectors) + pub retractions: BTreeMap>>, + /// rel_id → set of tuples asserted (as UUID vectors) + pub assertions: BTreeMap>>, +} + +impl RelationPatch { + pub fn is_empty(&self) -> bool { + self.assertions.is_empty() && self.retractions.is_empty() + } +} + +/// A complete patch between two structure versions +/// +/// Patches form a linked list via source_commit → target_commit. +/// The initial commit has source_commit = None. +/// +/// Note: Theory reference is stored as a Luid in the Structure, not here. +#[derive(Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct Patch { + /// The commit this patch is based on (None for initial commit) + pub source_commit: Option, + /// The commit this patch creates + pub target_commit: Uuid, + /// Number of sorts in the theory (needed to rebuild structure) + pub num_sorts: usize, + /// Number of functions in the theory (needed to rebuild structure) + pub num_functions: usize, + /// Number of relations in the theory (needed to rebuild structure) + pub num_relations: usize, + /// Element changes (additions/deletions) + pub elements: ElementPatch, + /// Function value changes + pub functions: FunctionPatch, + /// Relation tuple changes (assertions/retractions) + pub relations: RelationPatch, + /// Name changes (for self-contained patches) + pub names: NamingPatch, +} + +impl Patch { + /// Create a new patch + pub fn new( + source_commit: Option, + num_sorts: usize, + num_functions: usize, + num_relations: usize, + ) -> Self { + Self { + source_commit, + target_commit: Uuid::now_v7(), + num_sorts, + num_functions, + num_relations, + elements: ElementPatch::default(), + functions: FunctionPatch::default(), + relations: RelationPatch::default(), + names: NamingPatch::default(), + } + } + + /// Check if 
this patch makes any changes + pub fn is_empty(&self) -> bool { + self.elements.is_empty() + && self.functions.is_empty() + && self.relations.is_empty() + && self.names.is_empty() + } + + /// Invert this patch (swap old/new, additions/deletions) + /// + /// Note: Inversion of element additions requires knowing the sort_id of deleted elements, + /// which we don't track in deletions. This is a known limitation - sort info is lost on invert. + /// Names are fully invertible since we track the full qualified name. + /// Relations are fully invertible (assertions ↔ retractions). + pub fn invert(&self) -> Patch { + Patch { + source_commit: Some(self.target_commit), + target_commit: self.source_commit.unwrap_or_else(Uuid::now_v7), + num_sorts: self.num_sorts, + num_functions: self.num_functions, + num_relations: self.num_relations, + elements: ElementPatch { + deletions: self.elements.additions.keys().copied().collect(), + additions: self + .elements + .deletions + .iter() + .map(|uuid| (*uuid, 0)) // Note: loses sort info on invert + .collect(), + }, + functions: FunctionPatch { + old_values: self + .functions + .new_values + .iter() + .map(|(func_id, changes)| { + ( + *func_id, + changes.iter().map(|(k, v)| (*k, Some(*v))).collect(), + ) + }) + .collect(), + new_values: self + .functions + .old_values + .iter() + .filter_map(|(func_id, changes)| { + let filtered: BTreeMap<_, _> = changes + .iter() + .filter_map(|(k, v)| v.map(|v| (*k, v))) + .collect(); + if filtered.is_empty() { + None + } else { + Some((*func_id, filtered)) + } + }) + .collect(), + }, + relations: RelationPatch { + // Swap assertions ↔ retractions + retractions: self.relations.assertions.clone(), + assertions: self.relations.retractions.clone(), + }, + names: NamingPatch { + deletions: self.names.additions.keys().copied().collect(), + additions: self + .names + .deletions + .iter() + .map(|uuid| (*uuid, vec![])) // Note: loses name on invert (would need old_names tracking) + .collect(), + }, + } + } 
+} + +// ============ Diff and Apply operations ============ + +use crate::core::{RelationStorage, Structure}; +use crate::id::{Luid, get_slid, some_slid}; +use crate::naming::NamingIndex; +use crate::universe::Universe; + +/// Create a patch representing the difference from `old` to `new`. +/// +/// The resulting patch, when applied to `old`, produces `new`. +/// Requires Universe for UUID lookup and NamingIndex for name changes. +pub fn diff( + old: &Structure, + new: &Structure, + universe: &Universe, + old_naming: &NamingIndex, + new_naming: &NamingIndex, +) -> Patch { + let mut patch = Patch::new( + None, // Will be set by caller if needed + new.num_sorts(), + new.num_functions(), + new.relations.len(), + ); + + // Find element deletions: elements in old but not in new + for &luid in old.luids.iter() { + if !new.luid_to_slid.contains_key(&luid) + && let Some(uuid) = universe.get(luid) { + patch.elements.deletions.insert(uuid); + // Also mark name as deleted + patch.names.deletions.insert(uuid); + } + } + + // Find element additions: elements in new but not in old + for (slid, &luid) in new.luids.iter().enumerate() { + if !old.luid_to_slid.contains_key(&luid) + && let Some(uuid) = universe.get(luid) { + patch.elements.additions.insert(uuid, new.sorts[slid]); + // Also add name from new_naming + if let Some(name) = new_naming.get(&uuid) { + patch.names.additions.insert(uuid, name.clone()); + } + } + } + + // Find name changes for elements that exist in both + for &luid in new.luids.iter() { + if old.luid_to_slid.contains_key(&luid) { + // Element exists in both - check for name change + if let Some(uuid) = universe.get(luid) { + let old_name = old_naming.get(&uuid); + let new_name = new_naming.get(&uuid); + if old_name != new_name + && let Some(name) = new_name { + patch.names.additions.insert(uuid, name.clone()); + } + } + } + } + + // Find function value changes + // We need to compare function values for elements that exist in both + for func_id in 
0..new.num_functions() { + if func_id >= old.num_functions() { + // New function added to schema - all its values are additions + // Record each defined value with old_value = None + let Some(new_func_col) = new.functions[func_id].as_local() else { continue }; + for (sort_slid, opt_codomain) in new_func_col.iter().enumerate() { + if let Some(new_codomain_slid) = get_slid(*opt_codomain) { + // Find UUIDs for domain and codomain + let domain_uuid = find_uuid_by_sort_slid(new, universe, func_id, sort_slid); + if let Some(domain_uuid) = domain_uuid { + let new_codomain_luid = new.luids[new_codomain_slid.index()]; + if let Some(new_codomain_uuid) = universe.get(new_codomain_luid) { + // Record: this domain element now maps to this codomain element + // (was undefined before since function didn't exist) + patch.functions.old_values + .entry(func_id) + .or_default() + .insert(domain_uuid, None); + patch.functions.new_values + .entry(func_id) + .or_default() + .insert(domain_uuid, new_codomain_uuid); + } + } + } + } + continue; + } + + let mut old_vals: BTreeMap> = BTreeMap::new(); + let mut new_vals: BTreeMap = BTreeMap::new(); + + // Iterate over elements in the new structure's function domain + // Note: patches only work with local functions currently + let Some(new_func_col) = new.functions[func_id].as_local() else { continue }; + let Some(old_func_col) = old.functions[func_id].as_local() else { continue }; + + for (sort_slid, opt_codomain) in new_func_col.iter().enumerate() { + // Find the UUID for this domain element + if let Some(new_codomain_slid) = get_slid(*opt_codomain) { + let domain_uuid = find_uuid_by_sort_slid(new, universe, func_id, sort_slid); + if let Some(domain_uuid) = domain_uuid { + let new_codomain_luid = new.luids[new_codomain_slid.index()]; + let new_codomain_uuid = universe.get(new_codomain_luid); + + if let Some(new_codomain_uuid) = new_codomain_uuid { + // Check if this element existed in old (by looking up its luid) + let domain_luid = 
find_luid_by_sort_slid(new, func_id, sort_slid); + if let Some(domain_luid) = domain_luid { + if let Some(&old_domain_slid) = old.luid_to_slid.get(&domain_luid) { + let old_sort_slid = old.sort_local_id(old_domain_slid); + let old_codomain = get_slid(old_func_col[old_sort_slid.index()]); + + match old_codomain { + Some(old_codomain_slid) => { + let old_codomain_luid = old.luids[old_codomain_slid.index()]; + if let Some(old_codomain_uuid) = + universe.get(old_codomain_luid) + && old_codomain_uuid != new_codomain_uuid { + // Value changed + old_vals + .insert(domain_uuid, Some(old_codomain_uuid)); + new_vals.insert(domain_uuid, new_codomain_uuid); + } + } + None => { + // Was undefined, now defined + old_vals.insert(domain_uuid, None); + new_vals.insert(domain_uuid, new_codomain_uuid); + } + } + } else { + // Domain element is new - function value is part of the addition + new_vals.insert(domain_uuid, new_codomain_uuid); + } + } + } + } + } + } + + if !new_vals.is_empty() { + patch.functions.old_values.insert(func_id, old_vals); + patch.functions.new_values.insert(func_id, new_vals); + } + } + + // Find relation changes + // Compare tuples in each relation between old and new + let num_relations = new.relations.len().min(old.relations.len()); + for rel_id in 0..num_relations { + let old_rel = &old.relations[rel_id]; + let new_rel = &new.relations[rel_id]; + + // Helper: convert a Slid tuple to UUID tuple + let slid_tuple_to_uuids = |tuple: &[Slid], structure: &Structure| -> Option> { + tuple + .iter() + .map(|&slid| { + let luid = structure.luids[slid.index()]; + universe.get(luid) + }) + .collect() + }; + + // Find tuples in old but not in new (retractions) + let mut retractions: BTreeSet> = BTreeSet::new(); + for tuple in old_rel.iter() { + // Check if this tuple (by UUID) exists in new + if let Some(uuid_tuple) = slid_tuple_to_uuids(tuple, old) { + // See if we can find the same UUID tuple in new + let exists_in_new = new_rel.iter().any(|new_tuple| { + 
slid_tuple_to_uuids(new_tuple, new) + .map(|new_uuids| new_uuids == uuid_tuple) + .unwrap_or(false) + }); + if !exists_in_new { + retractions.insert(uuid_tuple); + } + } + } + + // Find tuples in new but not in old (assertions) + let mut assertions: BTreeSet> = BTreeSet::new(); + for tuple in new_rel.iter() { + if let Some(uuid_tuple) = slid_tuple_to_uuids(tuple, new) { + let exists_in_old = old_rel.iter().any(|old_tuple| { + slid_tuple_to_uuids(old_tuple, old) + .map(|old_uuids| old_uuids == uuid_tuple) + .unwrap_or(false) + }); + if !exists_in_old { + assertions.insert(uuid_tuple); + } + } + } + + if !retractions.is_empty() { + patch.relations.retractions.insert(rel_id, retractions); + } + if !assertions.is_empty() { + patch.relations.assertions.insert(rel_id, assertions); + } + } + + // Handle new relations in new that don't exist in old + for rel_id in num_relations..new.relations.len() { + let new_rel = &new.relations[rel_id]; + let mut assertions: BTreeSet> = BTreeSet::new(); + + for tuple in new_rel.iter() { + let uuid_tuple: Option> = tuple + .iter() + .map(|&slid| { + let luid = new.luids[slid.index()]; + universe.get(luid) + }) + .collect(); + if let Some(uuids) = uuid_tuple { + assertions.insert(uuids); + } + } + + if !assertions.is_empty() { + patch.relations.assertions.insert(rel_id, assertions); + } + } + + patch +} + +/// Helper to find the Luid of an element given its func_id and sort_slid in a structure +fn find_luid_by_sort_slid(structure: &Structure, func_id: usize, sort_slid: usize) -> Option { + let func_col_len = structure.functions[func_id].len(); + for (slid_idx, &_sort_id) in structure.sorts.iter().enumerate() { + let slid = Slid::from_usize(slid_idx); + let elem_sort_slid = structure.sort_local_id(slid); + if elem_sort_slid.index() == sort_slid && func_col_len > sort_slid { + return Some(structure.luids[slid_idx]); + } + } + None +} + +/// Helper to find the UUID of an element given its func_id and sort_slid in a structure +fn 
find_uuid_by_sort_slid( + structure: &Structure, + universe: &Universe, + func_id: usize, + sort_slid: usize, +) -> Option { + find_luid_by_sort_slid(structure, func_id, sort_slid).and_then(|luid| universe.get(luid)) +} + +/// Apply a patch to create a new structure and update naming index. +/// +/// Returns Ok(new_structure) on success, or Err with a description of what went wrong. +/// Requires a Universe to convert UUIDs from the patch to Luids. +/// The naming parameter is updated with name changes from the patch. +pub fn apply_patch( + base: &Structure, + patch: &Patch, + universe: &mut Universe, + naming: &mut NamingIndex, +) -> Result { + // Create a new structure + let mut result = Structure::new(patch.num_sorts); + + // Build a set of deleted UUIDs for quick lookup + let deleted_uuids: std::collections::HashSet = + patch.elements.deletions.iter().copied().collect(); + + // Copy elements from base that weren't deleted + for (slid, &luid) in base.luids.iter().enumerate() { + let uuid = universe.get(luid).ok_or("Unknown luid in base structure")?; + if !deleted_uuids.contains(&uuid) { + result.add_element_with_luid(luid, base.sorts[slid]); + } + } + + // Add new elements from the patch (register UUIDs in universe) + for (uuid, sort_id) in &patch.elements.additions { + result.add_element_with_uuid(universe, *uuid, *sort_id); + } + + // Apply naming changes + for uuid in &patch.names.deletions { + // Note: NamingIndex doesn't have a remove method yet, skip for now + let _ = uuid; + } + for (uuid, name) in &patch.names.additions { + naming.insert(*uuid, name.clone()); + } + + // Initialize function storage + let domain_sort_ids: Vec> = (0..patch.num_functions) + .map(|func_id| { + if func_id < base.functions.len() && !base.functions[func_id].is_empty() { + let func_len = base.functions[func_id].len(); + for (sort_id, carrier) in base.carriers.iter().enumerate() { + if carrier.len() as usize == func_len { + return Some(sort_id); + } + } + } + None + }) + 
.collect(); + + result.init_functions(&domain_sort_ids); + + // Copy function values from base (for non-deleted elements) + // Note: patches only work with local functions currently + for func_id in 0..base.num_functions().min(result.num_functions()) { + let Some(base_func_col) = base.functions[func_id].as_local() else { continue }; + if !result.functions[func_id].is_local() { continue }; + + // Collect all the updates we need to make (to avoid borrow checker issues) + let mut updates: Vec<(usize, Slid)> = Vec::new(); + + for (old_sort_slid, opt_codomain) in base_func_col.iter().enumerate() { + if let Some(old_codomain_slid) = get_slid(*opt_codomain) { + // Find the domain element's Luid + let domain_luid = find_luid_by_sort_slid(base, func_id, old_sort_slid); + if let Some(domain_luid) = domain_luid { + // Check if domain element still exists in result + if let Some(&new_domain_slid) = result.luid_to_slid.get(&domain_luid) { + // Check if codomain element still exists + let codomain_luid = base.luids[old_codomain_slid.index()]; + if let Some(&new_codomain_slid) = result.luid_to_slid.get(&codomain_luid) { + let new_sort_slid = result.sort_local_id(new_domain_slid); + updates.push((new_sort_slid.index(), new_codomain_slid)); + } + } + } + } + } + + // Apply updates + if let Some(result_func_col) = result.functions[func_id].as_local_mut() { + for (idx, codomain_slid) in updates { + if idx < result_func_col.len() { + result_func_col[idx] = some_slid(codomain_slid); + } + } + } + } + + // Apply function value changes from patch (using UUIDs → Luids) + // Note: patches only work with local functions currently + for (func_id, changes) in &patch.functions.new_values { + if *func_id < result.num_functions() && result.functions[*func_id].is_local() { + // Collect updates first to avoid borrow checker issues + let mut updates: Vec<(usize, Slid)> = Vec::new(); + for (domain_uuid, codomain_uuid) in changes { + let domain_luid = universe.lookup(domain_uuid); + let codomain_luid 
= universe.lookup(codomain_uuid); + if let (Some(domain_luid), Some(codomain_luid)) = (domain_luid, codomain_luid) + && let (Some(&domain_slid), Some(&codomain_slid)) = ( + result.luid_to_slid.get(&domain_luid), + result.luid_to_slid.get(&codomain_luid), + ) + { + let sort_slid = result.sort_local_id(domain_slid); + updates.push((sort_slid.index(), codomain_slid)); + } + } + + // Apply updates + if let Some(result_func_col) = result.functions[*func_id].as_local_mut() { + for (idx, codomain_slid) in updates { + if idx < result_func_col.len() { + result_func_col[idx] = some_slid(codomain_slid); + } + } + } + } + } + + // Initialize relation storage + // Infer arities from base if available, otherwise from patch assertions + let relation_arities: Vec = (0..patch.num_relations) + .map(|rel_id| { + // Try base first + if rel_id < base.relations.len() { + base.relations[rel_id].arity() + } else if let Some(assertions) = patch.relations.assertions.get(&rel_id) { + // Infer from first assertion + assertions.iter().next().map(|t| t.len()).unwrap_or(0) + } else { + 0 + } + }) + .collect(); + result.init_relations(&relation_arities); + + // Copy relation tuples from base (for non-deleted elements) + for rel_id in 0..base.relations.len().min(patch.num_relations) { + let base_rel = &base.relations[rel_id]; + + for tuple in base_rel.iter() { + // Convert Slid tuple to UUID tuple to check if still valid + let uuid_tuple: Option> = tuple + .iter() + .map(|&slid| { + let luid = base.luids[slid.index()]; + universe.get(luid) + }) + .collect(); + + if let Some(uuid_tuple) = uuid_tuple { + // Check if this tuple should be retracted + let should_retract = patch + .relations + .retractions + .get(&rel_id) + .map(|r| r.contains(&uuid_tuple)) + .unwrap_or(false); + + if !should_retract { + // Check all elements still exist and convert to new Slids + let new_tuple: Option> = uuid_tuple + .iter() + .map(|uuid| { + universe + .lookup(uuid) + .and_then(|luid| 
result.luid_to_slid.get(&luid).copied()) + }) + .collect(); + + if let Some(new_tuple) = new_tuple { + result.assert_relation(rel_id, new_tuple); + } + } + } + } + } + + // Apply relation assertions from patch + for (rel_id, assertions) in &patch.relations.assertions { + if *rel_id < patch.num_relations { + for uuid_tuple in assertions { + let slid_tuple: Option> = uuid_tuple + .iter() + .map(|uuid| { + universe + .lookup(uuid) + .and_then(|luid| result.luid_to_slid.get(&luid).copied()) + }) + .collect(); + + if let Some(slid_tuple) = slid_tuple { + result.assert_relation(*rel_id, slid_tuple); + } + } + } + } + + Ok(result) +} + +/// Create a patch representing a structure from empty (initial commit) +pub fn to_initial_patch(structure: &Structure, universe: &Universe, naming: &NamingIndex) -> Patch { + let empty = Structure::new(structure.num_sorts()); + let empty_naming = NamingIndex::new(); + diff(&empty, structure, universe, &empty_naming, naming) +} + +// Unit tests moved to tests/proptest_patch.rs diff --git a/src/pretty.rs b/src/pretty.rs new file mode 100644 index 0000000..98368db --- /dev/null +++ b/src/pretty.rs @@ -0,0 +1,424 @@ +//! Pretty-printer for Geolog AST +//! +//! Renders AST back to source syntax for round-trip testing. 
+ +use crate::ast::*; + +/// Pretty-print configuration +pub struct PrettyConfig { + pub indent: usize, +} + +impl Default for PrettyConfig { + fn default() -> Self { + Self { indent: 2 } + } +} + +/// A pretty-printer with indentation tracking +pub struct Pretty { + output: String, + indent_level: usize, + config: PrettyConfig, +} + +impl Default for Pretty { + fn default() -> Self { + Self::new() + } +} + +impl Pretty { + pub fn new() -> Self { + Self { + output: String::new(), + indent_level: 0, + config: PrettyConfig::default(), + } + } + + pub fn finish(self) -> String { + self.output + } + + fn indent(&mut self) { + for _ in 0..(self.indent_level * self.config.indent) { + self.output.push(' '); + } + } + + fn write(&mut self, s: &str) { + self.output.push_str(s); + } + + fn writeln(&mut self, s: &str) { + self.output.push_str(s); + self.output.push('\n'); + } + + fn newline(&mut self) { + self.output.push('\n'); + } + + fn inc_indent(&mut self) { + self.indent_level += 1; + } + + fn dec_indent(&mut self) { + self.indent_level = self.indent_level.saturating_sub(1); + } +} + +// ============ Pretty-printing implementations ============ + +impl Pretty { + pub fn file(&mut self, file: &File) { + for (i, decl) in file.declarations.iter().enumerate() { + if i > 0 { + self.newline(); + } + self.declaration(&decl.node); + } + } + + pub fn declaration(&mut self, decl: &Declaration) { + match decl { + Declaration::Namespace(name) => { + self.write("namespace "); + self.write(name); + self.writeln(";"); + } + Declaration::Theory(t) => self.theory_decl(t), + Declaration::Instance(i) => self.instance_decl(i), + Declaration::Query(q) => self.query_decl(q), + } + } + + pub fn theory_decl(&mut self, t: &TheoryDecl) { + self.write("theory "); + for param in &t.params { + self.write("("); + self.write(¶m.name); + self.write(" : "); + self.type_expr(¶m.ty); + self.write(") "); + } + self.write(&t.name); + self.writeln(" {"); + self.inc_indent(); + for item in &t.body { + 
self.indent(); + self.theory_item(&item.node); + self.newline(); + } + self.dec_indent(); + self.writeln("}"); + } + + pub fn theory_item(&mut self, item: &TheoryItem) { + match item { + TheoryItem::Sort(name) => { + self.write(name); + self.write(" : Sort;"); + } + TheoryItem::Function(f) => { + self.write(&f.name.to_string()); + self.write(" : "); + self.type_expr(&f.domain); + self.write(" -> "); + self.type_expr(&f.codomain); + self.write(";"); + } + TheoryItem::Axiom(a) => self.axiom_decl(a), + TheoryItem::Field(name, ty) => { + self.write(name); + self.write(" : "); + self.type_expr(ty); + self.write(";"); + } + } + } + + pub fn axiom_decl(&mut self, a: &AxiomDecl) { + self.write(&a.name.to_string()); + self.write(" : forall "); + for (i, qv) in a.quantified.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(&qv.names.join(", ")); + self.write(" : "); + self.type_expr(&qv.ty); + } + self.write(". "); + + // Hypotheses (if any) + if !a.hypotheses.is_empty() { + for (i, hyp) in a.hypotheses.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.formula(hyp); + } + self.write(" "); + } + + self.write("|- "); + self.formula(&a.conclusion); + self.write(";"); + } + + pub fn type_expr(&mut self, ty: &TypeExpr) { + use crate::ast::TypeToken; + + let mut need_space = false; + + for token in &ty.tokens { + match token { + TypeToken::Sort => { + if need_space { + self.write(" "); + } + self.write("Sort"); + need_space = true; + } + TypeToken::Prop => { + if need_space { + self.write(" "); + } + self.write("Prop"); + need_space = true; + } + TypeToken::Path(p) => { + if need_space { + self.write(" "); + } + self.write(&p.to_string()); + need_space = true; + } + TypeToken::Instance => { + self.write(" instance"); + need_space = true; + } + TypeToken::Arrow => { + // Arrows are inserted between chunks + // This simplistic approach just prints " -> " when we see Arrow + self.write(" -> "); + need_space = false; + } + TypeToken::Record(fields) 
=> { + if need_space { + self.write(" "); + } + self.write("["); + for (i, (name, field_ty)) in fields.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(name); + self.write(" : "); + self.type_expr(field_ty); + } + self.write("]"); + need_space = true; + } + } + } + } + + /// Print a type expression that might need parentheses + #[allow(dead_code)] + fn type_expr_atom(&mut self, ty: &TypeExpr) { + use crate::ast::TypeToken; + + // Check if this needs parentheses (has arrows or multiple paths) + let has_arrow = ty.tokens.iter().any(|t| matches!(t, TypeToken::Arrow)); + let path_count = ty + .tokens + .iter() + .filter(|t| matches!(t, TypeToken::Path(_))) + .count(); + + if has_arrow || path_count > 1 { + self.write("("); + self.type_expr(ty); + self.write(")"); + } else { + self.type_expr(ty); + } + } + + pub fn term(&mut self, t: &Term) { + match t { + Term::Path(p) => self.write(&p.to_string()), + Term::App(f, a) => { + self.term(f); + self.write(" "); + self.term_atom(a); + } + Term::Project(t, field) => { + self.term(t); + self.write(" ."); + self.write(field); + } + Term::Record(fields) => { + self.write("["); + for (i, (name, val)) in fields.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(name); + self.write(": "); + self.term(val); + } + self.write("]"); + } + } + } + + /// Print a term that might need parentheses + fn term_atom(&mut self, t: &Term) { + match t { + Term::App(_, _) | Term::Project(_, _) => { + self.write("("); + self.term(t); + self.write(")"); + } + _ => self.term(t), + } + } + + pub fn formula(&mut self, f: &Formula) { + match f { + Formula::True => self.write("true"), + Formula::False => self.write("false"), + Formula::RelApp(rel_name, arg) => { + // Postfix relation application: term rel + self.term(arg); + self.write(" "); + self.write(rel_name); + } + Formula::Eq(l, r) => { + self.term(l); + self.write(" = "); + self.term(r); + } + Formula::And(conjuncts) => { + for (i, c) in 
conjuncts.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.formula(c); + } + } + Formula::Or(disjuncts) => { + for (i, d) in disjuncts.iter().enumerate() { + if i > 0 { + self.write(" \\/ "); + } + self.formula_atom(d); + } + } + Formula::Exists(vars, body) => { + self.write("(exists "); + for (i, qv) in vars.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(&qv.names.join(", ")); + self.write(" : "); + self.type_expr(&qv.ty); + } + self.write(". "); + self.formula(body); + self.write(")"); + } + } + } + + /// Print a formula that might need parentheses + fn formula_atom(&mut self, f: &Formula) { + match f { + Formula::Or(_) | Formula::And(_) => { + self.write("("); + self.formula(f); + self.write(")"); + } + _ => self.formula(f), + } + } + + pub fn instance_decl(&mut self, i: &InstanceDecl) { + self.write("instance "); + self.write(&i.name); + self.write(" : "); + self.type_expr(&i.theory); + self.writeln(" = {"); + self.inc_indent(); + for item in &i.body { + self.indent(); + self.instance_item(&item.node); + self.newline(); + } + self.dec_indent(); + self.writeln("}"); + } + + pub fn instance_item(&mut self, item: &InstanceItem) { + match item { + InstanceItem::Element(names, ty) => { + self.write(&names.join(", ")); + self.write(" : "); + self.type_expr(ty); + self.write(";"); + } + InstanceItem::Equation(lhs, rhs) => { + self.term(lhs); + self.write(" = "); + self.term(rhs); + self.write(";"); + } + InstanceItem::NestedInstance(name, inner) => { + self.write(name); + self.writeln(" = {"); + self.inc_indent(); + for item in &inner.body { + self.indent(); + self.instance_item(&item.node); + self.newline(); + } + self.dec_indent(); + self.indent(); + self.write("};"); + } + InstanceItem::RelationAssertion(term, rel) => { + self.term(term); + self.write(" "); + self.write(rel); + self.write(";"); + } + } + } + + pub fn query_decl(&mut self, q: &QueryDecl) { + self.write("query "); + self.write(&q.name); + self.writeln(" {"); + 
self.inc_indent(); + self.indent(); + self.write("? : "); + self.type_expr(&q.goal); + self.writeln(";"); + self.dec_indent(); + self.writeln("}"); + } +} + +/// Convenience function to pretty-print a file +pub fn pretty_print(file: &File) -> String { + let mut p = Pretty::new(); + p.file(file); + p.finish() +} + +// Unit tests moved to tests/unit_pretty.rs diff --git a/src/query/backend.rs b/src/query/backend.rs new file mode 100644 index 0000000..458b67b --- /dev/null +++ b/src/query/backend.rs @@ -0,0 +1,1650 @@ +//! Naive backend for executing RelAlgIR query plans. +//! +//! This is the "obviously correct" reference implementation: +//! - No optimization +//! - No indexing +//! - Just straightforward interpretation +//! +//! Used for proptest validation against optimized backends. +//! +//! # DBSP Temporal Operators +//! +//! This backend supports DBSP-style incremental computation via three temporal operators: +//! +//! - **Delay (z⁻¹)**: Access previous timestep's value +//! - **Diff (δ = 1 - z⁻¹)**: Compute difference from previous timestep +//! - **Integrate (∫)**: Accumulate values across all timesteps +//! +//! These operators require state across timesteps, managed by `StreamContext`. + +use std::collections::HashMap; + +use crate::core::Structure; +use crate::id::{NumericId, Slid}; + +/// A tuple in a relation (bag of tuples with multiplicities). +/// For now we use positive multiplicities only (proper Z-sets would allow negatives). +pub type Tuple = Vec; + +/// A bag of tuples (multiset). Maps tuple -> multiplicity. +/// Multiplicity 0 means absent. 
+#[derive(Debug, Clone, Default)] +pub struct Bag { + pub tuples: HashMap, +} + +impl Bag { + pub fn new() -> Self { + Self::default() + } + + pub fn singleton(tuple: Tuple) -> Self { + let mut b = Self::new(); + b.insert(tuple, 1); + b + } + + pub fn insert(&mut self, tuple: Tuple, mult: i64) { + let entry = self.tuples.entry(tuple.clone()).or_insert(0); + *entry += mult; + if *entry == 0 { + self.tuples.remove(&tuple); + } + } + + pub fn iter(&self) -> impl Iterator { + self.tuples.iter().filter(|(_, m)| **m != 0) + } + + /// Union (Z-set addition) + pub fn union(&self, other: &Bag) -> Bag { + let mut result = self.clone(); + for (tuple, mult) in other.iter() { + result.insert(tuple.clone(), *mult); + } + result + } + + /// Negate (flip multiplicities) + pub fn negate(&self) -> Bag { + let mut result = Bag::new(); + for (tuple, mult) in self.iter() { + result.insert(tuple.clone(), -mult); + } + result + } + + /// Distinct (clamp multiplicities to 0 or 1) + pub fn distinct(&self) -> Bag { + let mut result = Bag::new(); + for (tuple, mult) in self.iter() { + if *mult > 0 { + result.insert(tuple.to_vec(), 1); + } + } + result + } + + pub fn is_empty(&self) -> bool { + self.tuples.is_empty() + } + + pub fn len(&self) -> usize { + self.tuples.len() + } +} + +/// Query plan operations (mirrors RelAlgIR but as Rust enums for execution) +#[derive(Debug, Clone)] +pub enum QueryOp { + /// Scan all elements of a sort + Scan { sort_idx: usize }, + + /// Scan all tuples in a relation + /// Each tuple becomes a row in the result bag + ScanRelation { rel_id: usize }, + + /// Filter by predicate + Filter { + input: Box, + pred: Predicate, + }, + + /// Project to specific columns + Project { + input: Box, + columns: Vec, + }, + + /// Join two inputs on condition + Join { + left: Box, + right: Box, + cond: JoinCond, + }, + + /// Union (bag addition) + Union { + left: Box, + right: Box, + }, + + /// Distinct (deduplicate) + Distinct { input: Box }, + + /// Negate multiplicities + 
Negate { input: Box }, + + /// Constant single tuple + Constant { tuple: Tuple }, + + /// Empty relation + Empty, + + /// Apply a function: extends tuples with `func(arg_col)` + /// `(t₁, ..., tₙ)` → `(t₁, ..., tₙ, func(t[arg_col]))` + Apply { + input: Box, + func_idx: usize, + arg_col: usize, + }, + + /// Apply a single field of a product codomain function + /// For `f: A -> [x: B, y: C]`, extends tuples with `f(arg_col).field_name` + /// `(t₁, ..., tₙ)` → `(t₁, ..., tₙ, f(t[arg_col]).field_name)` + ApplyField { + input: Box, + func_idx: usize, + arg_col: usize, + field_name: String, + }, + + // ======================================================================== + // DBSP Temporal Operators + // ======================================================================== + // These operators work on streams over time, requiring state management. + // Use `execute_stream` with a `StreamContext` instead of bare `execute`. + + /// Delay (z⁻¹): output previous timestep's input value + /// At timestep 0, outputs empty bag. 
+ Delay { + input: Box, + /// Unique identifier for this delay's state + state_id: usize, + }, + + /// Differentiate (δ = 1 - z⁻¹): compute changes since previous timestep + /// output = current_input - previous_input + Diff { + input: Box, + /// Unique identifier for this diff's state + state_id: usize, + }, + + /// Integrate (∫): accumulate inputs over all timesteps + /// output = Σ (all inputs from timestep 0 to now) + Integrate { + input: Box, + /// Unique identifier for this integrate's state + state_id: usize, + }, +} + +/// Predicate for filtering +#[derive(Debug, Clone)] +pub enum Predicate { + True, + False, + /// Column equals constant + ColEqConst { col: usize, val: Slid }, + /// Two columns equal + ColEqCol { left: usize, right: usize }, + /// Function application: func(col_arg) = col_result (both columns) + FuncEq { + func_idx: usize, + arg_col: usize, + result_col: usize, + }, + /// Function application equals constant: func(col_arg) = expected + FuncEqConst { + func_idx: usize, + arg_col: usize, + expected: Slid, + }, + And(Box, Box), + Or(Box, Box), +} + +/// Join condition +#[derive(Debug, Clone)] +pub enum JoinCond { + /// Cross product + Cross, + /// Equijoin on columns + Equi { left_col: usize, right_col: usize }, +} + +// ============================================================================ +// Pretty Printing +// ============================================================================ + +use std::fmt; + +impl fmt::Display for QueryOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.fmt_indented(f, 0) + } +} + +impl QueryOp { + /// Format with indentation for tree structure + fn fmt_indented(&self, f: &mut fmt::Formatter<'_>, indent: usize) -> fmt::Result { + let pad = " ".repeat(indent); + match self { + QueryOp::Scan { sort_idx } => { + write!(f, "{}Scan(sort={})", pad, sort_idx) + } + QueryOp::ScanRelation { rel_id } => { + write!(f, "{}ScanRelation(rel={})", pad, rel_id) + } + QueryOp::Filter { input, pred } 
=> { + writeln!(f, "{}Filter({})", pad, pred)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Project { input, columns } => { + writeln!(f, "{}Project({:?})", pad, columns)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Join { left, right, cond } => { + writeln!(f, "{}Join({})", pad, cond)?; + left.fmt_indented(f, indent + 1)?; + writeln!(f)?; + right.fmt_indented(f, indent + 1) + } + QueryOp::Union { left, right } => { + writeln!(f, "{}Union", pad)?; + left.fmt_indented(f, indent + 1)?; + writeln!(f)?; + right.fmt_indented(f, indent + 1) + } + QueryOp::Distinct { input } => { + writeln!(f, "{}Distinct", pad)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Negate { input } => { + writeln!(f, "{}Negate", pad)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Constant { tuple } => { + let vals: Vec<_> = tuple.iter().map(|s| s.index()).collect(); + write!(f, "{}Const({:?})", pad, vals) + } + QueryOp::Empty => { + write!(f, "{}Empty", pad) + } + QueryOp::Apply { input, func_idx, arg_col } => { + writeln!(f, "{}Apply(func={}, arg_col={})", pad, func_idx, arg_col)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::ApplyField { input, func_idx, arg_col, field_name } => { + writeln!(f, "{}ApplyField(func={}, arg_col={}, field={})", pad, func_idx, arg_col, field_name)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Delay { input, state_id } => { + writeln!(f, "{}z⁻¹(state={})", pad, state_id)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Diff { input, state_id } => { + writeln!(f, "{}δ(state={})", pad, state_id)?; + input.fmt_indented(f, indent + 1) + } + QueryOp::Integrate { input, state_id } => { + writeln!(f, "{}∫(state={})", pad, state_id)?; + input.fmt_indented(f, indent + 1) + } + } + } +} + +impl fmt::Display for Predicate { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Predicate::True => write!(f, "⊤"), + Predicate::False => write!(f, "⊥"), + Predicate::ColEqConst { col, val } => write!(f, "c{}={}", 
col, val.index()), + Predicate::ColEqCol { left, right } => write!(f, "c{}=c{}", left, right), + Predicate::FuncEq { func_idx, arg_col, result_col } => { + write!(f, "f{}(c{})=c{}", func_idx, arg_col, result_col) + } + Predicate::FuncEqConst { func_idx, arg_col, expected } => { + write!(f, "f{}(c{})={}", func_idx, arg_col, expected.index()) + } + Predicate::And(a, b) => write!(f, "({} ∧ {})", a, b), + Predicate::Or(a, b) => write!(f, "({} ∨ {})", a, b), + } + } +} + +impl fmt::Display for JoinCond { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + JoinCond::Cross => write!(f, "×"), + JoinCond::Equi { left_col, right_col } => { + write!(f, "c{}=c{}", left_col, right_col) + } + } + } +} + +// ============================================================================ +// DBSP Stream Context +// ============================================================================ + +/// State for DBSP temporal operators across timesteps. +/// +/// Each stateful operator (Delay, Diff, Integrate) uses a unique `state_id` +/// to store its state in this context. Call `step()` to advance time. +#[derive(Debug, Clone, Default)] +pub struct StreamContext { + /// Current timestep (starts at 0) + pub timestep: u64, + + /// State for Delay operators: state_id -> previous input + delay_state: HashMap, + + /// State for Diff operators: state_id -> previous input + diff_state: HashMap, + + /// State for Integrate operators: state_id -> accumulated sum + integrate_state: HashMap, +} + +impl StreamContext { + /// Create a new stream context at timestep 0 + pub fn new() -> Self { + Self::default() + } + + /// Advance to the next timestep. + /// + /// This should be called after processing all operators for the current step. + /// Delay state is automatically updated during execution. 
+ pub fn step(&mut self) { + self.timestep += 1; + } + + /// Reset all state (for testing or restarting computation) + pub fn reset(&mut self) { + self.timestep = 0; + self.delay_state.clear(); + self.diff_state.clear(); + self.integrate_state.clear(); + } + + /// Get delay state (previous input) + fn get_delay(&self, state_id: usize) -> Bag { + self.delay_state.get(&state_id).cloned().unwrap_or_default() + } + + /// Set delay state for next timestep + fn set_delay(&mut self, state_id: usize, bag: Bag) { + self.delay_state.insert(state_id, bag); + } + + /// Get diff state (previous input for differentiation) + fn get_diff_prev(&self, state_id: usize) -> Bag { + self.diff_state.get(&state_id).cloned().unwrap_or_default() + } + + /// Set diff state for next timestep + fn set_diff_prev(&mut self, state_id: usize, bag: Bag) { + self.diff_state.insert(state_id, bag); + } + + /// Get integrate state (accumulated sum) + fn get_integrate(&self, state_id: usize) -> Bag { + self.integrate_state.get(&state_id).cloned().unwrap_or_default() + } + + /// Update integrate state with new input + fn accumulate_integrate(&mut self, state_id: usize, delta: &Bag) { + let current = self.get_integrate(state_id); + let new_total = current.union(delta); + self.integrate_state.insert(state_id, new_total); + } +} + +/// Execute a query plan against a structure. +/// +/// This is the naive, obviously-correct implementation. 
+pub fn execute(plan: &QueryOp, structure: &Structure) -> Bag { + match plan { + QueryOp::Scan { sort_idx } => { + let mut result = Bag::new(); + if let Some(carrier) = structure.carriers.get(*sort_idx) { + for elem in carrier.iter() { + result.insert(vec![Slid::from_usize(elem as usize)], 1); + } + } + result + } + + QueryOp::ScanRelation { rel_id } => { + let mut result = Bag::new(); + if let Some(rel) = structure.relations.get(*rel_id) { + for tuple in rel.iter() { + result.insert(tuple.to_vec(), 1); + } + } + result + } + + QueryOp::Filter { input, pred } => { + let input_bag = execute(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if eval_predicate(pred, tuple, structure) { + result.insert(tuple.clone(), *mult); + } + } + result + } + + QueryOp::Project { input, columns } => { + let input_bag = execute(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + let projected: Tuple = columns.iter().map(|&c| tuple[c]).collect(); + result.insert(projected, *mult); + } + result + } + + QueryOp::Join { left, right, cond } => { + let left_bag = execute(left, structure); + let right_bag = execute(right, structure); + let mut result = Bag::new(); + + for (l_tuple, l_mult) in left_bag.iter() { + for (r_tuple, r_mult) in right_bag.iter() { + if eval_join_cond(cond, l_tuple, r_tuple) { + // Concatenate tuples + let mut combined = l_tuple.clone(); + combined.extend(r_tuple.iter().cloned()); + result.insert(combined, l_mult * r_mult); + } + } + } + result + } + + QueryOp::Union { left, right } => { + let left_bag = execute(left, structure); + let right_bag = execute(right, structure); + left_bag.union(&right_bag) + } + + QueryOp::Distinct { input } => { + let input_bag = execute(input, structure); + input_bag.distinct() + } + + QueryOp::Negate { input } => { + let input_bag = execute(input, structure); + input_bag.negate() + } + + QueryOp::Constant { tuple } => Bag::singleton(tuple.clone()), 
+ + QueryOp::Empty => Bag::new(), + + QueryOp::Apply { input, func_idx, arg_col } => { + let input_bag = execute(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + // Look up function value + // Use sort_local_id to convert Slid to sort-local SortSlid + let sort_slid = structure.sort_local_id(arg); + if let Some(func_result) = structure.get_function(*func_idx, sort_slid) { + // Extend tuple with function result + let mut extended = tuple.clone(); + extended.push(func_result); + result.insert(extended, *mult); + } + // If function undefined, tuple is dropped (acts as filter) + } + } + result + } + + QueryOp::ApplyField { input, func_idx, arg_col, field_name } => { + let input_bag = execute(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + let sort_slid = structure.sort_local_id(arg); + // Get product codomain and look up specific field + if let Some(fields) = structure.get_function_product_codomain(*func_idx, sort_slid) + && let Some((_, field_val)) = fields.iter().find(|(n, _)| n == field_name) { + let mut extended = tuple.clone(); + extended.push(*field_val); + result.insert(extended, *mult); + } + // If field undefined, tuple is dropped + } + } + result + } + + // DBSP operators require StreamContext - use execute_stream() instead + QueryOp::Delay { .. } | QueryOp::Diff { .. } | QueryOp::Integrate { .. } => { + panic!("DBSP temporal operators require StreamContext - use execute_stream() instead") + } + } +} + +/// Execute a query plan with DBSP temporal operator support. +/// +/// This handles both stateless operators (scan, filter, join, etc.) and stateful +/// DBSP operators (delay, diff, integrate). The StreamContext maintains state +/// across timesteps. 
+/// +/// # Example: Semi-naive Datalog fixpoint +/// +/// ```ignore +/// let mut ctx = StreamContext::new(); +/// let plan = /* query plan with Integrate for fixpoint */; +/// +/// loop { +/// let delta = execute_stream(&plan, &structure, &mut ctx); +/// if delta.is_empty() { +/// break; // fixpoint reached +/// } +/// ctx.step(); +/// } +/// ``` +pub fn execute_stream(plan: &QueryOp, structure: &Structure, ctx: &mut StreamContext) -> Bag { + match plan { + // Stateless operators - delegate to execute() + QueryOp::Scan { .. } + | QueryOp::ScanRelation { .. } + | QueryOp::Filter { .. } + | QueryOp::Project { .. } + | QueryOp::Join { .. } + | QueryOp::Union { .. } + | QueryOp::Distinct { .. } + | QueryOp::Negate { .. } + | QueryOp::Constant { .. } + | QueryOp::Empty + | QueryOp::Apply { .. } + | QueryOp::ApplyField { .. } => { + // For stateless operators that contain DBSP subexpressions, + // we need to recursively handle them + execute_stream_stateless(plan, structure, ctx) + } + + // DBSP: Delay (z⁻¹) - output previous timestep's input + QueryOp::Delay { input, state_id } => { + // Get previous state (empty at timestep 0) + let previous = ctx.get_delay(*state_id); + + // Compute current input + let current = execute_stream(input, structure, ctx); + + // Store current for next timestep + ctx.set_delay(*state_id, current); + + // Return previous + previous + } + + // DBSP: Diff (δ = 1 - z⁻¹) - compute difference from previous + QueryOp::Diff { input, state_id } => { + // Get previous input + let previous = ctx.get_diff_prev(*state_id); + + // Compute current input + let current = execute_stream(input, structure, ctx); + + // Store current for next timestep + ctx.set_diff_prev(*state_id, current.clone()); + + // Return current - previous (using Z-set subtraction) + current.union(&previous.negate()) + } + + // DBSP: Integrate (∫) - accumulate over all timesteps + QueryOp::Integrate { input, state_id } => { + // Compute current input (typically a delta/diff) + let 
delta = execute_stream(input, structure, ctx); + + // Add to accumulated total + ctx.accumulate_integrate(*state_id, &delta); + + // Return the accumulated total + ctx.get_integrate(*state_id) + } + } +} + +/// Helper for executing stateless operators that may contain DBSP subexpressions. +fn execute_stream_stateless(plan: &QueryOp, structure: &Structure, ctx: &mut StreamContext) -> Bag { + match plan { + QueryOp::Scan { sort_idx } => { + let mut result = Bag::new(); + if let Some(carrier) = structure.carriers.get(*sort_idx) { + for elem in carrier.iter() { + result.insert(vec![Slid::from_usize(elem as usize)], 1); + } + } + result + } + + QueryOp::ScanRelation { rel_id } => { + let mut result = Bag::new(); + if let Some(rel) = structure.relations.get(*rel_id) { + for tuple in rel.iter() { + result.insert(tuple.to_vec(), 1); + } + } + result + } + + QueryOp::Filter { input, pred } => { + let input_bag = execute_stream(input, structure, ctx); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if eval_predicate(pred, tuple, structure) { + result.insert(tuple.clone(), *mult); + } + } + result + } + + QueryOp::Project { input, columns } => { + let input_bag = execute_stream(input, structure, ctx); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + let projected: Tuple = columns.iter().map(|&c| tuple[c]).collect(); + result.insert(projected, *mult); + } + result + } + + QueryOp::Join { left, right, cond } => { + let left_bag = execute_stream(left, structure, ctx); + let right_bag = execute_stream(right, structure, ctx); + let mut result = Bag::new(); + + for (l_tuple, l_mult) in left_bag.iter() { + for (r_tuple, r_mult) in right_bag.iter() { + if eval_join_cond(cond, l_tuple, r_tuple) { + let mut combined = l_tuple.clone(); + combined.extend(r_tuple.iter().cloned()); + result.insert(combined, l_mult * r_mult); + } + } + } + result + } + + QueryOp::Union { left, right } => { + let left_bag = execute_stream(left, structure, 
ctx); + let right_bag = execute_stream(right, structure, ctx); + left_bag.union(&right_bag) + } + + QueryOp::Distinct { input } => { + let input_bag = execute_stream(input, structure, ctx); + input_bag.distinct() + } + + QueryOp::Negate { input } => { + let input_bag = execute_stream(input, structure, ctx); + input_bag.negate() + } + + QueryOp::Constant { tuple } => Bag::singleton(tuple.clone()), + + QueryOp::Empty => Bag::new(), + + QueryOp::Apply { input, func_idx, arg_col } => { + let input_bag = execute_stream(input, structure, ctx); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + // Use sort_local_id to convert Slid to sort-local SortSlid + let sort_slid = structure.sort_local_id(arg); + if let Some(func_result) = structure.get_function(*func_idx, sort_slid) { + let mut extended = tuple.clone(); + extended.push(func_result); + result.insert(extended, *mult); + } + } + } + result + } + + QueryOp::ApplyField { input, func_idx, arg_col, field_name } => { + let input_bag = execute_stream(input, structure, ctx); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + let sort_slid = structure.sort_local_id(arg); + if let Some(fields) = structure.get_function_product_codomain(*func_idx, sort_slid) + && let Some((_, field_val)) = fields.iter().find(|(n, _)| n == field_name) { + let mut extended = tuple.clone(); + extended.push(*field_val); + result.insert(extended, *mult); + } + } + } + result + } + + // DBSP operators handled by execute_stream directly + QueryOp::Delay { .. } | QueryOp::Diff { .. } | QueryOp::Integrate { .. 
} => { + execute_stream(plan, structure, ctx) + } + } +} + +fn eval_predicate(pred: &Predicate, tuple: &Tuple, structure: &Structure) -> bool { + match pred { + Predicate::True => true, + Predicate::False => false, + + Predicate::ColEqConst { col, val } => tuple.get(*col) == Some(val), + + Predicate::ColEqCol { left, right } => { + tuple.get(*left) == tuple.get(*right) && tuple.get(*left).is_some() + } + + Predicate::FuncEq { + func_idx, + arg_col, + result_col, + } => { + if let (Some(&arg), Some(&expected)) = (tuple.get(*arg_col), tuple.get(*result_col)) { + // Look up function value in structure + // Use sort_local_id to convert Slid to sort-local SortSlid + let sort_slid = structure.sort_local_id(arg); + if let Some(actual) = structure.get_function(*func_idx, sort_slid) { + return actual == expected; + } + } + false + } + + Predicate::FuncEqConst { + func_idx, + arg_col, + expected, + } => { + if let Some(&arg) = tuple.get(*arg_col) { + // Look up function value in structure + // Use sort_local_id to convert Slid to sort-local SortSlid + let sort_slid = structure.sort_local_id(arg); + if let Some(actual) = structure.get_function(*func_idx, sort_slid) { + return actual == *expected; + } + } + false + } + + Predicate::And(a, b) => { + eval_predicate(a, tuple, structure) && eval_predicate(b, tuple, structure) + } + + Predicate::Or(a, b) => { + eval_predicate(a, tuple, structure) || eval_predicate(b, tuple, structure) + } + } +} + +fn eval_join_cond(cond: &JoinCond, left: &Tuple, right: &Tuple) -> bool { + match cond { + JoinCond::Cross => true, + JoinCond::Equi { left_col, right_col } => { + left.get(*left_col) == right.get(*right_col) && left.get(*left_col).is_some() + } + } +} + +// ============================================================================ +// Optimized Backend with Hash Joins +// ============================================================================ + +/// Execute a query plan with optimizations (hash joins for equijoins). 
+/// +/// This produces the same results as `execute()` but with better asymptotic +/// complexity for equijoins: O(n+m) instead of O(n*m). +/// +/// Use `execute()` as the reference implementation for testing correctness. +pub fn execute_optimized(plan: &QueryOp, structure: &Structure) -> Bag { + match plan { + QueryOp::Join { left, right, cond: JoinCond::Equi { left_col, right_col } } => { + // Hash join: O(n + m) instead of O(n * m) + let left_bag = execute_optimized(left, structure); + let right_bag = execute_optimized(right, structure); + + // Build phase: hash the smaller relation + let (build_bag, probe_bag, build_col, probe_col, is_left_build) = + if left_bag.len() <= right_bag.len() { + (&left_bag, &right_bag, *left_col, *right_col, true) + } else { + (&right_bag, &left_bag, *right_col, *left_col, false) + }; + + // Build hash table: key -> Vec<(tuple, multiplicity)> + let mut hash_table: HashMap> = HashMap::new(); + for (tuple, mult) in build_bag.iter() { + if let Some(&key) = tuple.get(build_col) { + hash_table.entry(key).or_default().push((tuple, *mult)); + } + } + + // Probe phase + let mut result = Bag::new(); + for (probe_tuple, probe_mult) in probe_bag.iter() { + if let Some(&key) = probe_tuple.get(probe_col) + && let Some(matches) = hash_table.get(&key) { + for (build_tuple, build_mult) in matches { + // Reconstruct in correct order (left, right) + let combined = if is_left_build { + let mut c = (*build_tuple).clone(); + c.extend(probe_tuple.iter().cloned()); + c + } else { + let mut c = probe_tuple.clone(); + c.extend((*build_tuple).iter().cloned()); + c + }; + + let mult = if is_left_build { + build_mult * probe_mult + } else { + probe_mult * build_mult + }; + result.insert(combined, mult); + } + } + } + result + } + + // For other operators, delegate to naive implementation but recurse optimized + QueryOp::Scan { sort_idx } => { + let mut result = Bag::new(); + if let Some(carrier) = structure.carriers.get(*sort_idx) { + for elem in 
carrier.iter() { + result.insert(vec![Slid::from_usize(elem as usize)], 1); + } + } + result + } + + QueryOp::ScanRelation { rel_id } => { + let mut result = Bag::new(); + if let Some(rel) = structure.relations.get(*rel_id) { + for tuple in rel.iter() { + result.insert(tuple.to_vec(), 1); + } + } + result + } + + QueryOp::Filter { input, pred } => { + let input_bag = execute_optimized(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if eval_predicate(pred, tuple, structure) { + result.insert(tuple.clone(), *mult); + } + } + result + } + + QueryOp::Project { input, columns } => { + let input_bag = execute_optimized(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + let projected: Tuple = columns.iter().map(|&c| tuple[c]).collect(); + result.insert(projected, *mult); + } + result + } + + QueryOp::Join { left, right, cond: JoinCond::Cross } => { + // Cross join: still O(n*m), no optimization possible + let left_bag = execute_optimized(left, structure); + let right_bag = execute_optimized(right, structure); + let mut result = Bag::new(); + + for (l_tuple, l_mult) in left_bag.iter() { + for (r_tuple, r_mult) in right_bag.iter() { + let mut combined = l_tuple.clone(); + combined.extend(r_tuple.iter().cloned()); + result.insert(combined, l_mult * r_mult); + } + } + result + } + + QueryOp::Union { left, right } => { + let left_bag = execute_optimized(left, structure); + let right_bag = execute_optimized(right, structure); + left_bag.union(&right_bag) + } + + QueryOp::Distinct { input } => { + let input_bag = execute_optimized(input, structure); + input_bag.distinct() + } + + QueryOp::Negate { input } => { + let input_bag = execute_optimized(input, structure); + input_bag.negate() + } + + QueryOp::Constant { tuple } => Bag::singleton(tuple.clone()), + + QueryOp::Empty => Bag::new(), + + QueryOp::Apply { input, func_idx, arg_col } => { + let input_bag = execute_optimized(input, 
structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + let sort_slid = structure.sort_local_id(arg); + if let Some(func_result) = structure.get_function(*func_idx, sort_slid) { + let mut extended = tuple.clone(); + extended.push(func_result); + result.insert(extended, *mult); + } + } + } + result + } + + QueryOp::ApplyField { input, func_idx, arg_col, field_name } => { + let input_bag = execute_optimized(input, structure); + let mut result = Bag::new(); + for (tuple, mult) in input_bag.iter() { + if let Some(&arg) = tuple.get(*arg_col) { + let sort_slid = structure.sort_local_id(arg); + if let Some(fields) = structure.get_function_product_codomain(*func_idx, sort_slid) + && let Some((_, field_val)) = fields.iter().find(|(n, _)| n == field_name) { + let mut extended = tuple.clone(); + extended.push(*field_val); + result.insert(extended, *mult); + } + } + } + result + } + + // DBSP operators not supported in optimized path yet + QueryOp::Delay { .. } | QueryOp::Diff { .. } | QueryOp::Integrate { .. 
} => { + panic!("DBSP temporal operators require StreamContext - use execute_stream() instead") + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::id::NumericId; + + #[test] + fn test_scan_filter() { + // Create a simple structure with one sort containing elements 0, 1, 2 + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + // Scan all elements + let scan = QueryOp::Scan { sort_idx: 0 }; + let result = execute(&scan, &structure); + assert_eq!(result.len(), 3); + + // Filter to just element 1 + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }, + }; + let result = execute(&filter, &structure); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(1)])); + } + + #[test] + fn test_scan_relation() { + use crate::core::{RelationStorage, VecRelation}; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + + // Initialize a relation with arity 2 + structure.relations.push(VecRelation::new(2)); + + // Add tuples to the relation + structure.relations[0].insert(vec![Slid::from_usize(0), Slid::from_usize(1)]); + structure.relations[0].insert(vec![Slid::from_usize(1), Slid::from_usize(0)]); + + // Scan the relation + let scan_rel = QueryOp::ScanRelation { rel_id: 0 }; + let result = execute(&scan_rel, &structure); + + assert_eq!(result.len(), 2); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(0), Slid::from_usize(1)])); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(1), Slid::from_usize(0)])); + } + + #[test] + fn test_join() { + let mut structure = Structure::new(2); + // Sort 0: {a, b} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + // Sort 1: {x, y} + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + let left = 
QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }; + + let result = execute(&join, &structure); + // Cross product: 2 * 2 = 4 tuples + assert_eq!(result.len(), 4); + } + + // ======================================================================== + // DBSP Temporal Operator Tests + // ======================================================================== + + #[test] + fn test_delay_initial_empty() { + // Delay should output empty at timestep 0 + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let plan = QueryOp::Delay { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(42)], + }), + state_id: 0, + }; + + // First step: output should be empty (no previous) + let result = execute_stream(&plan, &structure, &mut ctx); + assert!(result.is_empty(), "delay should be empty at timestep 0"); + } + + #[test] + fn test_delay_outputs_previous() { + // Delay should output previous input after step() + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let plan = QueryOp::Delay { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(42)], + }), + state_id: 0, + }; + + // First step: execute to set up state + let _ = execute_stream(&plan, &structure, &mut ctx); + ctx.step(); + + // Second step: should output the previous input + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(42)])); + } + + #[test] + fn test_diff_computes_delta() { + // Diff outputs current - previous + let mut structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + // Start with elements {0, 1} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + + let plan = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + + // First 
step: diff = {0, 1} - {} = {0, 1} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 2); + ctx.step(); + + // Add element 2, so now scan = {0, 1, 2} + structure.carriers[0].insert(2); + + // Second step: diff = {0, 1, 2} - {0, 1} = {2} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(2)])); + } + + #[test] + fn test_integrate_accumulates() { + // Integrate accumulates across timesteps + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + // We'll feed constant input at each step + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(1)], + }), + state_id: 0, + }; + + // Step 0: accumulated = {1} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 1); + ctx.step(); + + // Step 1: accumulated = {1} + {1} = {1} with multiplicity 2 + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 2); + ctx.step(); + + // Step 2: multiplicity 3 + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 3); + } + + #[test] + fn test_diff_integrate_identity() { + // ∫(δ(x)) = x (for stable input) + // This is the fundamental DBSP identity + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let mut ctx = StreamContext::new(); + + // ∫(δ(scan)) + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + state_id: 1, + }; + + // Step 0: diff = {0,1,2}, integrate = {0,1,2} + let result = execute_stream(&plan, &structure, 
&mut ctx); + assert_eq!(result.len(), 3); + ctx.step(); + + // Step 1: diff = {} (no change), integrate = {0,1,2} (unchanged) + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 3); + ctx.step(); + + // Step 2: still {0,1,2} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 3); + } + + #[test] + fn test_dbsp_with_filter() { + // Test DBSP operators composed with stateless operators + let mut structure = Structure::new(1); + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let mut ctx = StreamContext::new(); + + // Filter(Diff(scan)) - incremental filter + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }, + }; + + // Step 0: diff = {0,1,2}, filter = {1} + let result = execute_stream(&plan, &structure, &mut ctx); + assert_eq!(result.len(), 1); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(1)])); + ctx.step(); + + // Add element 3 (doesn't pass filter) + structure.carriers[0].insert(3); + + // Step 1: diff = {3}, filter = {} (3 doesn't match predicate) + let result = execute_stream(&plan, &structure, &mut ctx); + assert!(result.is_empty()); + } + + #[test] + fn test_stream_context_reset() { + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Constant { + tuple: vec![Slid::from_usize(1)], + }), + state_id: 0, + }; + + // Run a few steps + let _ = execute_stream(&plan, &structure, &mut ctx); + ctx.step(); + let _ = execute_stream(&plan, &structure, &mut ctx); + ctx.step(); + + assert_eq!(ctx.timestep, 2); + + // Reset + ctx.reset(); + assert_eq!(ctx.timestep, 0); + + // Integrate should start fresh + let result = execute_stream(&plan, &structure, &mut ctx); + 
assert_eq!(*result.tuples.get(&vec![Slid::from_usize(1)]).unwrap(), 1); + } + + // ======================================================================== + // Semi-Naive Datalog Example (DBSP in action) + // ======================================================================== + + /// Demonstrates DBSP for transitive closure (semi-naive style). + /// + /// This example computes reachability in a graph using the DBSP pattern: + /// - δR = new facts this iteration + /// - ∫(δR) = all facts so far + /// + /// The "semi-naive" optimization is automatic: Diff computes only changes, + /// avoiding redundant re-derivation of old facts. + #[test] + fn test_semi_naive_transitive_closure() { + // Graph: 0→1, 1→2, 2→3 + // We represent edges as tuples (src, tgt) in sort 0 + let mut structure = Structure::new(1); + + // Add edge tuples as elements: encode (a,b) as slid = a*10 + b + // 0→1: slid=1, 1→2: slid=12, 2→3: slid=23 + structure.carriers[0].insert(1); // edge 0→1 + structure.carriers[0].insert(12); // edge 1→2 + structure.carriers[0].insert(23); // edge 2→3 + + let mut ctx = StreamContext::new(); + + // Query: scan all edges (base facts) + let base_facts = QueryOp::Scan { sort_idx: 0 }; + + // In a full implementation, we'd: + // 1. Differentiate the base facts to get δR + // 2. Join δR with ∫R to derive new transitive edges + // 3. 
Integrate to accumulate all reachable pairs + // + // For this test, we just verify the DBSP operators work together: + + // Step 1: ∫(δ(scan)) should equal the scan itself for stable input + let incremental_view = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(base_facts.clone()), + state_id: 0, + }), + state_id: 1, + }; + + // First execution: should see all 3 edges + let result = execute_stream(&incremental_view, &structure, &mut ctx); + assert_eq!(result.len(), 3, "should have 3 edges initially"); + ctx.step(); + + // Add new edge: 3→4 (encoded as slid=34) + structure.carriers[0].insert(34); + + // Second execution: diff should detect +1 new edge, integrate shows all 4 + let result = execute_stream(&incremental_view, &structure, &mut ctx); + assert_eq!(result.len(), 4, "should have 4 edges after adding 3→4"); + + // Verify incrementality: diff should show just the new edge + let diff_only = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 2, // fresh state_id + }; + let mut fresh_ctx = StreamContext::new(); + + // First step: all edges are "new" + let delta = execute_stream(&diff_only, &structure, &mut fresh_ctx); + assert_eq!(delta.len(), 4); + fresh_ctx.step(); + + // Second step with no changes: delta should be empty + let delta = execute_stream(&diff_only, &structure, &mut fresh_ctx); + assert!(delta.is_empty(), "no changes, delta should be empty"); + } + + // ======================================================================== + // Hash Join Tests (execute_optimized) + // ======================================================================== + + #[test] + fn test_hash_join_basic() { + // Test that hash join produces same results as nested loop join + let mut structure = Structure::new(2); + // Sort 0: {0, 1, 2} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + // Sort 1: {0, 1, 2} (some overlap for equijoin) + 
structure.carriers[1].insert(0); + structure.carriers[1].insert(1); + structure.carriers[1].insert(2); + + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + // Results should be identical + assert_eq!(naive_result.len(), optimized_result.len()); + for (tuple, mult) in naive_result.iter() { + assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "tuple {:?} has different multiplicity", + tuple + ); + } + } + + #[test] + fn test_hash_join_no_matches() { + // Test equijoin with no matching keys + let mut structure = Structure::new(2); + // Sort 0: {0, 1} + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + // Sort 1: {10, 11} (no overlap) + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + assert!(naive_result.is_empty()); + assert!(optimized_result.is_empty()); + } + + #[test] + fn test_hash_join_asymmetric() { + // Test that join order is preserved when left is larger than right + let mut structure = Structure::new(2); + // Sort 0: {0, 1, 2, 3, 4} (larger) + for i in 0..5 { + structure.carriers[0].insert(i); + } + // Sort 1: {2, 3} (smaller, will be build side) + structure.carriers[1].insert(2); + structure.carriers[1].insert(3); + + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + 
}; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + // Should have matches for 2 and 3 + assert_eq!(naive_result.len(), 2); + assert_eq!(optimized_result.len(), 2); + + // Verify tuple order is (left_val, right_val) + assert!(optimized_result.tuples.contains_key(&vec![ + Slid::from_usize(2), + Slid::from_usize(2) + ])); + assert!(optimized_result.tuples.contains_key(&vec![ + Slid::from_usize(3), + Slid::from_usize(3) + ])); + } + + #[test] + fn test_hash_join_with_duplicates() { + // Test hash join correctly handles multiplicities + let mut structure = Structure::new(2); + // Both sides have element 1 + structure.carriers[0].insert(1); + structure.carriers[1].insert(1); + + // Join constant bags with multiplicities + let left = QueryOp::Union { + left: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + right: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + }; + let right = QueryOp::Union { + left: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + right: Box::new(QueryOp::Union { + left: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + right: Box::new(QueryOp::Constant { tuple: vec![Slid::from_usize(1)] }), + }), + }; + + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + // 2 * 3 = 6 (multiplicity multiplication) + let tuple = vec![Slid::from_usize(1), Slid::from_usize(1)]; + assert_eq!(naive_result.tuples.get(&tuple), Some(&6)); + assert_eq!(optimized_result.tuples.get(&tuple), Some(&6)); + } + + #[test] + fn test_optimized_matches_naive_cross_join() { + // Cross join should work the same in optimized backend + let mut structure = Structure::new(2); + structure.carriers[0].insert(0); + 
structure.carriers[0].insert(1); + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + + let naive_result = execute(&join, &structure); + let optimized_result = super::execute_optimized(&join, &structure); + + assert_eq!(naive_result.len(), 4); // 2 * 2 = 4 + assert_eq!(optimized_result.len(), 4); + + for (tuple, mult) in naive_result.iter() { + assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "tuple {:?} mismatch", + tuple + ); + } + } + + #[test] + fn test_optimized_nested_joins() { + // Test optimized backend with nested joins + let mut structure = Structure::new(3); + structure.carriers[0].insert(1); + structure.carriers[1].insert(1); + structure.carriers[2].insert(1); + + // (A ⋈ B) ⋈ C + let join_ab = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let join_abc = QueryOp::Join { + left: Box::new(join_ab), + right: Box::new(QueryOp::Scan { sort_idx: 2 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join_abc, &structure); + let optimized_result = super::execute_optimized(&join_abc, &structure); + + assert_eq!(naive_result.len(), optimized_result.len()); + // Result should be (1, 1, 1) + let expected = vec![Slid::from_usize(1), Slid::from_usize(1), Slid::from_usize(1)]; + assert!(optimized_result.tuples.contains_key(&expected)); + } + + // ======================================================================== + // Display / Pretty Printing Tests + // ======================================================================== + + #[test] + fn test_display_scan() { + let plan = QueryOp::Scan { sort_idx: 0 }; + let display = format!("{}", plan); + assert_eq!(display, "Scan(sort=0)"); + } + + #[test] 
+ fn test_display_filter() { + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 1 }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(42), + }, + }; + let display = format!("{}", plan); + assert!(display.contains("Filter(c0=42)")); + assert!(display.contains("Scan(sort=1)")); + } + + #[test] + fn test_display_join() { + let plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + let display = format!("{}", plan); + assert!(display.contains("Join(c0=c0)")); + assert!(display.contains("Scan(sort=0)")); + assert!(display.contains("Scan(sort=1)")); + } + + #[test] + fn test_display_cross_join() { + let plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + let display = format!("{}", plan); + assert!(display.contains("Join(×)")); + } + + #[test] + fn test_display_dbsp_operators() { + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + state_id: 1, + }; + let display = format!("{}", plan); + assert!(display.contains("∫(state=1)")); + assert!(display.contains("δ(state=0)")); + assert!(display.contains("Scan(sort=0)")); + } + + #[test] + fn test_display_nested_plan() { + // Filter(Join(×) + // Scan(0) + // Scan(1)) + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }), + pred: Predicate::ColEqCol { left: 0, right: 1 }, + }; + let display = format!("{}", plan); + // Verify structure is maintained + assert!(display.contains("Filter(c0=c1)")); + assert!(display.contains("Join(×)")); + // Verify indentation is present (child ops should be indented) + assert!(display.contains(" 
Scan(sort=0)")); + assert!(display.contains(" Scan(sort=1)")); + } + + #[test] + fn test_display_predicate_compound() { + let pred = Predicate::And( + Box::new(Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }), + Box::new(Predicate::Or( + Box::new(Predicate::True), + Box::new(Predicate::False), + )), + ); + let display = format!("{}", pred); + assert_eq!(display, "(c0=1 ∧ (⊤ ∨ ⊥))"); + } +} diff --git a/src/query/chase.rs b/src/query/chase.rs new file mode 100644 index 0000000..aa3ee5d --- /dev/null +++ b/src/query/chase.rs @@ -0,0 +1,710 @@ +//! Chase algorithm for computing derived relations. +//! +//! The chase takes a structure and a set of axioms (sequents) and repeatedly +//! applies the axioms until a fixpoint is reached. This is the standard database +//! chase algorithm adapted for geometric logic. +//! +//! # Implementation +//! +//! This implementation uses the tensor subsystem to evaluate premises: +//! 1. Compile premise to TensorExpr (handles existentials, conjunctions, etc.) +//! 2. Materialize to get all satisfying variable assignments +//! 3. For each assignment, fire the conclusion (add relations, create elements) +//! +//! This approach is strictly more powerful than query-based chase because +//! the tensor system naturally handles existential quantification in premises +//! via tensor contraction. +//! +//! # Supported Axiom Patterns +//! +//! **Premises** (anything the tensor system can compile): +//! - Relations: `R(x,y)` +//! - Conjunctions: `R(x,y), S(y,z)` +//! - Existentials: `∃e. f(e) = x ∧ g(e) = y` +//! - Equalities: `f(x) = y`, `f(x) = g(y)` +//! - Disjunctions: `R(x) ∨ S(x)` +//! +//! **Conclusions**: +//! - Relations: `⊢ R(x,y)` — add tuple to relation +//! - Existentials: `⊢ ∃b. f(b) = y` — create element with function binding +//! - Conjunctions: `⊢ R(x,y), f(x) = z` — multiple effects +//! +//! # Usage +//! +//! ```ignore +//! use geolog::query::chase::chase_fixpoint; +//! +//! // Run chase to fixpoint +//! 
let iterations = chase_fixpoint( +//! &theory.theory.axioms, +//! &mut structure, +//! &mut universe, +//! &theory.theory.signature, +//! 100, +//! )?; +//! ``` + +use std::collections::HashMap; + +use crate::cc::{CongruenceClosure, EquationReason}; +use crate::core::{DerivedSort, Formula, RelationStorage, Sequent, Signature, Structure, Term}; +use crate::id::{NumericId, Slid}; +use crate::tensor::{check_sequent, CheckResult}; +use crate::universe::Universe; + +/// Error type for chase operations +#[derive(Debug, Clone)] +pub enum ChaseError { + /// Unsupported formula in conclusion + UnsupportedConclusion(String), + /// Variable not bound + UnboundVariable(String), + /// Function conflict (different values for same input) + FunctionConflict(String), + /// Chase did not converge + MaxIterationsExceeded(usize), + /// Tensor compilation failed + TensorCompilationFailed(String), +} + +impl std::fmt::Display for ChaseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::UnsupportedConclusion(s) => write!(f, "Unsupported conclusion: {s}"), + Self::UnboundVariable(s) => write!(f, "Unbound variable: {s}"), + Self::FunctionConflict(s) => write!(f, "Function conflict: {s}"), + Self::MaxIterationsExceeded(n) => write!(f, "Chase did not converge after {n} iterations"), + Self::TensorCompilationFailed(s) => write!(f, "Tensor compilation failed: {s}"), + } + } +} + +impl std::error::Error for ChaseError {} + +/// Variable binding: maps variable names to Slids +pub type Binding = HashMap; + +/// Execute one step of the chase algorithm. +/// +/// Iterates over all axioms, evaluates premises using the tensor system, +/// and fires conclusions for each satisfying assignment. +/// +/// Returns `true` if any changes were made. 
+pub fn chase_step( + axioms: &[Sequent], + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result { + let mut changed = false; + + for axiom in axioms { + changed |= fire_axiom(axiom, structure, cc, universe, sig)?; + } + + Ok(changed) +} + +/// Fire a single axiom: find violations using tensor system, fire conclusion only for violations. +/// +/// This is the key to correct chase semantics: we only create fresh elements when +/// the tensor system confirms there is NO existing witness for the conclusion. +fn fire_axiom( + axiom: &Sequent, + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result { + // Check the axiom - if compilation fails due to unsupported patterns, skip silently + let violations = match check_sequent(axiom, structure, sig) { + Ok(CheckResult::Satisfied) => { + // Axiom is already satisfied - nothing to fire + return Ok(false); + } + Ok(CheckResult::Violated(vs)) => vs, + Err(_) => { + // Tensor compilation failed (unsupported pattern) + // Skip this axiom silently + return Ok(false); + } + }; + + if violations.is_empty() { + return Ok(false); + } + + // Build index→Slid lookup for each context variable + let index_to_slid: Vec> = axiom.context.vars.iter() + .map(|(_, sort)| carrier_to_slid_vec(structure, sort)) + .collect(); + + // Map from variable name to its position in the context + let var_to_ctx_idx: HashMap<&str, usize> = axiom.context.vars.iter() + .enumerate() + .map(|(i, (name, _))| (name.as_str(), i)) + .collect(); + + let mut changed = false; + + // Fire conclusion ONLY for violations (where premise holds but conclusion doesn't) + for violation in violations { + // Build binding from violation assignment + let binding: Binding = violation.variable_names.iter() + .enumerate() + .filter_map(|(tensor_idx, var_name)| { + let ctx_idx = var_to_ctx_idx.get(var_name.as_str())?; + let slid_vec = &index_to_slid[*ctx_idx]; 
+ let tensor_val = violation.assignment.get(tensor_idx)?; + let slid = slid_vec.get(*tensor_val)?; + Some((var_name.clone(), *slid)) + }) + .collect(); + + // Fire conclusion with this binding + match fire_conclusion(&axiom.conclusion, &binding, structure, cc, universe, sig) { + Ok(c) => changed |= c, + Err(_) => { + // Unsupported conclusion pattern - skip this axiom silently + return Ok(false); + } + } + } + + Ok(changed) +} + +/// Convert a carrier to a Vec of Slids for index→Slid lookup +fn carrier_to_slid_vec(structure: &Structure, sort: &DerivedSort) -> Vec { + match sort { + DerivedSort::Base(sort_id) => { + structure.carriers[*sort_id] + .iter() + .map(|u| Slid::from_usize(u as usize)) + .collect() + } + DerivedSort::Product(_) => { + // Product sorts: would need to enumerate all combinations + // For now, return empty (these are rare in practice) + vec![] + } + } +} + +/// Fire a conclusion formula given a variable binding. +/// Returns true if any changes were made. +fn fire_conclusion( + formula: &Formula, + binding: &Binding, + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result { + match formula { + Formula::True => Ok(false), + + Formula::False => { + // Contradiction - this shouldn't happen in valid chase + Err(ChaseError::UnsupportedConclusion("False in conclusion".to_string())) + } + + Formula::Rel(rel_id, term) => { + // Add tuple to relation + let tuple = eval_term_to_tuple(term, binding, structure)?; + + // Check if already present (using canonical representatives) + let canonical_tuple: Vec = tuple.iter() + .map(|&s| cc.canonical(s)) + .collect(); + + // Check if a canonically-equivalent tuple exists + let exists = structure.relations[*rel_id].iter().any(|existing| { + if existing.len() != canonical_tuple.len() { + return false; + } + existing.iter().zip(canonical_tuple.iter()).all(|(e, c)| { + cc.canonical(*e) == *c + }) + }); + + if exists { + return Ok(false); + } + + 
structure.relations[*rel_id].insert(tuple); + Ok(true) + } + + Formula::Conj(formulas) => { + let mut changed = false; + for f in formulas { + changed |= fire_conclusion(f, binding, structure, cc, universe, sig)?; + } + Ok(changed) + } + + Formula::Disj(formulas) => { + // Naive parallel chase: fire all disjuncts + // (sound but potentially adds more facts than necessary) + let mut changed = false; + for f in formulas { + changed |= fire_conclusion(f, binding, structure, cc, universe, sig)?; + } + Ok(changed) + } + + Formula::Eq(left, right) => { + fire_equality(left, right, binding, structure, cc, sig) + } + + Formula::Exists(var_name, sort, body) => { + fire_existential(var_name, sort, body, binding, structure, cc, universe, sig) + } + } +} + +/// Evaluate a term to a tuple of Slids (for relation arguments) +fn eval_term_to_tuple( + term: &Term, + binding: &Binding, + structure: &Structure, +) -> Result, ChaseError> { + match term { + Term::Var(name, _) => { + let slid = binding.get(name) + .ok_or_else(|| ChaseError::UnboundVariable(name.clone()))?; + Ok(vec![*slid]) + } + Term::Record(fields) => { + let mut tuple = Vec::new(); + for (_, field_term) in fields { + tuple.extend(eval_term_to_tuple(field_term, binding, structure)?); + } + Ok(tuple) + } + Term::App(_, _) => { + // Delegate to eval_term_to_slid which handles function application + let result = eval_term_to_slid(term, binding, structure)?; + Ok(vec![result]) + } + Term::Project(_, _) => { + Err(ChaseError::UnsupportedConclusion( + "Projection in relation argument".to_string() + )) + } + } +} + +/// Evaluate a term to a single Slid +fn eval_term_to_slid( + term: &Term, + binding: &Binding, + structure: &Structure, +) -> Result { + match term { + Term::Var(name, _) => { + binding.get(name) + .copied() + .ok_or_else(|| ChaseError::UnboundVariable(name.clone())) + } + Term::App(func_idx, arg) => { + let arg_slid = eval_term_to_slid(arg, binding, structure)?; + let local_id = 
structure.sort_local_id(arg_slid); + + structure.get_function(*func_idx, local_id) + .ok_or_else(|| ChaseError::UnboundVariable( + format!("Function {} undefined at {:?}", func_idx, arg_slid) + )) + } + Term::Project(base, field) => { + let _base_slid = eval_term_to_slid(base, binding, structure)?; + // Product projection - would need more structure info + Err(ChaseError::UnsupportedConclusion( + format!("Projection .{} not yet supported in chase", field) + )) + } + Term::Record(_) => { + Err(ChaseError::UnsupportedConclusion( + "Record term in scalar position".to_string() + )) + } + } +} + +/// Fire an equality in conclusion: f(x) = y, x = y, etc. +fn fire_equality( + left: &Term, + right: &Term, + binding: &Binding, + structure: &mut Structure, + cc: &mut CongruenceClosure, + sig: &Signature, +) -> Result { + match (left, right) { + // f(arg) = value + (Term::App(func_idx, arg), value) | (value, Term::App(func_idx, arg)) => { + let arg_slid = eval_term_to_slid(arg, binding, structure)?; + let local_id = structure.sort_local_id(arg_slid); + + // Check if dealing with product codomain + let func_info = &sig.functions[*func_idx]; + match &func_info.codomain { + DerivedSort::Base(_) => { + // Simple codomain + let value_slid = eval_term_to_slid(value, binding, structure)?; + + // Check if already defined + if let Some(existing) = structure.get_function(*func_idx, local_id) { + // Check if values are equal (using CC) + if cc.are_equal(existing, value_slid) { + return Ok(false); // Already set to equivalent value + } + // Function conflict: add equation to CC instead of error + // (this is how we propagate equalities through functions) + cc.add_equation(existing, value_slid, EquationReason::FunctionConflict { + func_id: *func_idx, + domain: arg_slid, + }); + return Ok(true); // Changed (added equation) + } + + structure.define_function(*func_idx, arg_slid, value_slid) + .map_err(|e| ChaseError::FunctionConflict(format!("{:?}", e)))?; + Ok(true) + } + 
DerivedSort::Product(_fields) => { + // Product codomain: f(x) = [field1: v1, ...] + if let Term::Record(value_fields) = value { + let codomain_values: Vec<(&str, Slid)> = value_fields.iter() + .map(|(name, term)| { + let slid = eval_term_to_slid(term, binding, structure)?; + Ok((name.as_str(), slid)) + }) + .collect::, ChaseError>>()?; + + // Check if already defined + if let Some(existing) = structure.get_function_product_codomain(*func_idx, local_id) { + let all_match = codomain_values.iter().all(|(name, expected)| { + existing.iter().any(|(n, v)| n == name && cc.are_equal(*v, *expected)) + }); + if all_match { + return Ok(false); + } + return Err(ChaseError::FunctionConflict( + format!("Function {} already defined at {:?} with different values", func_idx, arg_slid) + )); + } + + structure.define_function_product_codomain(*func_idx, arg_slid, &codomain_values) + .map_err(|e| ChaseError::FunctionConflict(format!("{:?}", e)))?; + Ok(true) + } else { + Err(ChaseError::UnsupportedConclusion( + format!("Expected record for product codomain function, got {:?}", value) + )) + } + } + } + } + + // x = y (variable equality) - add to congruence closure! + (Term::Var(name1, _), Term::Var(name2, _)) => { + let slid1 = binding.get(name1) + .ok_or_else(|| ChaseError::UnboundVariable(name1.clone()))?; + let slid2 = binding.get(name2) + .ok_or_else(|| ChaseError::UnboundVariable(name2.clone()))?; + + // Check if already equal in CC + if cc.are_equal(*slid1, *slid2) { + Ok(false) // Already equivalent + } else { + // Add equation to congruence closure + cc.add_equation(*slid1, *slid2, EquationReason::ChaseConclusion); + Ok(true) // Changed! + } + } + + _ => Err(ChaseError::UnsupportedConclusion( + format!("Unsupported equality pattern: {:?} = {:?}", left, right) + )) + } +} + +/// Check if a formula is satisfied given a variable binding. +/// This is used for witness search in existential conclusions. +/// Uses CC for canonical relation lookups and equality checks. 
+fn check_formula_satisfied( + formula: &Formula, + binding: &Binding, + structure: &Structure, + cc: &mut CongruenceClosure, +) -> bool { + match formula { + Formula::True => true, + Formula::False => false, + + Formula::Rel(rel_id, term) => { + // Check if the tuple is in the relation (using canonical representatives) + if let Ok(tuple) = eval_term_to_tuple(term, binding, structure) { + let canonical_tuple: Vec = tuple.iter() + .map(|&s| cc.canonical(s)) + .collect(); + + // Check if a canonically-equivalent tuple exists + structure.relations[*rel_id].iter().any(|existing| { + if existing.len() != canonical_tuple.len() { + return false; + } + existing.iter().zip(canonical_tuple.iter()).all(|(e, c)| { + cc.canonical(*e) == *c + }) + }) + } else { + false // Couldn't evaluate term (unbound variable) + } + } + + Formula::Conj(fs) => { + fs.iter().all(|f| check_formula_satisfied(f, binding, structure, cc)) + } + + Formula::Disj(fs) => { + fs.iter().any(|f| check_formula_satisfied(f, binding, structure, cc)) + } + + Formula::Eq(t1, t2) => { + // Check if both terms evaluate to equivalent values (using CC) + match (eval_term_to_slid(t1, binding, structure), eval_term_to_slid(t2, binding, structure)) { + (Ok(s1), Ok(s2)) => cc.are_equal(s1, s2), + _ => false // Couldn't evaluate (unbound variable or undefined function) + } + } + + Formula::Exists(inner_var, inner_sort, inner_body) => { + // Check if any witness exists in the carrier + let DerivedSort::Base(sort_idx) = inner_sort else { + return false; // Product sorts not supported + }; + + structure.carriers[*sort_idx].iter().any(|w_u64| { + let witness = Slid::from_usize(w_u64 as usize); + let mut extended = binding.clone(); + extended.insert(inner_var.clone(), witness); + check_formula_satisfied(inner_body, &extended, structure, cc) + }) + } + } +} + +/// Fire an existential in conclusion: ∃x:S. body +/// This creates a new element if no witness exists. +/// +/// The algorithm: +/// 1. 
Search the carrier of S for an existing witness w where body[x↦w] holds +/// 2. If found, do nothing (witness exists) +/// 3. If not found, create a fresh element w and fire body as conclusion with x↦w +fn fire_existential( + var_name: &str, + sort: &DerivedSort, + body: &Formula, + binding: &Binding, + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, +) -> Result { + let DerivedSort::Base(sort_idx) = sort else { + return Err(ChaseError::UnsupportedConclusion( + "Existential with product sort not yet supported".to_string() + )); + }; + + // Search for existing witness by checking if body is satisfied (using CC for canonical lookups) + let carrier = &structure.carriers[*sort_idx]; + let witness_found = carrier.iter().any(|elem_u64| { + let elem_slid = Slid::from_usize(elem_u64 as usize); + let mut extended_binding = binding.clone(); + extended_binding.insert(var_name.to_string(), elem_slid); + check_formula_satisfied(body, &extended_binding, structure, cc) + }); + + if witness_found { + return Ok(false); // Witness already exists, nothing to do + } + + // No witness exists - create a fresh element + let (new_elem, _) = structure.add_element(universe, *sort_idx); + + // Fire body as conclusion with the new element bound to var_name + let mut extended_binding = binding.clone(); + extended_binding.insert(var_name.to_string(), new_elem); + + // Use fire_conclusion to make the body true + // This handles relations, equalities, conjunctions uniformly + fire_conclusion(body, &extended_binding, structure, cc, universe, sig)?; + + Ok(true) +} + +/// Run the chase algorithm until a fixpoint is reached, with congruence closure. +/// +/// Repeatedly applies [`chase_step`] and propagates equations until no more changes occur. 
+/// +/// # Arguments +/// +/// * `axioms` - The sequents (axioms) to apply +/// * `structure` - The structure to modify +/// * `cc` - Congruence closure for equality reasoning +/// * `universe` - The universe for element creation +/// * `sig` - The signature +/// * `max_iterations` - Safety limit to prevent infinite loops +/// +/// # Returns +/// +/// The number of iterations taken to reach the fixpoint. +pub fn chase_fixpoint_with_cc( + axioms: &[Sequent], + structure: &mut Structure, + cc: &mut CongruenceClosure, + universe: &mut Universe, + sig: &Signature, + max_iterations: usize, +) -> Result { + let mut iterations = 0; + + loop { + if iterations >= max_iterations { + return Err(ChaseError::MaxIterationsExceeded(max_iterations)); + } + + // Fire axiom conclusions + let axiom_changed = chase_step(axioms, structure, cc, universe, sig)?; + + // Propagate pending equations in CC + let eq_changed = propagate_equations(structure, cc, sig); + + iterations += 1; + + if !axiom_changed && !eq_changed { + break; + } + } + + Ok(iterations) +} + +/// Propagate pending equations in the congruence closure. +/// +/// This merges equivalence classes and detects function conflicts +/// (which add new equations via congruence). 
+fn propagate_equations( + structure: &Structure, + cc: &mut CongruenceClosure, + sig: &Signature, +) -> bool { + let mut changed = false; + + while let Some(eq) = cc.pop_pending() { + // Merge the equivalence classes + if cc.merge(eq.lhs, eq.rhs) { + changed = true; + + // Check for function conflicts (congruence propagation) + // If f(a) = x and f(b) = y, and a = b (just merged), then x = y + for func_id in 0..sig.functions.len() { + if func_id >= structure.functions.len() { + continue; + } + + let lhs_local = structure.sort_local_id(eq.lhs); + let rhs_local = structure.sort_local_id(eq.rhs); + + let lhs_val = structure.get_function(func_id, lhs_local); + let rhs_val = structure.get_function(func_id, rhs_local); + + if let (Some(lv), Some(rv)) = (lhs_val, rhs_val) + && !cc.are_equal(lv, rv) { + // Congruence: f(a) = lv, f(b) = rv, a = b implies lv = rv + cc.add_equation(lv, rv, EquationReason::Congruence { func_id }); + } + } + } + } + + changed +} + +/// Canonicalize the structure based on the congruence closure. +/// +/// After the chase, some elements may have been merged in the CC but the +/// structure still contains distinct elements. This function: +/// 1. Removes non-canonical elements from carriers +/// 2. Replaces relation tuples with their canonical forms +fn canonicalize_structure(structure: &mut Structure, cc: &mut CongruenceClosure) { + use crate::core::{RelationStorage, VecRelation}; + + // 1. Canonicalize carriers: keep only canonical representatives + for carrier in &mut structure.carriers { + let elements: Vec = carrier.iter().collect(); + carrier.clear(); + for elem in elements { + let slid = Slid::from_usize(elem as usize); + let canonical = cc.canonical(slid); + // Only keep if this element is its own canonical representative + if canonical == slid { + carrier.insert(elem); + } + } + } + + // 2. 
Canonicalize relations: replace tuples with canonical forms + for rel in &mut structure.relations { + let canonical_tuples: Vec> = rel.iter() + .map(|tuple| tuple.iter().map(|&s| cc.canonical(s)).collect()) + .collect(); + + let arity = rel.arity(); + let mut new_rel = VecRelation::new(arity); + for tuple in canonical_tuples { + new_rel.insert(tuple); + } + + *rel = new_rel; + } +} + +/// Run the chase algorithm until a fixpoint is reached. +/// +/// This is a convenience wrapper that creates a fresh congruence closure. +/// Use [`chase_fixpoint_with_cc`] if you need to provide your own CC. +/// +/// # Arguments +/// +/// * `axioms` - The sequents (axioms) to apply +/// * `structure` - The structure to modify +/// * `universe` - The universe for element creation +/// * `sig` - The signature +/// * `max_iterations` - Safety limit to prevent infinite loops +/// +/// # Returns +/// +/// The number of iterations taken to reach the fixpoint. +pub fn chase_fixpoint( + axioms: &[Sequent], + structure: &mut Structure, + universe: &mut Universe, + sig: &Signature, + max_iterations: usize, +) -> Result { + let mut cc = CongruenceClosure::new(); + let iterations = chase_fixpoint_with_cc(axioms, structure, &mut cc, universe, sig, max_iterations)?; + + // Canonicalize structure to reflect CC merges before returning + canonicalize_structure(structure, &mut cc); + + Ok(iterations) +} + +// Tests are in tests/unit_chase.rs diff --git a/src/query/compile.rs b/src/query/compile.rs new file mode 100644 index 0000000..d7dfa0e --- /dev/null +++ b/src/query/compile.rs @@ -0,0 +1,702 @@ +//! Query compiler: high-level queries → QueryOp plans. +//! +//! This module compiles query specifications into executable QueryOp plans. +//! It supports: +//! - Single-sort queries (like `Pattern`) +//! - Multi-sort queries with joins +//! - Function application and projection +//! +//! # Query Styles +//! +//! **∀-style (open sorts):** Elements determined by constraints. +//! 
Compiled to relational algebra (scan, filter, join, project). +//! +//! **∃-style (closed sorts):** Elements are declared constants. +//! Compiled to constraint satisfaction (witness enumeration). +//! [Not yet implemented] +//! +//! # Design +//! +//! Query compilation is currently direct (Query → QueryOp). +//! A future homoiconic version would compile to RelAlgIR instances, +//! which would then be interpreted by the backend. + +use crate::id::Slid; +use super::backend::{JoinCond, Predicate, QueryOp}; + +/// A query specification that can involve multiple sorts and joins. +/// +/// This generalizes `Pattern` to handle: +/// - Multiple source sorts +/// - Joins between sorts +/// - Complex constraints across sorts +/// +/// # Example: Find all Func where Func/theory == target +/// +/// ```ignore +/// let query = Query::scan(func_sort) +/// .filter_eq(theory_func, 0, target_slid) +/// .build(); +/// ``` +/// +/// # Example: Find all (Srt, Func) pairs where Srt/theory == Func/theory +/// +/// ```ignore +/// let query = Query::scan(srt_sort) +/// .join_scan(func_sort) +/// .join_on_func(srt_theory_func, 0, func_theory_func, 1) +/// .build(); +/// ``` +#[derive(Debug, Clone)] +pub struct Query { + /// Sources: each is (sort_idx, alias). Alias is used in constraints. + sources: Vec, + /// Constraints to apply (filters and join conditions) + constraints: Vec, + /// Projection: which columns to return + projection: Projection, +} + +/// A source in the query (a sort to scan). +#[derive(Debug, Clone)] +struct Source { + /// Sort index to scan + sort_idx: usize, + /// Column offset in the combined tuple + /// (each source adds 1 column for its element) + #[allow(dead_code)] // Used for tracking, will be needed for complex projections + col_offset: usize, +} + +/// A constraint in the query. 
+#[derive(Debug, Clone)] +enum Constraint { + /// func(col) == constant + FuncEqConst { + func_idx: usize, + arg_col: usize, + expected: Slid, + }, + /// func1(col1) == func2(col2) + FuncEqFunc { + func1_idx: usize, + arg1_col: usize, + func2_idx: usize, + arg2_col: usize, + }, + /// col1 == col2 (direct element equality) + ColEq { + col1: usize, + col2: usize, + }, + /// col == constant + ColEqConst { + col: usize, + expected: Slid, + }, +} + +/// Projection specification. +#[derive(Debug, Clone)] +enum Projection { + /// Return all columns + All, + /// Return specific columns + Cols(Vec), + /// Return specific columns with function applications + FuncCols(Vec), +} + +/// A column in projection, possibly with function application. +#[derive(Debug, Clone)] +struct FuncCol { + /// Column to use as argument + arg_col: usize, + /// Function to apply (None = just the element) + func_idx: Option, +} + +impl Query { + /// Create a new query scanning a single sort. + pub fn scan(sort_idx: usize) -> QueryBuilder { + QueryBuilder { + sources: vec![Source { sort_idx, col_offset: 0 }], + constraints: vec![], + projection: Projection::All, + next_col: 1, + } + } +} + +/// Builder for constructing queries fluently. +#[derive(Debug, Clone)] +pub struct QueryBuilder { + sources: Vec, + constraints: Vec, + projection: Projection, + next_col: usize, +} + +impl QueryBuilder { + /// Add another sort to scan (creates a cross join, to be constrained). + pub fn join_scan(mut self, sort_idx: usize) -> Self { + let col_offset = self.next_col; + self.sources.push(Source { sort_idx, col_offset }); + self.next_col += 1; + self + } + + /// Add a filter: func(col) == expected. + /// + /// `col` is 0-indexed, referring to which source's element. + pub fn filter_eq(mut self, func_idx: usize, arg_col: usize, expected: Slid) -> Self { + self.constraints.push(Constraint::FuncEqConst { + func_idx, + arg_col, + expected, + }); + self + } + + /// Add a join condition: func1(col1) == func2(col2). 
+ /// + /// Used to join two scanned sorts by comparing function values. + pub fn join_on_func( + mut self, + func1_idx: usize, + arg1_col: usize, + func2_idx: usize, + arg2_col: usize, + ) -> Self { + self.constraints.push(Constraint::FuncEqFunc { + func1_idx, + arg1_col, + func2_idx, + arg2_col, + }); + self + } + + /// Add an element equality constraint: col1 == col2. + pub fn where_eq(mut self, col1: usize, col2: usize) -> Self { + self.constraints.push(Constraint::ColEq { col1, col2 }); + self + } + + /// Add a constant equality constraint: col == expected. + pub fn where_const(mut self, col: usize, expected: Slid) -> Self { + self.constraints.push(Constraint::ColEqConst { col, expected }); + self + } + + /// Project to specific columns. + pub fn project(mut self, cols: Vec) -> Self { + self.projection = Projection::Cols(cols); + self + } + + /// Project with function applications. + pub fn project_funcs(mut self, func_cols: Vec<(usize, Option)>) -> Self { + self.projection = Projection::FuncCols( + func_cols + .into_iter() + .map(|(arg_col, func_idx)| FuncCol { arg_col, func_idx }) + .collect(), + ); + self + } + + /// Build the final Query. + pub fn build(self) -> Query { + Query { + sources: self.sources, + constraints: self.constraints, + projection: self.projection, + } + } + + /// Compile directly to QueryOp (skipping Query intermediate). + pub fn compile(self) -> QueryOp { + self.build().compile() + } +} + +impl Query { + /// Compile the query to a QueryOp plan. + /// + /// The compilation strategy: + /// 1. Scan each source sort + /// 2. Join scans together (cross join if >1) + /// 3. Handle FuncEqFunc constraints by applying functions, then filtering + /// 4. Apply other constraints as filters + /// 5. 
Apply projection + pub fn compile(&self) -> QueryOp { + if self.sources.is_empty() { + return QueryOp::Empty; + } + + // Step 1: Build base plan from sources + let mut plan = QueryOp::Scan { + sort_idx: self.sources[0].sort_idx, + }; + + // Track current column count (each source adds 1 column) + let mut current_cols = 1; + + // If multiple sources, join them + for source in &self.sources[1..] { + let right = QueryOp::Scan { + sort_idx: source.sort_idx, + }; + plan = QueryOp::Join { + left: Box::new(plan), + right: Box::new(right), + cond: JoinCond::Cross, // Start with cross join, constraints will filter + }; + current_cols += 1; + } + + // Step 2: Separate FuncEqFunc constraints (need Apply) from others + let mut func_eq_func_constraints = Vec::new(); + let mut simple_constraints = Vec::new(); + + for constraint in &self.constraints { + match constraint { + Constraint::FuncEqFunc { .. } => func_eq_func_constraints.push(constraint), + _ => simple_constraints.push(constraint), + } + } + + // Step 3: Handle FuncEqFunc constraints + // For each, apply both functions, track the added columns, then filter on equality + for constraint in func_eq_func_constraints { + if let Constraint::FuncEqFunc { + func1_idx, + arg1_col, + func2_idx, + arg2_col, + } = constraint + { + // Apply func1 to arg1_col, result goes in current_cols + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func1_idx, + arg_col: *arg1_col, + }; + let col1_result = current_cols; + current_cols += 1; + + // Apply func2 to arg2_col, result goes in current_cols + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func2_idx, + arg_col: *arg2_col, + }; + let col2_result = current_cols; + current_cols += 1; + + // Filter where the two result columns are equal + plan = QueryOp::Filter { + input: Box::new(plan), + pred: Predicate::ColEqCol { + left: col1_result, + right: col2_result, + }, + }; + } + } + + // Step 4: Apply simple constraints as filters + for constraint in 
simple_constraints { + let pred = match constraint { + Constraint::FuncEqConst { + func_idx, + arg_col, + expected, + } => Predicate::FuncEqConst { + func_idx: *func_idx, + arg_col: *arg_col, + expected: *expected, + }, + Constraint::FuncEqFunc { .. } => { + unreachable!("FuncEqFunc already handled") + } + Constraint::ColEq { col1, col2 } => Predicate::ColEqCol { + left: *col1, + right: *col2, + }, + Constraint::ColEqConst { col, expected } => Predicate::ColEqConst { + col: *col, + val: *expected, + }, + }; + plan = QueryOp::Filter { + input: Box::new(plan), + pred, + }; + } + + // Step 5: Apply projection + match &self.projection { + Projection::All => { + // No projection needed, return all columns + } + Projection::Cols(cols) => { + plan = QueryOp::Project { + input: Box::new(plan), + columns: cols.clone(), + }; + } + Projection::FuncCols(func_cols) => { + // Apply each function, then project + let base_col = current_cols; // Start adding func results here + for fc in func_cols.iter() { + if let Some(func_idx) = fc.func_idx { + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx, + arg_col: fc.arg_col, + }; + current_cols += 1; + } + } + // Project to the added columns + if current_cols > base_col { + let columns: Vec = (base_col..current_cols).collect(); + plan = QueryOp::Project { + input: Box::new(plan), + columns, + }; + } + } + } + + plan + } +} + +// ============================================================================ +// Convenience functions for common query patterns +// ============================================================================ + +/// Compile a simple single-sort query: scan sort, filter by func == value. +/// +/// This is equivalent to `Pattern::new(sort).filter(func, value).compile()` +/// but uses the new Query API. 
+pub fn compile_simple_filter(sort_idx: usize, func_idx: usize, expected: Slid) -> QueryOp { + Query::scan(sort_idx) + .filter_eq(func_idx, 0, expected) + .compile() +} + +/// Compile a query that returns func(elem) for matching elements. +/// +/// scan(sort) |> filter(filter_func(elem) == expected) |> project(project_func(elem)) +pub fn compile_filter_project( + sort_idx: usize, + filter_func: usize, + expected: Slid, + project_func: usize, +) -> QueryOp { + // scan → filter → apply → project + let scan = QueryOp::Scan { sort_idx }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::FuncEqConst { + func_idx: filter_func, + arg_col: 0, + expected, + }, + }; + let apply = QueryOp::Apply { + input: Box::new(filter), + func_idx: project_func, + arg_col: 0, + }; + // Now we have (elem, func(elem)), project to just column 1 + QueryOp::Project { + input: Box::new(apply), + columns: vec![1], + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::id::NumericId; + + #[test] + fn test_simple_scan_compiles() { + let plan = Query::scan(0).compile(); + assert!(matches!(plan, QueryOp::Scan { sort_idx: 0 })); + } + + /// Test that Query-compiled plans produce same results as Pattern. + /// + /// This validates that the new Query API is equivalent to the + /// existing Pattern API for simple queries. 
+ #[test] + fn test_query_matches_pattern() { + use crate::core::Structure; + use crate::query::backend::execute; + use crate::query::{Pattern, Projection as PatternProjection}; + + // Create a structure with some data + let mut structure = Structure::new(2); + // Sort 0: elements 0, 1, 2 + structure.carriers[0].insert(0); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + // Sort 1: elements 10, 11 + structure.carriers[1].insert(10); + structure.carriers[1].insert(11); + + // Test 1: Simple scan + let pattern_plan = Pattern { + source_sort: 0, + constraints: vec![], + projection: PatternProjection::Element, + } + .compile(); + + let query_plan = Query::scan(0).compile(); + + let pattern_result = execute(&pattern_plan, &structure); + let query_result = execute(&query_plan, &structure); + + assert_eq!( + pattern_result.len(), + query_result.len(), + "Scan should return same number of results" + ); + + // Test 2: Scan with filter (using ColEqConst since we don't have functions) + let pattern_plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(1), + }, + }; + + let query_plan = Query::scan(0) + .where_const(0, Slid::from_usize(1)) + .compile(); + + let pattern_result = execute(&pattern_plan, &structure); + let query_result = execute(&query_plan, &structure); + + assert_eq!(pattern_result.len(), 1); + assert_eq!(query_result.len(), 1); + } + + /// Test FuncEqFunc constraint: func1(col1) == func2(col2) + #[test] + fn test_func_eq_func_join() { + use crate::core::Structure; + use crate::query::backend::execute; + use crate::universe::Universe; + + // Create a structure with two sorts + let mut structure = Structure::new(2); + let mut universe = Universe::new(); + + // Sort 0: elements a, b + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + + // Sort 1: elements x, y, z + let (x, _) = 
structure.add_element(&mut universe, 1); + let (y, _) = structure.add_element(&mut universe, 1); + let (z, _) = structure.add_element(&mut universe, 1); + + // Common target for function results + let target1 = Slid::from_usize(100); + let target2 = Slid::from_usize(200); + + // Initialize functions + // func0: Sort0 -> targets (a→100, b→200) + // func1: Sort1 -> targets (x→100, y→200, z→100) + structure.init_functions(&[Some(0), Some(1)]); + + structure.define_function(0, a, target1).unwrap(); + structure.define_function(0, b, target2).unwrap(); + structure.define_function(1, x, target1).unwrap(); + structure.define_function(1, y, target2).unwrap(); + structure.define_function(1, z, target1).unwrap(); + + // Query: Find all (s0, s1) where func0(s0) == func1(s1) + // Expected matches: + // - (a, x) because func0(a)=100 == func1(x)=100 + // - (a, z) because func0(a)=100 == func1(z)=100 + // - (b, y) because func0(b)=200 == func1(y)=200 + + let plan = Query::scan(0) + .join_scan(1) + .join_on_func(0, 0, 1, 1) // func0(col0) == func1(col1) + .compile(); + + let result = execute(&plan, &structure); + + // Should have exactly 3 matching pairs + assert_eq!( + result.len(), + 3, + "Expected 3 matching pairs, got {}", + result.len() + ); + } + + /// Integration test: validate compiled queries against bootstrap_queries. + /// + /// This test creates a real theory using the REPL, then verifies that + /// queries compiled with the Query API produce the same results as + /// the handcoded bootstrap_queries methods. 
+ #[test] + fn test_query_matches_bootstrap_queries() { + use crate::repl::ReplState; + + // Create a theory via REPL + let source = r#" + theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + } + "#; + + let mut repl = ReplState::new(); + let _ = repl.execute_geolog(source); + + // Get the theory slid + let theory_slid = match repl.store.resolve_name("Graph") { + Some((slid, _)) => slid, + None => panic!("Theory 'Graph' not found"), + }; + + // Get bootstrap_queries result + let bootstrap_sorts = repl.store.query_theory_sorts(theory_slid); + + // Now compile a Query that does the same thing: + // "Find all Srt where Srt/theory == theory_slid" + let srt_sort = repl.store.sort_ids.srt.expect("Srt sort not found"); + let theory_func = repl + .store + .func_ids + .srt_theory + .expect("Srt/theory func not found"); + + // Compile the query + let plan = compile_simple_filter(srt_sort, theory_func, theory_slid); + + // Execute against the store's meta structure + let result = crate::query::backend::execute(&plan, &repl.store.meta); + + // Compare: should have same number of sorts + assert_eq!( + bootstrap_sorts.len(), + result.len(), + "Query should return same number of sorts as bootstrap_queries.\n\ + Bootstrap returned {} sorts: {:?}\n\ + Compiled query returned {} tuples", + bootstrap_sorts.len(), + bootstrap_sorts.iter().map(|s| &s.name).collect::>(), + result.len() + ); + + // Verify we got V and E + assert!( + bootstrap_sorts.len() >= 2, + "Graph theory should have at least V and E sorts" + ); + } + + #[test] + fn test_filter_compiles() { + let plan = Query::scan(0) + .filter_eq(1, 0, Slid::from_usize(42)) + .compile(); + + // Should be Filter(Scan) + if let QueryOp::Filter { input, pred } = plan { + assert!(matches!(*input, QueryOp::Scan { sort_idx: 0 })); + assert!(matches!( + pred, + Predicate::FuncEqConst { + func_idx: 1, + arg_col: 0, + .. 
+ } + )); + } else { + panic!("Expected Filter, got {:?}", plan); + } + } + + #[test] + fn test_join_compiles() { + let plan = Query::scan(0) + .join_scan(1) + .compile(); + + // Should be Join(Scan, Scan) + if let QueryOp::Join { left, right, .. } = plan { + assert!(matches!(*left, QueryOp::Scan { sort_idx: 0 })); + assert!(matches!(*right, QueryOp::Scan { sort_idx: 1 })); + } else { + panic!("Expected Join, got {:?}", plan); + } + } + + #[test] + fn test_compile_simple_filter() { + let plan = compile_simple_filter(5, 3, Slid::from_usize(100)); + + if let QueryOp::Filter { input, pred } = plan { + assert!(matches!(*input, QueryOp::Scan { sort_idx: 5 })); + if let Predicate::FuncEqConst { + func_idx, + arg_col, + expected, + } = pred + { + assert_eq!(func_idx, 3); + assert_eq!(arg_col, 0); + assert_eq!(expected, Slid::from_usize(100)); + } else { + panic!("Expected FuncEqConst predicate"); + } + } else { + panic!("Expected Filter"); + } + } + + #[test] + fn test_compile_filter_project() { + let plan = compile_filter_project(0, 1, Slid::from_usize(42), 2); + + // Should be Project(Apply(Filter(Scan))) + if let QueryOp::Project { input, columns } = plan { + assert_eq!(columns, vec![1]); + if let QueryOp::Apply { + input, + func_idx, + arg_col, + } = *input + { + assert_eq!(func_idx, 2); + assert_eq!(arg_col, 0); + if let QueryOp::Filter { input, .. } = *input { + assert!(matches!(*input, QueryOp::Scan { sort_idx: 0 })); + } else { + panic!("Expected Filter inside Apply"); + } + } else { + panic!("Expected Apply inside Project"); + } + } else { + panic!("Expected Project"); + } + } +} diff --git a/src/query/exec.rs b/src/query/exec.rs new file mode 100644 index 0000000..10172d2 --- /dev/null +++ b/src/query/exec.rs @@ -0,0 +1,243 @@ +//! Query execution against a Store. +//! +//! This module executes Pattern queries against the GeologMeta store, +//! computing the unique maximal element (cofree model) for ∀-style queries. 
+
+use crate::id::Slid;
+use crate::store::Store;
+use crate::store::append::AppendOps;
+
+use super::{Pattern, Projection};
+
+/// Result of a pattern query.
+///
+/// For ∀-style queries (open sorts), this is the cofree model:
+/// all elements satisfying the constraints.
+#[derive(Debug, Clone)]
+pub enum QueryResult {
+    /// List of matching elements
+    Elements(Vec<Slid>),
+    /// List of projected values
+    Values(Vec<Slid>),
+    /// List of projected tuples
+    Tuples(Vec<Vec<Slid>>),
+}
+
+impl QueryResult {
+    /// Get as elements (panics if not Elements variant).
+    pub fn into_elements(self) -> Vec<Slid> {
+        match self {
+            QueryResult::Elements(e) => e,
+            _ => panic!("QueryResult is not Elements"),
+        }
+    }
+
+    /// Get as values (panics if not Values variant).
+    pub fn into_values(self) -> Vec<Slid> {
+        match self {
+            QueryResult::Values(v) => v,
+            _ => panic!("QueryResult is not Values"),
+        }
+    }
+
+    /// Get as tuples (panics if not Tuples variant).
+    pub fn into_tuples(self) -> Vec<Vec<Slid>> {
+        match self {
+            QueryResult::Tuples(t) => t,
+            _ => panic!("QueryResult is not Tuples"),
+        }
+    }
+
+    /// Check if the result is empty.
+    pub fn is_empty(&self) -> bool {
+        match self {
+            QueryResult::Elements(e) => e.is_empty(),
+            QueryResult::Values(v) => v.is_empty(),
+            QueryResult::Tuples(t) => t.is_empty(),
+        }
+    }
+
+    /// Get the number of results.
+    pub fn len(&self) -> usize {
+        match self {
+            QueryResult::Elements(e) => e.len(),
+            QueryResult::Values(v) => v.len(),
+            QueryResult::Tuples(t) => t.len(),
+        }
+    }
+}
+
+/// Execute a pattern query against a store.
+///
+/// This is the ∀-style query executor: scans all elements of source_sort,
+/// filters by constraints, and projects the result.
+///
+/// In terms of query semantics: computes the unique maximal element
+/// (cofree model) of the theory extension.
+pub fn execute_pattern(store: &Store, pattern: &Pattern) -> QueryResult {
+    // Scan all elements of source sort
+    let candidates = store.elements_of_sort(pattern.source_sort);
+
+    // Filter by constraints
+    let matching: Vec<Slid> = candidates
+        .into_iter()
+        .filter(|&elem| {
+            pattern.constraints.iter().all(|c| {
+                store.get_func(c.func, elem) == Some(c.expected)
+            })
+        })
+        .collect();
+
+    // Project
+    match &pattern.projection {
+        Projection::Element => QueryResult::Elements(matching),
+        Projection::Func(func) => {
+            let values: Vec<Slid> = matching
+                .into_iter()
+                .filter_map(|elem| store.get_func(*func, elem))
+                .collect();
+            QueryResult::Values(values)
+        }
+        Projection::Tuple(funcs) => {
+            let tuples: Vec<Vec<Slid>> = matching
+                .into_iter()
+                .filter_map(|elem| {
+                    let tuple: Vec<Slid> = funcs
+                        .iter()
+                        .filter_map(|f| store.get_func(*f, elem))
+                        .collect();
+                    // Only include if all projections succeeded
+                    if tuple.len() == funcs.len() {
+                        Some(tuple)
+                    } else {
+                        None
+                    }
+                })
+                .collect();
+            QueryResult::Tuples(tuples)
+        }
+    }
+}
+
+/// Convenience methods on Store for pattern queries.
+impl Store {
+    /// Execute a pattern query.
+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// // Find all Srt where Srt.theory == theory_slid
+    /// let result = store.query(
+    ///     Pattern::new(store.sort_ids.srt.unwrap())
+    ///         .filter(store.func_ids.srt_theory.unwrap(), theory_slid)
+    /// );
+    /// ```
+    pub fn query(&self, pattern: &Pattern) -> QueryResult {
+        execute_pattern(self, pattern)
+    }
+
+    /// Execute a pattern query and return just the matching elements.
+    pub fn query_elements(&self, pattern: &Pattern) -> Vec<Slid> {
+        execute_pattern(self, pattern).into_elements()
+    }
+}
+
+// ============================================================================
+// Typed query helpers that replace bootstrap_queries
+// ============================================================================
+
+/// Information about a sort (mirrors bootstrap_queries::SortInfo)
+#[derive(Debug, Clone)]
+pub struct SortInfo {
+    pub name: String,
+    pub slid: Slid,
+}
+
+impl Store {
+    /// Query all sorts belonging to a theory using Pattern API.
+    ///
+    /// This is the Pattern-based equivalent of bootstrap_queries::query_theory_sorts.
+    /// Both should return identical results.
+    pub fn query_sorts_of_theory(&self, theory_slid: Slid) -> Vec<SortInfo> {
+        let Some(srt_sort) = self.sort_ids.srt else {
+            return vec![];
+        };
+        let Some(theory_func) = self.func_ids.srt_theory else {
+            return vec![];
+        };
+
+        // The core pattern: find all Srt where Srt.theory == theory_slid
+        let pattern = Pattern::new(srt_sort)
+            .filter(theory_func, theory_slid);
+
+        // Execute and post-process
+        self.query_elements(&pattern)
+            .into_iter()
+            .map(|slid| {
+                let name = self.get_element_name(slid);
+                let short_name = name.rsplit('/').next().unwrap_or(&name).to_string();
+                SortInfo { name: short_name, slid }
+            })
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    /// Test that Pattern-based query matches bootstrap_queries.
+    ///
+    /// This is a sanity test to ensure the new query engine gives
+    /// identical results to the hand-coded queries.
+ #[test] + fn test_query_sorts_matches_bootstrap() { + // Parse and elaborate a theory via REPL + let source = r#" + theory Graph { + V : Sort; + E : Sort; + src : E -> V; + tgt : E -> V; + } + "#; + + // Use ReplState to execute + let mut repl = crate::repl::ReplState::new(); + let _ = repl.execute_geolog(source); + + // Get the theory slid + if let Some((theory_slid, _)) = repl.store.resolve_name("Graph") { + // Query using bootstrap method + let bootstrap_result = repl.store.query_theory_sorts(theory_slid); + + // Query using Pattern method + let pattern_result = repl.store.query_sorts_of_theory(theory_slid); + + // Should have same number of results + assert_eq!( + bootstrap_result.len(), + pattern_result.len(), + "Different number of sorts returned: bootstrap={}, pattern={}", + bootstrap_result.len(), + pattern_result.len() + ); + + // Should have same sort names (V and E) + let bootstrap_names: std::collections::HashSet<_> = + bootstrap_result.iter().map(|s| &s.name).collect(); + let pattern_names: std::collections::HashSet<_> = + pattern_result.iter().map(|s| &s.name).collect(); + + assert_eq!( + bootstrap_names, + pattern_names, + "Different sort names returned" + ); + + // Verify we got the expected sorts + assert!(bootstrap_names.contains(&"V".to_string()), "Missing sort V"); + assert!(bootstrap_names.contains(&"E".to_string()), "Missing sort E"); + } else { + panic!("Theory 'Graph' not found after execution"); + } + } +} diff --git a/src/query/from_relalg.rs b/src/query/from_relalg.rs new file mode 100644 index 0000000..808f253 --- /dev/null +++ b/src/query/from_relalg.rs @@ -0,0 +1,1239 @@ +//! RelAlgIR Interpreter: Execute query plans represented as geolog instances. +//! +//! This module provides a CPU backend that interprets RelAlgIR instances. +//! It reads the string diagram structure from a geolog Structure and executes +//! the query operations to produce results. +//! +//! # Architecture +//! +//! 
A RelAlgIR instance encodes a query plan as a string diagram: +//! - Wire elements are edges carrying data streams (Z-sets of tuples) +//! - Op elements are boxes transforming data +//! - Composition is encoded by wire sharing (same Wire as output of one Op and input of another) +//! +//! The interpreter: +//! 1. Parses the instance structure to extract operations and wires +//! 2. Builds a dependency graph from wire connections +//! 3. Topologically sorts operations (respecting DBSP delay semantics) +//! 4. Executes each operation in order +//! 5. Returns the result on the designated output wire +//! +//! # Example +//! +//! ```ignore +//! use geolog::query::from_relalg::execute_relalg; +//! +//! let result = execute_relalg( +//! &relalg_instance, // The compiled query plan +//! &relalg_theory, // RelAlgIR theory +//! &target_structure, // Data to query +//! )?; +//! ``` + +use std::collections::{HashMap, VecDeque}; + +use crate::core::{ElaboratedTheory, Structure}; +use crate::id::{NumericId, Slid, get_slid}; +use crate::query::backend::Bag; +use crate::query::to_relalg::RelAlgInstance; + +/// Error type for RelAlgIR execution +#[derive(Debug, Clone)] +pub enum RelAlgError { + /// Missing required sort in RelAlgIR theory + MissingSortId(String), + /// Missing required function in RelAlgIR theory + MissingFuncId(String), + /// No output wire found + NoOutputWire, + /// Invalid operation structure + InvalidOp(String), + /// Cycle detected without delay + InstantaneousCycle, + /// Unsupported operation + Unsupported(String), +} + +impl std::fmt::Display for RelAlgError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MissingSortId(s) => write!(f, "Missing sort: {s}"), + Self::MissingFuncId(s) => write!(f, "Missing function: {s}"), + Self::NoOutputWire => write!(f, "No output wire found in plan"), + Self::InvalidOp(s) => write!(f, "Invalid operation: {s}"), + Self::InstantaneousCycle => write!(f, "Cycle detected without 
delay"), + Self::Unsupported(s) => write!(f, "Unsupported: {s}"), + } + } +} + +impl std::error::Error for RelAlgError {} + +/// Cached sort/function IDs from RelAlgIR theory +#[allow(dead_code)] // Some IDs are for future use +struct RelAlgIds { + // Core sorts + wire: usize, + op: usize, + + // Operation sorts + scan_op: usize, + filter_op: usize, + distinct_op: usize, + negate_op: usize, + join_op: usize, + union_op: usize, + empty_op: usize, + delay_op: usize, + diff_op: usize, + integrate_op: usize, + + // Predicate sorts + pred: usize, + true_pred: usize, + false_pred: usize, + col_eq_pred: usize, + const_eq_pred: usize, + and_pred: usize, + or_pred: usize, + + // Join condition sorts + join_cond: usize, + equi_join_cond: usize, + cross_join_cond: usize, + + // Column reference sorts + col_ref: usize, + col_path: usize, + here_path: usize, + left_path: usize, + right_path: usize, + + // GeologMeta sorts (for references to target structure) + srt: usize, + elem: usize, + func: usize, +} + +impl RelAlgIds { + fn from_theory(theory: &ElaboratedTheory) -> Result { + let sig = &theory.theory.signature; + + let get_sort = |name: &str| -> Result { + sig.sorts + .iter() + .position(|s| s == name) + .ok_or_else(|| RelAlgError::MissingSortId(name.to_string())) + }; + + Ok(Self { + wire: get_sort("Wire")?, + op: get_sort("Op")?, + + scan_op: get_sort("ScanOp")?, + filter_op: get_sort("FilterOp")?, + distinct_op: get_sort("DistinctOp")?, + negate_op: get_sort("NegateOp")?, + join_op: get_sort("JoinOp")?, + union_op: get_sort("UnionOp")?, + empty_op: get_sort("EmptyOp")?, + delay_op: get_sort("DelayOp")?, + diff_op: get_sort("DiffOp")?, + integrate_op: get_sort("IntegrateOp")?, + + pred: get_sort("Pred")?, + true_pred: get_sort("TruePred")?, + false_pred: get_sort("FalsePred")?, + col_eq_pred: get_sort("ColEqPred")?, + const_eq_pred: get_sort("ConstEqPred")?, + and_pred: get_sort("AndPred")?, + or_pred: get_sort("OrPred")?, + + join_cond: get_sort("JoinCond")?, + 
equi_join_cond: get_sort("EquiJoinCond")?, + cross_join_cond: get_sort("CrossJoinCond")?, + + col_ref: get_sort("ColRef")?, + col_path: get_sort("ColPath")?, + here_path: get_sort("HerePath")?, + left_path: get_sort("LeftPath")?, + right_path: get_sort("RightPath")?, + + srt: get_sort("GeologMeta/Srt")?, + elem: get_sort("GeologMeta/Elem")?, + func: get_sort("GeologMeta/Func")?, + }) + } +} + +/// Function IDs for navigating RelAlgIR structure +#[allow(dead_code)] // Some IDs are for future use +struct RelAlgFuncs { + // ScanOp accessors + scan_op_srt: usize, + scan_op_out: usize, + + // FilterOp accessors + filter_op_in: usize, + filter_op_out: usize, + filter_op_pred: usize, + + // DistinctOp accessors + distinct_op_in: usize, + distinct_op_out: usize, + + // NegateOp accessors + negate_op_in: usize, + negate_op_out: usize, + + // JoinOp accessors + join_op_left_in: usize, + join_op_right_in: usize, + join_op_out: usize, + join_op_cond: usize, + + // UnionOp accessors + union_op_left_in: usize, + union_op_right_in: usize, + union_op_out: usize, + + // EmptyOp accessors + empty_op_out: usize, + + // DelayOp accessors + delay_op_in: usize, + delay_op_out: usize, + + // DiffOp accessors + diff_op_in: usize, + diff_op_out: usize, + + // IntegrateOp accessors + integrate_op_in: usize, + integrate_op_out: usize, + + // Predicate accessors + true_pred_pred: usize, + false_pred_pred: usize, + col_eq_pred_pred: usize, + col_eq_pred_left: usize, + col_eq_pred_right: usize, + const_eq_pred_pred: usize, + const_eq_pred_col: usize, + const_eq_pred_val: usize, + and_pred_pred: usize, + and_pred_left: usize, + and_pred_right: usize, + or_pred_pred: usize, + or_pred_left: usize, + or_pred_right: usize, + + // Join condition accessors + equi_join_cond_cond: usize, + equi_join_cond_left_col: usize, + equi_join_cond_right_col: usize, + cross_join_cond_cond: usize, + + // ColRef accessors + col_ref_wire: usize, + col_ref_path: usize, + + // ColPath accessors + here_path_path: usize, 
+ left_path_path: usize, + left_path_rest: usize, + right_path_path: usize, + right_path_rest: usize, +} + +impl RelAlgFuncs { + fn from_theory(theory: &ElaboratedTheory) -> Result { + let sig = &theory.theory.signature; + + let get_func = |name: &str| -> Result { + sig.func_names + .get(name) + .copied() + .ok_or_else(|| RelAlgError::MissingFuncId(name.to_string())) + }; + + Ok(Self { + scan_op_srt: get_func("ScanOp/srt")?, + scan_op_out: get_func("ScanOp/out")?, + + filter_op_in: get_func("FilterOp/in")?, + filter_op_out: get_func("FilterOp/out")?, + filter_op_pred: get_func("FilterOp/pred")?, + + distinct_op_in: get_func("DistinctOp/in")?, + distinct_op_out: get_func("DistinctOp/out")?, + + negate_op_in: get_func("NegateOp/in")?, + negate_op_out: get_func("NegateOp/out")?, + + join_op_left_in: get_func("JoinOp/left_in")?, + join_op_right_in: get_func("JoinOp/right_in")?, + join_op_out: get_func("JoinOp/out")?, + join_op_cond: get_func("JoinOp/cond")?, + + union_op_left_in: get_func("UnionOp/left_in")?, + union_op_right_in: get_func("UnionOp/right_in")?, + union_op_out: get_func("UnionOp/out")?, + + empty_op_out: get_func("EmptyOp/out")?, + + delay_op_in: get_func("DelayOp/in")?, + delay_op_out: get_func("DelayOp/out")?, + + diff_op_in: get_func("DiffOp/in")?, + diff_op_out: get_func("DiffOp/out")?, + + integrate_op_in: get_func("IntegrateOp/in")?, + integrate_op_out: get_func("IntegrateOp/out")?, + + true_pred_pred: get_func("TruePred/pred")?, + false_pred_pred: get_func("FalsePred/pred")?, + col_eq_pred_pred: get_func("ColEqPred/pred")?, + col_eq_pred_left: get_func("ColEqPred/left")?, + col_eq_pred_right: get_func("ColEqPred/right")?, + const_eq_pred_pred: get_func("ConstEqPred/pred")?, + const_eq_pred_col: get_func("ConstEqPred/col")?, + const_eq_pred_val: get_func("ConstEqPred/val")?, + and_pred_pred: get_func("AndPred/pred")?, + and_pred_left: get_func("AndPred/left")?, + and_pred_right: get_func("AndPred/right")?, + or_pred_pred: get_func("OrPred/pred")?, 
+ or_pred_left: get_func("OrPred/left")?, + or_pred_right: get_func("OrPred/right")?, + + equi_join_cond_cond: get_func("EquiJoinCond/cond")?, + equi_join_cond_left_col: get_func("EquiJoinCond/left_col")?, + equi_join_cond_right_col: get_func("EquiJoinCond/right_col")?, + cross_join_cond_cond: get_func("CrossJoinCond/cond")?, + + col_ref_wire: get_func("ColRef/wire")?, + col_ref_path: get_func("ColRef/path")?, + + here_path_path: get_func("HerePath/path")?, + left_path_path: get_func("LeftPath/path")?, + left_path_rest: get_func("LeftPath/rest")?, + right_path_path: get_func("RightPath/path")?, + right_path_rest: get_func("RightPath/rest")?, + }) + } +} + +/// Parsed operation from a RelAlgIR instance +#[derive(Debug, Clone)] +enum ParsedOp { + Scan { + sort_idx: usize, + out_wire: Slid, + }, + Filter { + in_wire: Slid, + out_wire: Slid, + pred: Slid, + }, + Distinct { + in_wire: Slid, + out_wire: Slid, + }, + Negate { + in_wire: Slid, + out_wire: Slid, + }, + Join { + left_wire: Slid, + right_wire: Slid, + out_wire: Slid, + cond: Slid, + }, + Union { + left_wire: Slid, + right_wire: Slid, + out_wire: Slid, + }, + Empty { + out_wire: Slid, + }, + Delay { + in_wire: Slid, + out_wire: Slid, + }, + Diff { + in_wire: Slid, + out_wire: Slid, + }, + Integrate { + in_wire: Slid, + out_wire: Slid, + }, +} + +impl ParsedOp { + fn out_wire(&self) -> Slid { + match self { + Self::Scan { out_wire, .. } + | Self::Filter { out_wire, .. } + | Self::Distinct { out_wire, .. } + | Self::Negate { out_wire, .. } + | Self::Join { out_wire, .. } + | Self::Union { out_wire, .. } + | Self::Empty { out_wire, .. } + | Self::Delay { out_wire, .. } + | Self::Diff { out_wire, .. } + | Self::Integrate { out_wire, .. } => *out_wire, + } + } + + fn in_wires(&self) -> Vec { + match self { + Self::Scan { .. } | Self::Empty { .. } => vec![], + Self::Filter { in_wire, .. } + | Self::Distinct { in_wire, .. } + | Self::Negate { in_wire, .. } + | Self::Delay { in_wire, .. } + | Self::Diff { in_wire, .. 
}
+            | Self::Integrate { in_wire, .. } => vec![*in_wire],
+            Self::Join {
+                left_wire,
+                right_wire,
+                ..
+            }
+            | Self::Union {
+                left_wire,
+                right_wire,
+                ..
+            } => vec![*left_wire, *right_wire],
+        }
+    }
+
+    /// Returns true if this operation breaks instantaneous cycles
+    fn breaks_cycle(&self) -> bool {
+        matches!(self, Self::Delay { .. } | Self::Integrate { .. })
+    }
+}
+
+/// Parsed predicate from a RelAlgIR instance
+#[derive(Debug, Clone)]
+pub enum ParsedPred {
+    True,
+    False,
+    ColEq { left: usize, right: usize },
+    ConstEq { col: usize, val: Slid },
+    And(Box<ParsedPred>, Box<ParsedPred>),
+    Or(Box<ParsedPred>, Box<ParsedPred>),
+}
+
+/// Parsed join condition
+#[derive(Debug, Clone)]
+pub enum ParsedJoinCond {
+    Cross,
+    Equi { left_col: usize, right_col: usize },
+}
+
+/// Context for interpreting RelAlgIR instances
+struct InterpretContext<'a> {
+    /// The RelAlgIR instance being interpreted
+    instance: &'a RelAlgInstance,
+    /// RelAlgIR theory sort IDs
+    ids: RelAlgIds,
+    /// RelAlgIR theory function IDs
+    funcs: RelAlgFuncs,
+    /// Wire values during execution
+    wire_values: HashMap<Slid, Bag>,
+    /// Target structure being queried
+    target: &'a Structure,
+}
+
+impl<'a> InterpretContext<'a> {
+    fn new(
+        instance: &'a RelAlgInstance,
+        theory: &ElaboratedTheory,
+        target: &'a Structure,
+    ) -> Result<Self, RelAlgError> {
+        Ok(Self {
+            instance,
+            ids: RelAlgIds::from_theory(theory)?,
+            funcs: RelAlgFuncs::from_theory(theory)?,
+            wire_values: HashMap::new(),
+            target,
+        })
+    }
+
+    /// Get function value for an element
+    fn get_func_value(&self, func_id: usize, elem: Slid) -> Option<Slid> {
+        let structure = &self.instance.structure;
+        // Convert global Slid to sort-local index
+        let local_idx = structure.sort_local_id(elem).index();
+        structure
+            .functions
+            .get(func_id)
+            .and_then(|f| get_slid(f.get_local(local_idx)))
+    }
+
+    /// Get sort index from a GeologMeta/Srt element using the sort_mapping
+    fn get_srt_sort_idx(&self, srt_elem: Slid) -> Result<usize, RelAlgError> {
+        self.instance
+            .sort_mapping
+            .get(&srt_elem)
+            .copied()
+            .ok_or_else(||
RelAlgError::InvalidOp(format!( + "Unknown Srt element {:?} - not in sort_mapping", + srt_elem + ))) + } + + /// Parse all operations from the instance + fn parse_operations(&self) -> Result, RelAlgError> { + let mut ops = Vec::new(); + let structure = &self.instance.structure; + + // Find all ScanOp elements + for elem_idx in structure.carriers[self.ids.scan_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let srt = self + .get_func_value(self.funcs.scan_op_srt, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ScanOp missing srt".into()))?; + let out_wire = self + .get_func_value(self.funcs.scan_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ScanOp missing out".into()))?; + + let sort_idx = self.get_srt_sort_idx(srt)?; + ops.push(ParsedOp::Scan { sort_idx, out_wire }); + } + + // Find all FilterOp elements + for elem_idx in structure.carriers[self.ids.filter_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.filter_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("FilterOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.filter_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("FilterOp missing out".into()))?; + let pred = self + .get_func_value(self.funcs.filter_op_pred, elem) + .ok_or_else(|| RelAlgError::InvalidOp("FilterOp missing pred".into()))?; + + ops.push(ParsedOp::Filter { + in_wire, + out_wire, + pred, + }); + } + + // Find all DistinctOp elements + for elem_idx in structure.carriers[self.ids.distinct_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.distinct_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DistinctOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.distinct_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DistinctOp missing out".into()))?; + + ops.push(ParsedOp::Distinct { in_wire, out_wire }); + } + + // Find all NegateOp 
elements + for elem_idx in structure.carriers[self.ids.negate_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.negate_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("NegateOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.negate_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("NegateOp missing out".into()))?; + + ops.push(ParsedOp::Negate { in_wire, out_wire }); + } + + // Find all JoinOp elements + for elem_idx in structure.carriers[self.ids.join_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let left_wire = self + .get_func_value(self.funcs.join_op_left_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing left_in".into()))?; + let right_wire = self + .get_func_value(self.funcs.join_op_right_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing right_in".into()))?; + let out_wire = self + .get_func_value(self.funcs.join_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing out".into()))?; + let cond = self + .get_func_value(self.funcs.join_op_cond, elem) + .ok_or_else(|| RelAlgError::InvalidOp("JoinOp missing cond".into()))?; + + ops.push(ParsedOp::Join { + left_wire, + right_wire, + out_wire, + cond, + }); + } + + // Find all UnionOp elements + for elem_idx in structure.carriers[self.ids.union_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let left_wire = self + .get_func_value(self.funcs.union_op_left_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("UnionOp missing left_in".into()))?; + let right_wire = self + .get_func_value(self.funcs.union_op_right_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("UnionOp missing right_in".into()))?; + let out_wire = self + .get_func_value(self.funcs.union_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("UnionOp missing out".into()))?; + + ops.push(ParsedOp::Union { + left_wire, + right_wire, + out_wire, + }); + } + + // Find all EmptyOp 
elements + for elem_idx in structure.carriers[self.ids.empty_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let out_wire = self + .get_func_value(self.funcs.empty_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("EmptyOp missing out".into()))?; + + ops.push(ParsedOp::Empty { out_wire }); + } + + // Find all DelayOp elements + for elem_idx in structure.carriers[self.ids.delay_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.delay_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DelayOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.delay_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DelayOp missing out".into()))?; + + ops.push(ParsedOp::Delay { in_wire, out_wire }); + } + + // Find all DiffOp elements + for elem_idx in structure.carriers[self.ids.diff_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.diff_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DiffOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.diff_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("DiffOp missing out".into()))?; + + ops.push(ParsedOp::Diff { in_wire, out_wire }); + } + + // Find all IntegrateOp elements + for elem_idx in structure.carriers[self.ids.integrate_op].iter() { + let elem = Slid::from_usize(elem_idx as usize); + let in_wire = self + .get_func_value(self.funcs.integrate_op_in, elem) + .ok_or_else(|| RelAlgError::InvalidOp("IntegrateOp missing in".into()))?; + let out_wire = self + .get_func_value(self.funcs.integrate_op_out, elem) + .ok_or_else(|| RelAlgError::InvalidOp("IntegrateOp missing out".into()))?; + + ops.push(ParsedOp::Integrate { in_wire, out_wire }); + } + + Ok(ops) + } + + /// Parse a predicate element + fn parse_predicate(&self, pred: Slid) -> Result { + // Try to find which sort the predicate element belongs to + let structure = 
&self.instance.structure; + + // Check if it's TruePred + for elem_idx in structure.carriers[self.ids.true_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.true_pred_pred, elem) + && p == pred { + return Ok(ParsedPred::True); + } + } + + // Check if it's FalsePred + for elem_idx in structure.carriers[self.ids.false_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.false_pred_pred, elem) + && p == pred { + return Ok(ParsedPred::False); + } + } + + // Check if it's ColEqPred + for elem_idx in structure.carriers[self.ids.col_eq_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.col_eq_pred_pred, elem) + && p == pred { + let left_ref = self + .get_func_value(self.funcs.col_eq_pred_left, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ColEqPred missing left".into()))?; + let right_ref = self + .get_func_value(self.funcs.col_eq_pred_right, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ColEqPred missing right".into()))?; + + let left = self.parse_col_ref(left_ref)?; + let right = self.parse_col_ref(right_ref)?; + + return Ok(ParsedPred::ColEq { left, right }); + } + } + + // Check if it's ConstEqPred + for elem_idx in structure.carriers[self.ids.const_eq_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.const_eq_pred_pred, elem) + && p == pred { + let col_ref = self + .get_func_value(self.funcs.const_eq_pred_col, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ConstEqPred missing col".into()))?; + let elem_ref = self + .get_func_value(self.funcs.const_eq_pred_val, elem) + .ok_or_else(|| RelAlgError::InvalidOp("ConstEqPred missing val".into()))?; + + let col = self.parse_col_ref(col_ref)?; + + // Look up the original target value from the Elem element + let val = self.instance + .elem_value_mapping + .get(&elem_ref) + 
.copied() + .ok_or_else(|| RelAlgError::InvalidOp(format!( + "ConstEqPred val {:?} not in elem_value_mapping", + elem_ref + )))?; + + return Ok(ParsedPred::ConstEq { col, val }); + } + } + + // Check if it's AndPred + for elem_idx in structure.carriers[self.ids.and_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.and_pred_pred, elem) + && p == pred { + let left = self + .get_func_value(self.funcs.and_pred_left, elem) + .ok_or_else(|| RelAlgError::InvalidOp("AndPred missing left".into()))?; + let right = self + .get_func_value(self.funcs.and_pred_right, elem) + .ok_or_else(|| RelAlgError::InvalidOp("AndPred missing right".into()))?; + + let left_pred = self.parse_predicate(left)?; + let right_pred = self.parse_predicate(right)?; + + return Ok(ParsedPred::And(Box::new(left_pred), Box::new(right_pred))); + } + } + + // Check if it's OrPred + for elem_idx in structure.carriers[self.ids.or_pred].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.or_pred_pred, elem) + && p == pred { + let left = self + .get_func_value(self.funcs.or_pred_left, elem) + .ok_or_else(|| RelAlgError::InvalidOp("OrPred missing left".into()))?; + let right = self + .get_func_value(self.funcs.or_pred_right, elem) + .ok_or_else(|| RelAlgError::InvalidOp("OrPred missing right".into()))?; + + let left_pred = self.parse_predicate(left)?; + let right_pred = self.parse_predicate(right)?; + + return Ok(ParsedPred::Or(Box::new(left_pred), Box::new(right_pred))); + } + } + + Err(RelAlgError::InvalidOp(format!( + "Unknown predicate type for {:?}", + pred + ))) + } + + /// Parse a join condition element + fn parse_join_cond(&self, cond: Slid) -> Result { + let structure = &self.instance.structure; + + // Check if it's CrossJoinCond + for elem_idx in structure.carriers[self.ids.cross_join_cond].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(c) = 
self.get_func_value(self.funcs.cross_join_cond_cond, elem) + && c == cond { + return Ok(ParsedJoinCond::Cross); + } + } + + // Check if it's EquiJoinCond + for elem_idx in structure.carriers[self.ids.equi_join_cond].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(c) = self.get_func_value(self.funcs.equi_join_cond_cond, elem) + && c == cond { + let left_col_ref = self + .get_func_value(self.funcs.equi_join_cond_left_col, elem) + .ok_or_else(|| { + RelAlgError::InvalidOp("EquiJoinCond missing left_col".into()) + })?; + let right_col_ref = self + .get_func_value(self.funcs.equi_join_cond_right_col, elem) + .ok_or_else(|| { + RelAlgError::InvalidOp("EquiJoinCond missing right_col".into()) + })?; + + let left_col = self.parse_col_ref(left_col_ref)?; + let right_col = self.parse_col_ref(right_col_ref)?; + + return Ok(ParsedJoinCond::Equi { left_col, right_col }); + } + } + + Err(RelAlgError::InvalidOp(format!( + "Unknown join condition type for {:?}", + cond + ))) + } + + /// Parse a column reference to get the column index + fn parse_col_ref(&self, col_ref: Slid) -> Result { + // Get the path from the ColRef + let path = self + .get_func_value(self.funcs.col_ref_path, col_ref) + .ok_or_else(|| RelAlgError::InvalidOp("ColRef missing path".into()))?; + + self.parse_col_path(path) + } + + /// Parse a column path to get the column index + fn parse_col_path(&self, path: Slid) -> Result { + let structure = &self.instance.structure; + + // Check if it's HerePath (index 0) + for elem_idx in structure.carriers[self.ids.here_path].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.here_path_path, elem) + && p == path { + return Ok(0); + } + } + + // Check if it's LeftPath + for elem_idx in structure.carriers[self.ids.left_path].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.left_path_path, elem) + && p == path { + let rest = self + 
.get_func_value(self.funcs.left_path_rest, elem) + .ok_or_else(|| RelAlgError::InvalidOp("LeftPath missing rest".into()))?; + return self.parse_col_path(rest); + } + } + + // Check if it's RightPath + for elem_idx in structure.carriers[self.ids.right_path].iter() { + let elem = Slid::from_usize(elem_idx as usize); + if let Some(p) = self.get_func_value(self.funcs.right_path_path, elem) + && p == path { + let rest = self + .get_func_value(self.funcs.right_path_rest, elem) + .ok_or_else(|| RelAlgError::InvalidOp("RightPath missing rest".into()))?; + // Right path adds 1 to the column index + return Ok(1 + self.parse_col_path(rest)?); + } + } + + Err(RelAlgError::InvalidOp(format!( + "Unknown path type for {:?}", + path + ))) + } + + /// Topologically sort operations (respecting dependencies) + fn topological_sort(&self, ops: &[ParsedOp]) -> Result, RelAlgError> { + // Build output wire -> operation index map + let mut wire_to_op: HashMap = HashMap::new(); + for (idx, op) in ops.iter().enumerate() { + wire_to_op.insert(op.out_wire(), idx); + } + + // Build dependency graph + let mut in_degree: Vec = vec![0; ops.len()]; + let mut dependents: Vec> = vec![Vec::new(); ops.len()]; + + for (idx, op) in ops.iter().enumerate() { + for in_wire in op.in_wires() { + if let Some(&producer_idx) = wire_to_op.get(&in_wire) { + // Skip delay edges for cycle breaking + if !ops[producer_idx].breaks_cycle() { + in_degree[idx] += 1; + dependents[producer_idx].push(idx); + } + } + } + } + + // Kahn's algorithm + let mut queue: VecDeque = VecDeque::new(); + for (idx, °ree) in in_degree.iter().enumerate() { + if degree == 0 { + queue.push_back(idx); + } + } + + let mut sorted = Vec::new(); + while let Some(idx) = queue.pop_front() { + sorted.push(idx); + for &dep_idx in &dependents[idx] { + in_degree[dep_idx] -= 1; + if in_degree[dep_idx] == 0 { + queue.push_back(dep_idx); + } + } + } + + if sorted.len() != ops.len() { + return Err(RelAlgError::InstantaneousCycle); + } + + Ok(sorted) + } + 
+ /// Execute a single operation + fn execute_op(&mut self, op: &ParsedOp) -> Result { + match op { + ParsedOp::Scan { sort_idx, .. } => { + // Emit all elements of the sort as singleton tuples + let mut result = Bag::new(); + if let Some(carrier) = self.target.carriers.get(*sort_idx) { + for elem in carrier.iter() { + let tuple = vec![Slid::from_usize(elem as usize)]; + result.insert(tuple, 1); + } + } + Ok(result) + } + + ParsedOp::Filter { + in_wire, + pred, + .. + } => { + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| RelAlgError::InvalidOp("Filter input wire not found".into()))? + .clone(); + + let parsed_pred = self.parse_predicate(*pred)?; + + let mut result = Bag::new(); + for (tuple, mult) in input.iter() { + if self.evaluate_predicate(&parsed_pred, tuple)? { + result.insert(tuple.clone(), *mult); + } + } + Ok(result) + } + + ParsedOp::Distinct { in_wire, .. } => { + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Distinct input wire not found".into()) + })? + .clone(); + + let mut result = Bag::new(); + for (tuple, mult) in input.iter() { + if *mult > 0 { + result.insert(tuple.clone(), 1); + } + } + Ok(result) + } + + ParsedOp::Negate { in_wire, .. } => { + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Negate input wire not found".into()) + })? + .clone(); + + let mut result = Bag::new(); + for (tuple, mult) in input.iter() { + result.insert(tuple.clone(), -mult); + } + Ok(result) + } + + ParsedOp::Join { + left_wire, + right_wire, + cond, + .. + } => { + let left = self + .wire_values + .get(left_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Join left input wire not found".into()) + })? + .clone(); + let right = self + .wire_values + .get(right_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Join right input wire not found".into()) + })? 
+ .clone(); + + let parsed_cond = self.parse_join_cond(*cond)?; + + let mut result = Bag::new(); + + match parsed_cond { + ParsedJoinCond::Cross => { + // Cartesian product + for (l_tuple, l_mult) in left.iter() { + for (r_tuple, r_mult) in right.iter() { + let mut joined = l_tuple.clone(); + joined.extend(r_tuple.iter().cloned()); + result.insert(joined, l_mult * r_mult); + } + } + } + ParsedJoinCond::Equi { left_col, right_col } => { + // Hash join + let mut right_index: HashMap, i64)>> = HashMap::new(); + for (r_tuple, r_mult) in right.iter() { + if let Some(&key) = r_tuple.get(right_col) { + right_index.entry(key).or_default().push((r_tuple, *r_mult)); + } + } + + for (l_tuple, l_mult) in left.iter() { + if let Some(&key) = l_tuple.get(left_col) + && let Some(matches) = right_index.get(&key) { + for (r_tuple, r_mult) in matches { + let mut joined = l_tuple.clone(); + joined.extend(r_tuple.iter().cloned()); + result.insert(joined, l_mult * r_mult); + } + } + } + } + } + + Ok(result) + } + + ParsedOp::Union { + left_wire, + right_wire, + .. + } => { + let left = self + .wire_values + .get(left_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Union left input wire not found".into()) + })? + .clone(); + let right = self + .wire_values + .get(right_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Union right input wire not found".into()) + })? + .clone(); + + let mut result = left; + for (tuple, mult) in right.iter() { + result.insert(tuple.clone(), result.tuples.get(tuple).unwrap_or(&0) + mult); + } + Ok(result) + } + + ParsedOp::Empty { .. } => Ok(Bag::new()), + + ParsedOp::Delay { in_wire, .. } => { + // For non-streaming execution, delay is identity + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Delay input wire not found".into()) + })? + .clone(); + Ok(input) + } + + ParsedOp::Diff { in_wire, .. 
} => { + // For non-streaming execution, diff is identity + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Diff input wire not found".into()) + })? + .clone(); + Ok(input) + } + + ParsedOp::Integrate { in_wire, .. } => { + // For non-streaming execution, integrate is identity + let input = self + .wire_values + .get(in_wire) + .ok_or_else(|| { + RelAlgError::InvalidOp("Integrate input wire not found".into()) + })? + .clone(); + Ok(input) + } + } + } + + /// Evaluate a predicate on a tuple + #[allow(clippy::only_used_in_recursion)] + fn evaluate_predicate(&self, pred: &ParsedPred, tuple: &[Slid]) -> Result { + match pred { + ParsedPred::True => Ok(true), + ParsedPred::False => Ok(false), + ParsedPred::ColEq { left, right } => { + let l = tuple.get(*left); + let r = tuple.get(*right); + Ok(l.is_some() && l == r) + } + ParsedPred::ConstEq { col, val } => { + let c = tuple.get(*col); + Ok(c == Some(val)) + } + ParsedPred::And(left, right) => { + Ok(self.evaluate_predicate(left, tuple)? + && self.evaluate_predicate(right, tuple)?) + } + ParsedPred::Or(left, right) => { + Ok(self.evaluate_predicate(left, tuple)? + || self.evaluate_predicate(right, tuple)?) 
+ } + } + } +} + +/// Execute a RelAlgIR instance against a target structure +/// +/// # Arguments +/// * `instance` - The RelAlgIR instance representing the query plan +/// * `relalg_theory` - The RelAlgIR theory +/// * `target` - The structure to query +/// * `output_wire_name` - Name of the output wire (defaults to "output") +/// +/// # Returns +/// The query result as a Z-set +pub fn execute_relalg( + instance: &RelAlgInstance, + relalg_theory: &ElaboratedTheory, + target: &Structure, + output_wire_name: Option<&str>, +) -> Result { + let mut ctx = InterpretContext::new(instance, relalg_theory, target)?; + + // Parse all operations + let ops = ctx.parse_operations()?; + + if ops.is_empty() { + return Ok(Bag::new()); + } + + // Find output wire - use instance.output_wire by default, or look up by name + let output_wire = if let Some(name) = output_wire_name { + instance + .names + .iter() + .find(|(_, n)| *n == name) + .map(|(slid, _)| *slid) + .ok_or(RelAlgError::NoOutputWire)? + } else { + instance.output_wire + }; + + // Topologically sort operations + let sorted = ctx.topological_sort(&ops)?; + + // Execute in order + for &idx in &sorted { + let result = ctx.execute_op(&ops[idx])?; + let out_wire = ops[idx].out_wire(); + ctx.wire_values.insert(out_wire, result); + } + + // Return output wire value + ctx.wire_values + .remove(&output_wire) + .ok_or(RelAlgError::NoOutputWire) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parsed_op_in_wires() { + let scan = ParsedOp::Scan { + sort_idx: 0, + out_wire: Slid::from_usize(0), + }; + assert!(scan.in_wires().is_empty()); + + let filter = ParsedOp::Filter { + in_wire: Slid::from_usize(0), + out_wire: Slid::from_usize(1), + pred: Slid::from_usize(2), + }; + assert_eq!(filter.in_wires(), vec![Slid::from_usize(0)]); + + let join = ParsedOp::Join { + left_wire: Slid::from_usize(0), + right_wire: Slid::from_usize(1), + out_wire: Slid::from_usize(2), + cond: Slid::from_usize(3), + }; + assert_eq!( + 
join.in_wires(), + vec![Slid::from_usize(0), Slid::from_usize(1)] + ); + } + + #[test] + fn test_parsed_op_breaks_cycle() { + let scan = ParsedOp::Scan { + sort_idx: 0, + out_wire: Slid::from_usize(0), + }; + assert!(!scan.breaks_cycle()); + + let delay = ParsedOp::Delay { + in_wire: Slid::from_usize(0), + out_wire: Slid::from_usize(1), + }; + assert!(delay.breaks_cycle()); + + let integrate = ParsedOp::Integrate { + in_wire: Slid::from_usize(0), + out_wire: Slid::from_usize(1), + }; + assert!(integrate.breaks_cycle()); + } +} diff --git a/src/query/mod.rs b/src/query/mod.rs new file mode 100644 index 0000000..563d9f2 --- /dev/null +++ b/src/query/mod.rs @@ -0,0 +1,43 @@ +//! Query engine for geolog. +//! +//! **Semantics:** Queries are theory extensions. The result is the set of maximal +//! elements in the posetal reflection of well-formed Ext_M(T') — the category +//! of T'-extensions of base model M. +//! +//! See `loose_thoughts/2026-01-19_18:15_query_semantics.md` for full design. +//! +//! # Query Styles +//! +//! - **∃-style (closed sorts):** New sorts with declared constants. +//! Well-formedness requires exactly those constants exist. +//! Maximal elements = one per valid witness assignment. +//! Implementation: constraint satisfaction. +//! +//! - **∀-style (open sorts):** New sorts with no constants, constrained by +//! universal axioms. Bounded by constraint, posetal reflection identifies +//! observationally-equivalent duplicates. +//! Unique maximal element = cofree model. +//! Implementation: relational algebra / Datalog. +//! +//! # Implementation Phases +//! +//! 1. **Open sort computation** - what bootstrap_queries does manually +//! 2. **Closed sort enumeration** - constraint satisfaction +//! 3. **Chase for derived relations** - semi-naive fixpoint +//! 4. 
**Mixed queries** - combine both + +mod pattern; +mod exec; +pub mod backend; +pub mod optimize; +pub mod compile; +mod store_queries; +pub mod to_relalg; +pub mod from_relalg; +pub mod chase; + +pub use pattern::{Pattern, Constraint, Projection}; +pub use exec::{QueryResult, execute_pattern}; +pub use backend::{Bag, QueryOp, Predicate, JoinCond, execute, execute_optimized, StreamContext, execute_stream}; +pub use optimize::optimize; +pub use compile::{Query, QueryBuilder, compile_simple_filter, compile_filter_project}; diff --git a/src/query/optimize.rs b/src/query/optimize.rs new file mode 100644 index 0000000..69b69e6 --- /dev/null +++ b/src/query/optimize.rs @@ -0,0 +1,308 @@ +//! Query optimizer using algebraic laws. +//! +//! Applies rewrite rules corresponding to the algebraic laws defined in +//! RelAlgIR.geolog to transform query plans into more efficient forms. +//! +//! This is a simple "obviously correct" optimizer: +//! - Single-pass bottom-up rewriting +//! - No cost model (just simplification) +//! - Validated by proptests against the naive backend +//! +//! Key rewrites: +//! - Filter(True, x) → x +//! - Filter(False, x) → Empty +//! - Filter(p, Filter(q, x)) → Filter(And(p, q), x) +//! - Distinct(Distinct(x)) → Distinct(x) +//! - Union(x, Empty) → x +//! - Union(Empty, x) → x +//! - Negate(Negate(x)) → x +//! - Join(x, Empty) → Empty +//! - Join(Empty, x) → Empty + +use super::backend::{Predicate, QueryOp}; + +/// Optimize a query plan by applying algebraic laws. +/// +/// Returns an equivalent plan that may be more efficient to execute. +/// The optimization is semantics-preserving: optimize(p) produces the +/// same results as p for any structure. +pub fn optimize(plan: &QueryOp) -> QueryOp { + // Bottom-up: optimize children first, then apply rules + let optimized_children = optimize_children(plan); + apply_rules(optimized_children) +} + +/// Recursively optimize all children of a plan node. 
fn optimize_children(plan: &QueryOp) -> QueryOp {
    match plan {
        // Leaf nodes: nothing to recurse into, rebuild the node as-is.
        QueryOp::Scan { sort_idx } => QueryOp::Scan { sort_idx: *sort_idx },

        QueryOp::ScanRelation { rel_id } => QueryOp::ScanRelation { rel_id: *rel_id },

        // Interior nodes: recursively optimize each child subtree and
        // reconstruct the node around the optimized children.
        QueryOp::Filter { input, pred } => QueryOp::Filter {
            input: Box::new(optimize(input)),
            pred: pred.clone(),
        },

        QueryOp::Project { input, columns } => QueryOp::Project {
            input: Box::new(optimize(input)),
            columns: columns.clone(),
        },

        QueryOp::Join { left, right, cond } => QueryOp::Join {
            left: Box::new(optimize(left)),
            right: Box::new(optimize(right)),
            cond: cond.clone(),
        },

        QueryOp::Union { left, right } => QueryOp::Union {
            left: Box::new(optimize(left)),
            right: Box::new(optimize(right)),
        },

        QueryOp::Distinct { input } => QueryOp::Distinct {
            input: Box::new(optimize(input)),
        },

        QueryOp::Negate { input } => QueryOp::Negate {
            input: Box::new(optimize(input)),
        },

        QueryOp::Constant { tuple } => QueryOp::Constant { tuple: tuple.clone() },

        QueryOp::Empty => QueryOp::Empty,

        QueryOp::Apply { input, func_idx, arg_col } => QueryOp::Apply {
            input: Box::new(optimize(input)),
            func_idx: *func_idx,
            arg_col: *arg_col,
        },

        QueryOp::ApplyField { input, func_idx, arg_col, field_name } => QueryOp::ApplyField {
            input: Box::new(optimize(input)),
            func_idx: *func_idx,
            arg_col: *arg_col,
            field_name: field_name.clone(),
        },

        // DBSP temporal operators: optimize children, preserve state_id
        QueryOp::Delay { input, state_id } => QueryOp::Delay {
            input: Box::new(optimize(input)),
            state_id: *state_id,
        },

        QueryOp::Diff { input, state_id } => QueryOp::Diff {
            input: Box::new(optimize(input)),
            state_id: *state_id,
        },

        QueryOp::Integrate { input, state_id } => QueryOp::Integrate {
            input: Box::new(optimize(input)),
            state_id: *state_id,
        },
    }
}

/// Apply algebraic rewrite rules to a plan node.
/// Assumes children are already optimized.
+fn apply_rules(plan: QueryOp) -> QueryOp { + match plan { + // ============================================================ + // Filter Laws + // ============================================================ + + // Filter(True, x) → x + QueryOp::Filter { input, pred: Predicate::True } => *input, + + // Filter(False, x) → Empty + QueryOp::Filter { pred: Predicate::False, .. } => QueryOp::Empty, + + // Filter(p, Filter(q, x)) → Filter(And(p, q), x) + QueryOp::Filter { input, pred: outer_pred } => { + if let QueryOp::Filter { input: inner_input, pred: inner_pred } = *input { + QueryOp::Filter { + input: inner_input, + pred: Predicate::And( + Box::new(outer_pred), + Box::new(inner_pred), + ), + } + } else { + QueryOp::Filter { + input: Box::new(*input), + pred: outer_pred, + } + } + } + + // ============================================================ + // Distinct Laws + // ============================================================ + + // Distinct(Distinct(x)) → Distinct(x) + QueryOp::Distinct { input } => { + if matches!(*input, QueryOp::Distinct { .. 
}) { + *input + } else { + QueryOp::Distinct { input } + } + } + + // ============================================================ + // Union Laws + // ============================================================ + + // Union(x, Empty) → x + // Union(Empty, x) → x + QueryOp::Union { left, right } => { + match (&*left, &*right) { + (QueryOp::Empty, _) => *right, + (_, QueryOp::Empty) => *left, + _ => QueryOp::Union { left, right }, + } + } + + // ============================================================ + // Negate Laws + // ============================================================ + + // Negate(Negate(x)) → x + QueryOp::Negate { input } => { + if let QueryOp::Negate { input: inner } = *input { + *inner + } else { + QueryOp::Negate { input } + } + } + + // ============================================================ + // Join Laws + // ============================================================ + + // Join(x, Empty) → Empty + // Join(Empty, x) → Empty + QueryOp::Join { left, right, cond } => { + if matches!(*left, QueryOp::Empty) || matches!(*right, QueryOp::Empty) { + QueryOp::Empty + } else { + QueryOp::Join { left, right, cond } + } + } + + // No rewrite applies + other => other, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::query::backend::JoinCond; + use crate::id::{NumericId, Slid}; + + #[test] + fn test_filter_true_elimination() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan.clone()), + pred: Predicate::True, + }; + let optimized = optimize(&filter); + assert!(matches!(optimized, QueryOp::Scan { sort_idx: 0 })); + } + + #[test] + fn test_filter_false_to_empty() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::False, + }; + let optimized = optimize(&filter); + assert!(matches!(optimized, QueryOp::Empty)); + } + + #[test] + fn test_filter_fusion() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter1 = 
QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::ColEqConst { col: 0, val: Slid::from_usize(1) }, + }; + let filter2 = QueryOp::Filter { + input: Box::new(filter1), + pred: Predicate::ColEqConst { col: 0, val: Slid::from_usize(2) }, + }; + let optimized = optimize(&filter2); + + // Should be a single filter with And predicate + if let QueryOp::Filter { pred: Predicate::And(_, _), .. } = optimized { + // Good! + } else { + panic!("Expected fused filter with And predicate, got {:?}", optimized); + } + } + + #[test] + fn test_distinct_idempotent() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let distinct1 = QueryOp::Distinct { + input: Box::new(scan), + }; + let distinct2 = QueryOp::Distinct { + input: Box::new(distinct1.clone()), + }; + let optimized = optimize(&distinct2); + + // Should be single distinct + if let QueryOp::Distinct { input } = optimized { + assert!(matches!(*input, QueryOp::Scan { .. })); + } else { + panic!("Expected Distinct, got {:?}", optimized); + } + } + + #[test] + fn test_union_empty_elimination() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let union = QueryOp::Union { + left: Box::new(scan.clone()), + right: Box::new(QueryOp::Empty), + }; + let optimized = optimize(&union); + assert!(matches!(optimized, QueryOp::Scan { sort_idx: 0 })); + + // Also test left empty + let union2 = QueryOp::Union { + left: Box::new(QueryOp::Empty), + right: Box::new(scan), + }; + let optimized2 = optimize(&union2); + assert!(matches!(optimized2, QueryOp::Scan { sort_idx: 0 })); + } + + #[test] + fn test_negate_involution() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let negate1 = QueryOp::Negate { + input: Box::new(scan), + }; + let negate2 = QueryOp::Negate { + input: Box::new(negate1), + }; + let optimized = optimize(&negate2); + assert!(matches!(optimized, QueryOp::Scan { sort_idx: 0 })); + } + + #[test] + fn test_join_empty_elimination() { + let scan = QueryOp::Scan { sort_idx: 0 }; + let join = QueryOp::Join { + left: 
Box::new(scan), + right: Box::new(QueryOp::Empty), + cond: JoinCond::Cross, + }; + let optimized = optimize(&join); + assert!(matches!(optimized, QueryOp::Empty)); + } +} diff --git a/src/query/pattern.rs b/src/query/pattern.rs new file mode 100644 index 0000000..60cb4a6 --- /dev/null +++ b/src/query/pattern.rs @@ -0,0 +1,171 @@ +//! Pattern-based query representation. +//! +//! This represents the common pattern from bootstrap_queries: +//! "find all X : Sort where X.func₁ = Y₁ ∧ X.func₂ = Y₂ ∧ ..." +//! +//! In query semantics terms, this is an ∀-style query with an open result sort: +//! ```text +//! theory Query extends Base { +//! Result : Sort; // Open (no constants) +//! elem : Result → Sort; // Projection to base +//! axiom { r : Result ⊢ elem(r).func₁ = Y₁ ∧ elem(r).func₂ = Y₂ } +//! } +//! ``` +//! +//! The unique maximal element (cofree model) is the set of all elements +//! satisfying the constraint. + +use crate::id::Slid; + +/// A pattern query: find all elements of a sort matching constraints. +/// +/// Equivalent to SQL: `SELECT elem FROM Sort WHERE func₁(elem) = v₁ AND ...` +/// +/// Uses `usize` for sort/function IDs (internal indices) and `Slid` for +/// element values (external references). 
+#[derive(Debug, Clone)] +pub struct Pattern { + /// The sort to scan (sort index) + pub source_sort: usize, + /// Constraints: each is (func_index, expected_value) + pub constraints: Vec, + /// What to project/return + pub projection: Projection, +} + +/// A constraint: func(elem) must equal expected_value +#[derive(Debug, Clone)] +pub struct Constraint { + /// Function index to apply to the scanned element + pub func: usize, + /// Expected value (must match) + pub expected: Slid, +} + +/// What to return from the query +#[derive(Debug, Clone)] +pub enum Projection { + /// Return the element itself + Element, + /// Return the value of a function applied to the element + Func(usize), + /// Return a tuple of function values + Tuple(Vec), +} + +impl Pattern { + /// Create a new pattern query. + /// + /// # Example + /// + /// ```ignore + /// // Find all Srt where Srt.theory == theory_slid + /// let pattern = Pattern::new(store.sort_ids.srt.unwrap()) + /// .filter(store.func_ids.srt_theory.unwrap(), theory_slid); + /// ``` + pub fn new(source_sort: usize) -> Self { + Self { + source_sort, + constraints: Vec::new(), + projection: Projection::Element, + } + } + + /// Add a constraint: func(elem) must equal value. + pub fn filter(mut self, func: usize, value: Slid) -> Self { + self.constraints.push(Constraint { + func, + expected: value, + }); + self + } + + /// Project a function value instead of the element. + pub fn project(mut self, func: usize) -> Self { + self.projection = Projection::Func(func); + self + } + + /// Project a tuple of function values. 
+ pub fn project_tuple(mut self, funcs: Vec) -> Self { + self.projection = Projection::Tuple(funcs); + self + } +} + +// ============================================================================ +// Pattern → QueryOp Compilation +// ============================================================================ + +use super::backend::{QueryOp, Predicate}; + +impl Pattern { + /// Compile a Pattern into a QueryOp for the naive backend. + /// + /// A Pattern query: + /// 1. Scans all elements of source_sort + /// 2. Filters by constraints: func(elem) = expected for each constraint + /// 3. Projects according to projection type + /// + /// We implement this as: + /// - Scan → single-column tuples (elem) + /// - For each constraint, use FuncEqConst predicate + /// - Project to requested columns + pub fn compile(&self) -> QueryOp { + // Start with a scan of the sort + let mut plan = QueryOp::Scan { sort_idx: self.source_sort }; + + // Apply constraints as filters + // Each constraint checks: func(elem) = expected + for constraint in &self.constraints { + plan = QueryOp::Filter { + input: Box::new(plan), + pred: Predicate::FuncEqConst { + func_idx: constraint.func, + arg_col: 0, // The scanned element is always in column 0 + expected: constraint.expected, + }, + }; + } + + // Apply projection + match &self.projection { + Projection::Element => { + // Already have the element in col 0, no change needed + } + Projection::Func(func_idx) => { + // Apply function to element, return that instead + // This requires an Apply operation + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func_idx, + arg_col: 0, + }; + // Now we have (elem, func(elem)), project to just col 1 + plan = QueryOp::Project { + input: Box::new(plan), + columns: vec![1], + }; + } + Projection::Tuple(func_indices) => { + // Apply each function in sequence, then project + for func_idx in func_indices.iter() { + plan = QueryOp::Apply { + input: Box::new(plan), + func_idx: *func_idx, + arg_col: 0, 
// Always apply to original element + }; + } + // Now we have (elem, f1(elem), f2(elem), ...), project to func results + // Columns 1, 2, ... are the func results + let columns: Vec = (1..=func_indices.len()).collect(); + plan = QueryOp::Project { + input: Box::new(plan), + columns, + }; + } + } + + plan + } +} diff --git a/src/query/store_queries.rs b/src/query/store_queries.rs new file mode 100644 index 0000000..63a46d3 --- /dev/null +++ b/src/query/store_queries.rs @@ -0,0 +1,672 @@ +//! Store query integration: using compiled queries to replace bootstrap_queries. +//! +//! This module provides query methods on Store that use the compiled Query API +//! instead of handcoded iterations. It demonstrates that the Query compiler +//! can replace bootstrap_queries.rs. +//! +//! # Migration Path +//! +//! 1. First, create query versions here that match bootstrap_queries behavior +//! 2. Add tests that validate both produce same results +//! 3. Once validated, swap implementations in bootstrap_queries +//! 4. Eventually deprecate bootstrap_queries in favor of these +//! +//! # Example +//! +//! ```ignore +//! // Old: bootstrap_queries.rs +//! for srt_slid in self.elements_of_sort(srt_sort) { +//! if self.get_func(theory_func, srt_slid) == Some(theory_slid) { ... } +//! } +//! +//! // New: store_queries.rs using Query compiler +//! let plan = Query::scan(srt_sort) +//! .filter_eq(theory_func, 0, theory_slid) +//! .compile(); +//! let result = execute(&plan, &store.meta); +//! ``` + +use crate::core::DerivedSort; +use crate::id::{NumericId, Slid, Uuid}; +use crate::store::Store; +use crate::store::append::AppendOps; +use crate::store::bootstrap_queries::{SortInfo, FuncInfo, RelInfo, ElemInfo, FuncValInfo, RelTupleInfo}; +use super::backend::execute; +use super::compile::compile_simple_filter; + +impl Store { + /// Get the UUID for an element in GeologMeta by its Slid. + /// Used for deterministic ordering: UUIDs v7 are time-ordered. 
+ pub fn get_element_uuid(&self, slid: Slid) -> Uuid { + if let Some(&luid) = self.meta.luids.get(slid.index()) { + self.universe.get(luid).unwrap_or(Uuid::nil()) + } else { + Uuid::nil() + } + } + + /// Query all sorts belonging to a theory (using compiled query engine). + /// + /// This is equivalent to `query_theory_sorts` in bootstrap_queries.rs, + /// but uses the Query compiler instead of handcoded iteration. + pub fn query_theory_sorts_compiled(&self, theory_slid: Slid) -> Vec { + let Some(srt_sort) = self.sort_ids.srt else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.srt_theory else { + return vec![]; + }; + + // Compile and execute the query + let plan = compile_simple_filter(srt_sort, theory_func, theory_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to SortInfo + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&srt_slid) = tuple.first() { + let name = self.get_element_name(srt_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + infos.push(SortInfo { + name: short_name, + slid: srt_slid, + }); + } + } + // Sort by UUID to ensure deterministic order matching original creation order + // (UUIDs v7 are time-ordered, so earlier-created elements come first) + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all functions belonging to a theory (using compiled query engine). + /// + /// This is equivalent to `query_theory_funcs` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_theory_funcs_compiled(&self, theory_slid: Slid) -> Vec { + let Some(func_sort) = self.sort_ids.func else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.func_theory else { + return vec![]; + }; + let Some(dom_func) = self.func_ids.func_dom else { + return vec![]; + }; + let Some(cod_func) = self.func_ids.func_cod else { + return vec![]; + }; + + // Compile and execute the query to find matching functions + let plan = compile_simple_filter(func_sort, theory_func, theory_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to FuncInfo (with domain/codomain lookups) + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&func_slid) = tuple.first() { + let name = self.get_element_name(func_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + // Get domain and codomain DSorts (using bootstrap logic) + let domain = self + .get_func(dom_func, func_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + let codomain = self + .get_func(cod_func, func_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + + infos.push(FuncInfo { + name: short_name, + slid: func_slid, + domain, + codomain, + }); + } + } + // Sort by UUID to ensure deterministic order matching original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all relations belonging to a theory (using compiled query engine). + /// + /// This is equivalent to `query_theory_rels` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_theory_rels_compiled(&self, theory_slid: Slid) -> Vec { + let Some(rel_sort) = self.sort_ids.rel else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.rel_theory else { + return vec![]; + }; + let Some(dom_func) = self.func_ids.rel_dom else { + return vec![]; + }; + + // Compile and execute the query to find matching relations + let plan = compile_simple_filter(rel_sort, theory_func, theory_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to RelInfo (with domain lookup) + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&rel_slid) = tuple.first() { + let name = self.get_element_name(rel_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + // Get domain DSort (using bootstrap logic) + let domain = self + .get_func(dom_func, rel_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + + infos.push(RelInfo { + name: short_name, + slid: rel_slid, + domain, + }); + } + } + // Sort by UUID to ensure deterministic order matching original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + // ======================================================================== + // Instance queries (compiled versions) + // ======================================================================== + + /// Query all elements belonging to an instance (using compiled query engine). + /// + /// This is equivalent to `query_instance_elems` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_instance_elems_compiled(&self, instance_slid: Slid) -> Vec { + let Some(elem_sort) = self.sort_ids.elem else { + return vec![]; + }; + let Some(instance_func) = self.func_ids.elem_instance else { + return vec![]; + }; + let Some(sort_func) = self.func_ids.elem_sort else { + return vec![]; + }; + + // Compile and execute the query to find matching elements + let plan = compile_simple_filter(elem_sort, instance_func, instance_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to ElemInfo + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&elem_slid) = tuple.first() { + let name = self.get_element_name(elem_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + let srt_slid = self.get_func(sort_func, elem_slid); + + infos.push(ElemInfo { + name: short_name, + slid: elem_slid, + srt_slid, + }); + } + } + // Sort by UUID to preserve original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all function values in an instance (using compiled query engine). + /// + /// This is equivalent to `query_instance_func_vals` in bootstrap_queries.rs, + /// but uses the Query compiler for the initial scan+filter. 
+ pub fn query_instance_func_vals_compiled(&self, instance_slid: Slid) -> Vec { + let Some(fv_sort) = self.sort_ids.func_val else { + return vec![]; + }; + let Some(instance_func) = self.func_ids.func_val_instance else { + return vec![]; + }; + let Some(func_func) = self.func_ids.func_val_func else { + return vec![]; + }; + let Some(arg_func) = self.func_ids.func_val_arg else { + return vec![]; + }; + let Some(result_func) = self.func_ids.func_val_result else { + return vec![]; + }; + + // Compile and execute the query + let plan = compile_simple_filter(fv_sort, instance_func, instance_slid); + let result = execute(&plan, &self.meta); + + // Convert query results to FuncValInfo + let mut infos = Vec::new(); + for tuple in result.tuples.keys() { + if let Some(&fv_slid) = tuple.first() { + infos.push(FuncValInfo { + slid: fv_slid, + func_slid: self.get_func(func_func, fv_slid), + arg_slid: self.get_func(arg_func, fv_slid), + result_slid: self.get_func(result_func, fv_slid), + }); + } + } + // Sort by UUID to preserve original creation order + infos.sort_by_key(|info| self.get_element_uuid(info.slid)); + infos + } + + /// Query all relation tuples in an instance. + /// + /// NOTE: Relation tuples are now stored in columnar batches (see `store::columnar`), + /// not as individual GeologMeta elements. This function returns empty until + /// columnar batch loading is implemented. + /// + /// TODO: Implement columnar batch loading for relation tuples. + pub fn query_instance_rel_tuples_compiled(&self, _instance_slid: Slid) -> Vec { + // Relation tuples are stored in columnar batches, not GeologMeta elements. + // Return empty until columnar batch loading is implemented. + vec![] + } +} + +#[cfg(test)] +mod tests { + use crate::repl::ReplState; + + /// Test that compiled query matches bootstrap query results. 
    #[test]
    fn test_compiled_matches_bootstrap_sorts() {
        let source = r#"
            theory TestTheory {
                A : Sort;
                B : Sort;
                C : Sort;
                f : A -> B;
            }
        "#;

        let mut repl = ReplState::new();
        // NOTE(review): errors from execute_geolog are discarded here —
        // presumably the fixture is always valid; verify if the DSL changes.
        let _ = repl.execute_geolog(source);

        let theory_slid = repl.store.resolve_name("TestTheory")
            .expect("Theory should exist").0;

        // Compare bootstrap vs compiled
        let bootstrap = repl.store.query_theory_sorts(theory_slid);
        let compiled = repl.store.query_theory_sorts_compiled(theory_slid);

        // Same number of results
        assert_eq!(
            bootstrap.len(), compiled.len(),
            "Bootstrap returned {} sorts, compiled returned {}",
            bootstrap.len(), compiled.len()
        );

        // Same names (order may differ) — compare as sorted lists
        let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|s| &s.name).collect();
        let mut compiled_names: Vec<_> = compiled.iter().map(|s| &s.name).collect();
        bootstrap_names.sort();
        compiled_names.sort();

        assert_eq!(bootstrap_names, compiled_names, "Sort names should match");
    }

    /// Test compiled query with theory that has no sorts.
    #[test]
    fn test_compiled_empty_theory() {
        let source = r#"
            theory EmptyTheory {
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let theory_slid = repl.store.resolve_name("EmptyTheory")
            .expect("Theory should exist").0;

        // Both paths must agree on "no sorts"
        let bootstrap = repl.store.query_theory_sorts(theory_slid);
        let compiled = repl.store.query_theory_sorts_compiled(theory_slid);

        assert_eq!(bootstrap.len(), 0);
        assert_eq!(compiled.len(), 0);
    }

    /// Test that multiple theories have independent sorts.
    #[test]
    fn test_compiled_multiple_theories() {
        let source = r#"
            theory Theory1 {
                X : Sort;
                Y : Sort;
            }
            theory Theory2 {
                P : Sort;
                Q : Sort;
                R : Sort;
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let theory1_slid = repl.store.resolve_name("Theory1")
            .expect("Theory1 should exist").0;
        let theory2_slid = repl.store.resolve_name("Theory2")
            .expect("Theory2 should exist").0;

        // Theory1 should have X, Y
        let t1_bootstrap = repl.store.query_theory_sorts(theory1_slid);
        let t1_compiled = repl.store.query_theory_sorts_compiled(theory1_slid);

        assert_eq!(t1_bootstrap.len(), 2);
        assert_eq!(t1_compiled.len(), 2);

        // Theory2 should have P, Q, R
        let t2_bootstrap = repl.store.query_theory_sorts(theory2_slid);
        let t2_compiled = repl.store.query_theory_sorts_compiled(theory2_slid);

        assert_eq!(t2_bootstrap.len(), 3);
        assert_eq!(t2_compiled.len(), 3);

        // Names should be independent: no sort leaks across theory boundaries
        let t1_names: std::collections::HashSet<_> =
            t1_compiled.iter().map(|s| &s.name).collect();
        let t2_names: std::collections::HashSet<_> =
            t2_compiled.iter().map(|s| &s.name).collect();

        assert!(t1_names.contains(&"X".to_string()));
        assert!(t1_names.contains(&"Y".to_string()));
        assert!(t2_names.contains(&"P".to_string()));
        assert!(t2_names.contains(&"Q".to_string()));
        assert!(t2_names.contains(&"R".to_string()));
    }

    /// Test that compiled query matches bootstrap query for functions.
    #[test]
    fn test_compiled_matches_bootstrap_funcs() {
        let source = r#"
            theory FuncTheory {
                A : Sort;
                B : Sort;
                C : Sort;
                f : A -> B;
                g : B -> C;
                h : A -> C;
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let theory_slid = repl.store.resolve_name("FuncTheory")
            .expect("Theory should exist").0;

        // Compare bootstrap vs compiled
        let bootstrap = repl.store.query_theory_funcs(theory_slid);
        let compiled = repl.store.query_theory_funcs_compiled(theory_slid);

        // Same number of results
        assert_eq!(
            bootstrap.len(), compiled.len(),
            "Bootstrap returned {} funcs, compiled returned {}",
            bootstrap.len(), compiled.len()
        );

        // Same names (order may differ)
        let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|f| &f.name).collect();
        let mut compiled_names: Vec<_> = compiled.iter().map(|f| &f.name).collect();
        bootstrap_names.sort();
        compiled_names.sort();

        assert_eq!(bootstrap_names, compiled_names, "Function names should match");

        // Verify we have the expected functions
        // (double `&&`: compiled_names holds `&String`, contains takes a reference to that)
        assert!(compiled_names.contains(&&"f".to_string()));
        assert!(compiled_names.contains(&&"g".to_string()));
        assert!(compiled_names.contains(&&"h".to_string()));
    }

    /// Test that compiled query matches bootstrap query for relations.
    #[test]
    fn test_compiled_matches_bootstrap_rels() {
        let source = r#"
            theory RelTheory {
                Node : Sort;
                Source : Node -> Prop;
                Sink : Node -> Prop;
                Connected : [x: Node, y: Node] -> Prop;
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let theory_slid = repl.store.resolve_name("RelTheory")
            .expect("Theory should exist").0;

        // Compare bootstrap vs compiled
        let bootstrap = repl.store.query_theory_rels(theory_slid);
        let compiled = repl.store.query_theory_rels_compiled(theory_slid);

        // Same number of results
        assert_eq!(
            bootstrap.len(), compiled.len(),
            "Bootstrap returned {} rels, compiled returned {}",
            bootstrap.len(), compiled.len()
        );

        // Same names (order may differ)
        let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|r| &r.name).collect();
        let mut compiled_names: Vec<_> = compiled.iter().map(|r| &r.name).collect();
        bootstrap_names.sort();
        compiled_names.sort();

        assert_eq!(bootstrap_names, compiled_names, "Relation names should match");

        // Verify we have the expected relations (both unary and binary)
        assert!(compiled_names.contains(&&"Source".to_string()));
        assert!(compiled_names.contains(&&"Sink".to_string()));
        assert!(compiled_names.contains(&&"Connected".to_string()));
    }

    // ========================================================================
    // Instance query tests
    // ========================================================================

    /// Test that compiled query matches bootstrap for instance elements.
    #[test]
    fn test_compiled_matches_bootstrap_instance_elems() {
        let source = r#"
            theory Graph {
                V : Sort;
                E : Sort;
                src : E -> V;
                tgt : E -> V;
            }

            instance SimpleGraph : Graph = {
                a : V;
                b : V;
                c : V;
                e1 : E;
                e2 : E;
                e1 src = a;
                e1 tgt = b;
                e2 src = b;
                e2 tgt = c;
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let instance_slid = repl.store.resolve_name("SimpleGraph")
            .expect("Instance should exist").0;

        // Compare bootstrap vs compiled
        let bootstrap = repl.store.query_instance_elems(instance_slid);
        let compiled = repl.store.query_instance_elems_compiled(instance_slid);

        // Same number of results
        assert_eq!(
            bootstrap.len(), compiled.len(),
            "Bootstrap returned {} elems, compiled returned {}",
            bootstrap.len(), compiled.len()
        );

        // Should have 5 elements: a, b, c, e1, e2
        assert_eq!(compiled.len(), 5, "Expected 5 elements");

        // Same names (order may differ)
        let mut bootstrap_names: Vec<_> = bootstrap.iter().map(|e| &e.name).collect();
        let mut compiled_names: Vec<_> = compiled.iter().map(|e| &e.name).collect();
        bootstrap_names.sort();
        compiled_names.sort();

        assert_eq!(bootstrap_names, compiled_names, "Element names should match");
    }

    /// Test that compiled query matches bootstrap for function values.
    #[test]
    fn test_compiled_matches_bootstrap_func_vals() {
        let source = r#"
            theory Graph {
                V : Sort;
                E : Sort;
                src : E -> V;
                tgt : E -> V;
            }

            instance TwoEdges : Graph = {
                v1 : V;
                v2 : V;
                v3 : V;
                edge1 : E;
                edge2 : E;
                edge1 src = v1;
                edge1 tgt = v2;
                edge2 src = v2;
                edge2 tgt = v3;
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let instance_slid = repl.store.resolve_name("TwoEdges")
            .expect("Instance should exist").0;

        // Compare bootstrap vs compiled
        let bootstrap = repl.store.query_instance_func_vals(instance_slid);
        let compiled = repl.store.query_instance_func_vals_compiled(instance_slid);

        // Same number of results
        assert_eq!(
            bootstrap.len(), compiled.len(),
            "Bootstrap returned {} func_vals, compiled returned {}",
            bootstrap.len(), compiled.len()
        );

        // Should have 4 function values: edge1.src, edge1.tgt, edge2.src, edge2.tgt
        assert_eq!(compiled.len(), 4, "Expected 4 function values");
    }

    /// Test that compiled query matches bootstrap for relation tuples.
    ///
    /// NOTE: Relation tuples are now stored in columnar batches (see store::columnar),
    /// not as individual GeologMeta elements. The bootstrap and compiled queries
    /// for RelTuple elements return empty since we no longer create those elements.
    ///
    /// Relation tuple data is now accessed via `Store::load_instance_data_batches()`.
    #[test]
    fn test_compiled_matches_bootstrap_rel_tuples() {
        let source = r#"
            theory NodeMarking {
                Node : Sort;
                Marked : [n: Node] -> Prop;
            }

            instance ThreeNodes : NodeMarking = {
                n1 : Node;
                n2 : Node;
                n3 : Node;
                [n: n1] Marked;
                [n: n3] Marked;
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let instance_slid = repl.store.resolve_name("ThreeNodes")
            .expect("Instance should exist").0;

        // Compare bootstrap vs compiled - both should return empty now
        // since relation tuples are stored in columnar batches, not GeologMeta
        let bootstrap = repl.store.query_instance_rel_tuples(instance_slid);
        let compiled = repl.store.query_instance_rel_tuples_compiled(instance_slid);

        // Same number of results (both empty)
        assert_eq!(
            bootstrap.len(), compiled.len(),
            "Bootstrap returned {} rel_tuples, compiled returned {}",
            bootstrap.len(), compiled.len()
        );

        // Relation tuples are no longer stored as GeologMeta elements
        // They're in columnar batches accessed via load_instance_data_batches()
        assert_eq!(compiled.len(), 0, "RelTuple elements are not created (tuples in columnar batches)");

        // Note: In in-memory mode (no store path), columnar batches aren't persisted.
        // The in-memory Structure still has the relation tuples - they're just not
        // serialized to disk. For tests with persistence, use a temp dir.
        //
        // The relation tuples are accessible via the in-memory Structure:
        // (RelationStorage is imported locally for its `len` trait method)
        use crate::core::RelationStorage;
        let entry = repl.instances.get("ThreeNodes").expect("Instance entry should exist");
        let rel_count: usize = entry.structure.relations.iter()
            .map(|r| r.len())
            .sum();
        assert_eq!(rel_count, 2, "Expected 2 relation tuples in in-memory Structure");
    }

    /// Test compiled query with empty instance.
    #[test]
    fn test_compiled_empty_instance() {
        let source = r#"
            theory Simple {
                T : Sort;
            }

            instance EmptyInst : Simple = {
            }
        "#;

        let mut repl = ReplState::new();
        let _ = repl.execute_geolog(source);

        let instance_slid = repl.store.resolve_name("EmptyInst")
            .expect("Instance should exist").0;

        // All three compiled instance queries must agree with bootstrap on emptiness.
        let bootstrap_elems = repl.store.query_instance_elems(instance_slid);
        let compiled_elems = repl.store.query_instance_elems_compiled(instance_slid);
        assert_eq!(bootstrap_elems.len(), 0);
        assert_eq!(compiled_elems.len(), 0);

        let bootstrap_fvs = repl.store.query_instance_func_vals(instance_slid);
        let compiled_fvs = repl.store.query_instance_func_vals_compiled(instance_slid);
        assert_eq!(bootstrap_fvs.len(), 0);
        assert_eq!(compiled_fvs.len(), 0);

        let bootstrap_rts = repl.store.query_instance_rel_tuples(instance_slid);
        let compiled_rts = repl.store.query_instance_rel_tuples_compiled(instance_slid);
        assert_eq!(bootstrap_rts.len(), 0);
        assert_eq!(compiled_rts.len(), 0);
    }
}
diff --git a/src/query/to_relalg.rs b/src/query/to_relalg.rs
new file mode 100644
index 0000000..30dcfe4
--- /dev/null
+++ b/src/query/to_relalg.rs
@@ -0,0 +1,1386 @@
//! Compiler from QueryOp plans to RelAlgIR instances.
//!
//! This module creates geolog Structure instances (of the RelAlgIR theory)
//! from QueryOp query plans. The resulting structures can be:
//! - Inspected as first-class data
//! - Optimized using the RelAlgIR optimization axioms
//! - Executed via a RelAlgIR backend
//!
//! # Design
//!
//! The compiler traverses a QueryOp tree and for each node:
//! 1. Creates the corresponding Op element (ScanOp, FilterOp, etc.)
//! 2. Creates Wire elements for inputs/outputs
//! 3. Creates Schema elements describing wire types
//! 4. Sets up function values connecting the elements
//!
//! The resulting Structure includes:
//! - GeologMeta elements representing the source signature (Srt, Func)
//!
- RelAlgIR elements representing the query plan (Wire, Op, Schema) +//! +//! # Supported Operators +//! +//! The following QueryOp variants are compiled: +//! +//! | QueryOp | RelAlgIR Sort | Notes | +//! |------------------|------------------|------------------------------| +//! | `Scan` | `ScanOp` | Emits elements of a sort | +//! | `Filter` | `FilterOp` | With predicate compilation | +//! | `Distinct` | `DistinctOp` | Deduplication | +//! | `Join (Cross)` | `JoinOp` | Cartesian product | +//! | `Join (Equi)` | `JoinOp` | Hash join on key columns | +//! | `Union` | `UnionOp` | Bag union | +//! | `Project` | `ProjectOp` | Column selection/reordering | +//! | `Negate` | `NegateOp` | Flip multiplicities | +//! | `Empty` | `EmptyOp` | Identity for Union | +//! | `Delay` | `DelayOp` | DBSP: previous timestep | +//! | `Diff` | `DiffOp` | DBSP: change since last | +//! | `Integrate` | `IntegrateOp` | DBSP: accumulate | +//! +//! Not yet supported: `Constant` (needs Elem), `Apply` (needs Func). +//! +//! # Supported Predicates +//! +//! | Predicate | RelAlgIR Sort | Notes | +//! |------------------|---------------------|------------------------------| +//! | `True` | `TruePred` | Always true | +//! | `False` | `FalsePred` | Always false | +//! | `ColEqCol` | `ColEqPred` | Two columns equal | +//! | `ColEqConst` | `ConstEqPred` | Column equals constant | +//! | `FuncEq` | `FuncEqPred` | f(arg) = result | +//! | `FuncEqConst` | `FuncConstEqPred` | f(arg) = expected | +//! | `And` | `AndPred` | Conjunction | +//! | `Or` | `OrPred` | Disjunction | +//! +//! All predicate types are now supported! +//! +//! # Example +//! +//! ```ignore +//! use geolog::query::{QueryOp, to_relalg::compile_to_relalg}; +//! +//! let plan = QueryOp::Filter { +//! input: Box::new(QueryOp::Scan { sort_idx: 0 }), +//! pred: Predicate::True, +//! }; +//! +//! let instance = compile_to_relalg(&plan, &relalg_theory, &mut universe)?; +//! // instance.structure contains RelAlgIR elements +//! 
// instance.output_wire is the final Wire element +//! ``` + +use std::collections::HashMap; +use std::rc::Rc; + +use crate::core::{ElaboratedTheory, SortId, Structure}; +use crate::id::Slid; +use crate::query::backend::QueryOp; +use crate::universe::Universe; + +/// Result of compiling a QueryOp to a RelAlgIR instance. +pub struct RelAlgInstance { + /// The RelAlgIR structure + pub structure: Structure, + /// The output wire of the compiled plan + pub output_wire: Slid, + /// Mapping from Slid to element names (for debugging) + pub names: HashMap, + /// Mapping from Srt elements to source sort indices (for interpreter) + pub sort_mapping: HashMap, + /// Mapping from Elem elements to original target Slid values (for interpreter) + pub elem_value_mapping: HashMap, +} + +/// Context for the compilation process. +struct CompileContext<'a> { + /// The RelAlgIR theory + relalg_theory: &'a ElaboratedTheory, + /// Universe for generating Luids + universe: &'a mut Universe, + /// The structure being built + structure: Structure, + /// Element names for debugging + names: HashMap, + /// Counter for generating unique names + counter: usize, + + // Sort IDs in RelAlgIR (cached for efficiency) + sort_ids: RelAlgSortIds, + + // GeologMeta sort elements already created + // Maps source signature SortId -> RelAlgIR Slid for GeologMeta/Srt element + srt_elements: HashMap, + + // GeologMeta/Elem elements for target instance elements + // Maps target instance Slid -> RelAlgIR Slid for GeologMeta/Elem element + elem_elements: HashMap, + + // GeologMeta/Func elements for target signature functions + // Maps target func index -> RelAlgIR Slid for GeologMeta/Func element + func_elements: HashMap, + + // The "self-referencing" Theory element (for standalone queries) + theory_elem: Option, + + // Placeholder Instance element for Elem references + instance_elem: Option, +} + +/// Cached sort IDs from the RelAlgIR theory. +/// Many fields are reserved for future operator support. 
+#[allow(dead_code)] +struct RelAlgSortIds { + // GeologMeta inherited sorts + theory: SortId, + srt: SortId, + dsort: SortId, + base_ds: SortId, + func: SortId, + elem: SortId, + instance: SortId, + + // RelAlgIR sorts + schema: SortId, + unit_schema: SortId, + base_schema: SortId, + prod_schema: SortId, + wire: SortId, + op: SortId, + scan_op: SortId, + filter_op: SortId, + distinct_op: SortId, + negate_op: SortId, + join_op: SortId, + union_op: SortId, + delay_op: SortId, + diff_op: SortId, + integrate_op: SortId, + empty_op: SortId, + const_op: SortId, + project_op: SortId, + apply_op: SortId, + + // Projection mapping + proj_mapping: SortId, + proj_entry: SortId, + + // Predicates + pred: SortId, + true_pred: SortId, + false_pred: SortId, + col_eq_pred: SortId, + const_eq_pred: SortId, + func_eq_pred: SortId, + func_const_eq_pred: SortId, + and_pred: SortId, + or_pred: SortId, + + // Join conditions + join_cond: SortId, + equi_join_cond: SortId, + cross_join_cond: SortId, + + // Column references + col_ref: SortId, + col_path: SortId, + here_path: SortId, +} + +impl RelAlgSortIds { + fn from_theory(theory: &ElaboratedTheory) -> Result { + let sig = &theory.theory.signature; + let lookup = |name: &str| -> Result { + sig.lookup_sort(name) + .ok_or_else(|| format!("RelAlgIR theory missing sort: {}", name)) + }; + + Ok(Self { + // GeologMeta sorts are prefixed + theory: lookup("GeologMeta/Theory")?, + srt: lookup("GeologMeta/Srt")?, + dsort: lookup("GeologMeta/DSort")?, + base_ds: lookup("GeologMeta/BaseDS")?, + func: lookup("GeologMeta/Func")?, + elem: lookup("GeologMeta/Elem")?, + instance: lookup("GeologMeta/Instance")?, + + // RelAlgIR sorts + schema: lookup("Schema")?, + unit_schema: lookup("UnitSchema")?, + base_schema: lookup("BaseSchema")?, + prod_schema: lookup("ProdSchema")?, + wire: lookup("Wire")?, + op: lookup("Op")?, + scan_op: lookup("ScanOp")?, + filter_op: lookup("FilterOp")?, + distinct_op: lookup("DistinctOp")?, + negate_op: lookup("NegateOp")?, 
+ join_op: lookup("JoinOp")?, + union_op: lookup("UnionOp")?, + delay_op: lookup("DelayOp")?, + diff_op: lookup("DiffOp")?, + integrate_op: lookup("IntegrateOp")?, + empty_op: lookup("EmptyOp")?, + const_op: lookup("ConstOp")?, + project_op: lookup("ProjectOp")?, + apply_op: lookup("ApplyOp")?, + + proj_mapping: lookup("ProjMapping")?, + proj_entry: lookup("ProjEntry")?, + + pred: lookup("Pred")?, + true_pred: lookup("TruePred")?, + false_pred: lookup("FalsePred")?, + col_eq_pred: lookup("ColEqPred")?, + const_eq_pred: lookup("ConstEqPred")?, + func_eq_pred: lookup("FuncEqPred")?, + func_const_eq_pred: lookup("FuncConstEqPred")?, + and_pred: lookup("AndPred")?, + or_pred: lookup("OrPred")?, + + join_cond: lookup("JoinCond")?, + equi_join_cond: lookup("EquiJoinCond")?, + cross_join_cond: lookup("CrossJoinCond")?, + + col_ref: lookup("ColRef")?, + col_path: lookup("ColPath")?, + here_path: lookup("HerePath")?, + }) + } +} + +impl<'a> CompileContext<'a> { + fn new( + relalg_theory: &'a ElaboratedTheory, + universe: &'a mut Universe, + ) -> Result { + let sort_ids = RelAlgSortIds::from_theory(relalg_theory)?; + let num_sorts = relalg_theory.theory.signature.sorts.len(); + let num_funcs = relalg_theory.theory.signature.functions.len(); + + let mut structure = Structure::new(num_sorts); + + // Initialize function storage with empty columns for each function + // We use Local columns that will grow as elements are added + structure.functions = (0..num_funcs) + .map(|_| crate::core::FunctionColumn::Local(Vec::new())) + .collect(); + + // Initialize relation storage + let rel_arities: Vec = relalg_theory + .theory + .signature + .relations + .iter() + .map(|r| r.domain.arity()) + .collect(); + structure.init_relations(&rel_arities); + + Ok(Self { + relalg_theory, + universe, + structure, + names: HashMap::new(), + counter: 0, + sort_ids, + srt_elements: HashMap::new(), + elem_elements: HashMap::new(), + func_elements: HashMap::new(), + theory_elem: None, + instance_elem: 
None, + }) + } + + fn fresh_name(&mut self, prefix: &str) -> String { + self.counter += 1; + format!("{}_{}", prefix, self.counter) + } + + fn add_element(&mut self, sort_id: SortId, name: &str) -> Slid { + let (slid, _) = self.structure.add_element(self.universe, sort_id); + self.names.insert(slid, name.to_string()); + slid + } + + fn define_func(&mut self, func_name: &str, domain: Slid, codomain: Slid) -> Result<(), String> { + let func_id = self + .relalg_theory + .theory + .signature + .lookup_func(func_name) + .ok_or_else(|| format!("RelAlgIR missing function: {}", func_name))?; + + self.structure + .define_function(func_id, domain, codomain) + .map_err(|existing| { + format!( + "Conflicting definition for {} on {:?}: already defined as {:?}", + func_name, domain, existing + ) + }) + } + + /// Get or create the Theory element (self-referencing for standalone queries) + fn get_theory_elem(&mut self) -> Slid { + if let Some(elem) = self.theory_elem { + return elem; + } + + let elem = self.add_element(self.sort_ids.theory, "query_theory"); + + // Self-reference: Theory/parent = self + let _ = self.define_func("GeologMeta/Theory/parent", elem, elem); + + self.theory_elem = Some(elem); + elem + } + + /// Get or create a GeologMeta/Srt element for a source sort + fn get_srt_elem(&mut self, source_sort: usize) -> Result { + if let Some(&elem) = self.srt_elements.get(&source_sort) { + return Ok(elem); + } + + let theory = self.get_theory_elem(); + let name = self.fresh_name("srt"); + let elem = self.add_element(self.sort_ids.srt, &name); + + // Srt/theory = our theory element + self.define_func("GeologMeta/Srt/theory", elem, theory)?; + + self.srt_elements.insert(source_sort, elem); + Ok(elem) + } + + /// Get or create a placeholder Instance element for Elem references. + /// This represents "the instance being queried" - resolved at execution time. 
+ fn get_instance_elem(&mut self) -> Slid { + if let Some(elem) = self.instance_elem { + return elem; + } + + let theory = self.get_theory_elem(); + let elem = self.add_element(self.sort_ids.instance, "query_instance"); + + // Instance/theory = our theory element + let _ = self.define_func("GeologMeta/Instance/theory", elem, theory); + + self.instance_elem = Some(elem); + elem + } + + /// Get or create an Elem element for a target instance element. + /// + /// Note: Slid doesn't encode the sort, so we use sort 0 as a placeholder. + /// A full implementation would require passing the source structure to look up + /// the actual sort. The Elem is still created and linked, just with incomplete + /// sort information. + fn get_elem(&mut self, target_slid: Slid) -> Result { + if let Some(&elem) = self.elem_elements.get(&target_slid) { + return Ok(elem); + } + + // TODO: To properly set Elem/sort, we'd need access to the source structure + // to look up target_slid's sort. For now, use sort 0 as a placeholder. + let placeholder_sort = 0; + let srt_elem = self.get_srt_elem(placeholder_sort)?; + let instance = self.get_instance_elem(); + + let name = self.fresh_name("elem"); + let elem = self.add_element(self.sort_ids.elem, &name); + + // Elem/instance = our instance element + self.define_func("GeologMeta/Elem/instance", elem, instance)?; + // Elem/sort = the sort element (placeholder) + self.define_func("GeologMeta/Elem/sort", elem, srt_elem)?; + + self.elem_elements.insert(target_slid, elem); + Ok(elem) + } + + /// Get or create a Func element for a target signature function. 
+ fn get_func_elem(&mut self, func_idx: usize) -> Result { + if let Some(&elem) = self.func_elements.get(&func_idx) { + return Ok(elem); + } + + let theory = self.get_theory_elem(); + let name = self.fresh_name("func"); + let elem = self.add_element(self.sort_ids.func, &name); + + // Func/theory = our theory element + self.define_func("GeologMeta/Func/theory", elem, theory)?; + // Note: Func/dom and Func/cod require DSort elements, which we don't + // track. For now, these are left undefined (partial function). + + self.func_elements.insert(func_idx, elem); + Ok(elem) + } + + /// Create a BaseSchema for a sort + fn create_base_schema(&mut self, srt_elem: Slid) -> Result<(Slid, Slid), String> { + let bs_name = self.fresh_name("base_schema"); + let bs = self.add_element(self.sort_ids.base_schema, &bs_name); + + let schema_name = self.fresh_name("schema"); + let schema = self.add_element(self.sort_ids.schema, &schema_name); + + self.define_func("BaseSchema/schema", bs, schema)?; + self.define_func("BaseSchema/srt", bs, srt_elem)?; + + Ok((bs, schema)) + } + + /// Create a Wire with a given schema + fn create_wire(&mut self, schema: Slid) -> Result { + let name = self.fresh_name("wire"); + let wire = self.add_element(self.sort_ids.wire, &name); + self.define_func("Wire/schema", wire, schema)?; + Ok(wire) + } + + /// Create a TruePred and return the Pred elem + fn create_true_pred(&mut self) -> Result<(Slid, Slid), String> { + let tp_name = self.fresh_name("true_pred"); + let tp = self.add_element(self.sort_ids.true_pred, &tp_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("TruePred/pred", tp, pred)?; + + Ok((tp, pred)) + } + + /// Create a FalsePred and return the Pred elem + fn create_false_pred(&mut self) -> Result { + let fp_name = self.fresh_name("false_pred"); + let fp = self.add_element(self.sort_ids.false_pred, &fp_name); + + let pred_name = self.fresh_name("pred"); + let 
pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("FalsePred/pred", fp, pred)?; + + Ok(pred) + } + + /// Create an AndPred combining two predicates + fn create_and_pred(&mut self, left: Slid, right: Slid) -> Result { + let and_name = self.fresh_name("and_pred"); + let and_pred = self.add_element(self.sort_ids.and_pred, &and_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("AndPred/pred", and_pred, pred)?; + self.define_func("AndPred/left", and_pred, left)?; + self.define_func("AndPred/right", and_pred, right)?; + + Ok(pred) + } + + /// Create an OrPred combining two predicates + fn create_or_pred(&mut self, left: Slid, right: Slid) -> Result { + let or_name = self.fresh_name("or_pred"); + let or_pred = self.add_element(self.sort_ids.or_pred, &or_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("OrPred/pred", or_pred, pred)?; + self.define_func("OrPred/left", or_pred, left)?; + self.define_func("OrPred/right", or_pred, right)?; + + Ok(pred) + } + + /// Create a ColEqPred (left_col = right_col) + fn create_col_eq_pred(&mut self, wire: Slid, left_col: usize, right_col: usize) -> Result { + // Create left ColRef + let left_ref = self.create_col_ref(wire, left_col)?; + // Create right ColRef + let right_ref = self.create_col_ref(wire, right_col)?; + + let eq_name = self.fresh_name("col_eq_pred"); + let col_eq = self.add_element(self.sort_ids.col_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("ColEqPred/pred", col_eq, pred)?; + self.define_func("ColEqPred/left", col_eq, left_ref)?; + self.define_func("ColEqPred/right", col_eq, right_ref)?; + + Ok(pred) + } + + /// Create a ColRef for column index + fn create_col_ref(&mut self, wire: Slid, _col: usize) -> Result { + // For 
now, always use HerePath (column 0) + // TODO: Implement proper column path navigation for nested schemas + let here_name = self.fresh_name("here_path"); + let here = self.add_element(self.sort_ids.here_path, &here_name); + + let path_name = self.fresh_name("col_path"); + let col_path = self.add_element(self.sort_ids.col_path, &path_name); + + self.define_func("HerePath/path", here, col_path)?; + + let ref_name = self.fresh_name("col_ref"); + let col_ref = self.add_element(self.sort_ids.col_ref, &ref_name); + + self.define_func("ColRef/wire", col_ref, wire)?; + self.define_func("ColRef/path", col_ref, col_path)?; + + Ok(col_ref) + } + + /// Create a ConstEqPred (col = constant) + fn create_const_eq_pred(&mut self, wire: Slid, col: usize, val: Slid) -> Result { + // Create ColRef for the column + let col_ref = self.create_col_ref(wire, col)?; + + // Create Elem element for the constant value + let elem = self.get_elem(val)?; + + let eq_name = self.fresh_name("const_eq_pred"); + let const_eq = self.add_element(self.sort_ids.const_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("ConstEqPred/pred", const_eq, pred)?; + self.define_func("ConstEqPred/col", const_eq, col_ref)?; + self.define_func("ConstEqPred/val", const_eq, elem)?; + + Ok(pred) + } + + /// Create a FuncEqPred (func(arg_col) = result_col) + fn create_func_eq_pred( + &mut self, + wire: Slid, + func_idx: usize, + arg_col: usize, + result_col: usize, + ) -> Result { + // Create Func element + let func = self.get_func_elem(func_idx)?; + + // Create ColRefs + let arg_ref = self.create_col_ref(wire, arg_col)?; + let result_ref = self.create_col_ref(wire, result_col)?; + + let eq_name = self.fresh_name("func_eq_pred"); + let func_eq = self.add_element(self.sort_ids.func_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + 
self.define_func("FuncEqPred/pred", func_eq, pred)?; + self.define_func("FuncEqPred/func", func_eq, func)?; + self.define_func("FuncEqPred/arg", func_eq, arg_ref)?; + self.define_func("FuncEqPred/result", func_eq, result_ref)?; + + Ok(pred) + } + + /// Create a FuncConstEqPred (func(arg_col) = expected_elem) + fn create_func_const_eq_pred( + &mut self, + wire: Slid, + func_idx: usize, + arg_col: usize, + expected: Slid, + ) -> Result { + // Create Func element + let func = self.get_func_elem(func_idx)?; + + // Create ColRef for argument + let arg_ref = self.create_col_ref(wire, arg_col)?; + + // Create Elem for expected value + let expected_elem = self.get_elem(expected)?; + + let eq_name = self.fresh_name("func_const_eq_pred"); + let func_const_eq = self.add_element(self.sort_ids.func_const_eq_pred, &eq_name); + + let pred_name = self.fresh_name("pred"); + let pred = self.add_element(self.sort_ids.pred, &pred_name); + + self.define_func("FuncConstEqPred/pred", func_const_eq, pred)?; + self.define_func("FuncConstEqPred/func", func_const_eq, func)?; + self.define_func("FuncConstEqPred/arg", func_const_eq, arg_ref)?; + self.define_func("FuncConstEqPred/expected", func_const_eq, expected_elem)?; + + Ok(pred) + } +} + +/// Compile a predicate to a Pred element +fn compile_predicate( + ctx: &mut CompileContext<'_>, + wire: Slid, + pred: &crate::query::backend::Predicate, +) -> Result { + use crate::query::backend::Predicate; + + match pred { + Predicate::True => { + let (_, pred_elem) = ctx.create_true_pred()?; + Ok(pred_elem) + } + Predicate::False => { + ctx.create_false_pred() + } + Predicate::ColEqCol { left, right } => { + ctx.create_col_eq_pred(wire, *left, *right) + } + Predicate::ColEqConst { col, val } => { + ctx.create_const_eq_pred(wire, *col, *val) + } + Predicate::FuncEq { + func_idx, + arg_col, + result_col, + } => { + ctx.create_func_eq_pred(wire, *func_idx, *arg_col, *result_col) + } + Predicate::FuncEqConst { + func_idx, + arg_col, + expected, + } => { 
+ ctx.create_func_const_eq_pred(wire, *func_idx, *arg_col, *expected) + } + Predicate::And(left, right) => { + let left_pred = compile_predicate(ctx, wire, left)?; + let right_pred = compile_predicate(ctx, wire, right)?; + ctx.create_and_pred(left_pred, right_pred) + } + Predicate::Or(left, right) => { + let left_pred = compile_predicate(ctx, wire, left)?; + let right_pred = compile_predicate(ctx, wire, right)?; + ctx.create_or_pred(left_pred, right_pred) + } + } +} + +/// Compile a QueryOp into a RelAlgIR instance. +/// +/// # Arguments +/// * `plan` - The query plan to compile +/// * `relalg_theory` - The RelAlgIR theory +/// * `universe` - Universe for Luid generation +/// +/// # Returns +/// The compiled RelAlgIR instance, or an error message +pub fn compile_to_relalg( + plan: &QueryOp, + relalg_theory: &Rc, + universe: &mut Universe, +) -> Result { + let mut ctx = CompileContext::new(relalg_theory, universe)?; + + // Initialize function storage (will be lazy-initialized on first use) + // For now, we don't pre-init since we use define_function which auto-grows + + let output_wire = compile_op(&mut ctx, plan)?; + + // Invert srt_elements to get Slid -> sort_idx mapping + let sort_mapping: HashMap = ctx + .srt_elements + .iter() + .map(|(&sort_idx, &slid)| (slid, sort_idx)) + .collect(); + + // Invert elem_elements to get Elem Slid -> original value mapping + let elem_value_mapping: HashMap = ctx + .elem_elements + .iter() + .map(|(&target_slid, &elem_slid)| (elem_slid, target_slid)) + .collect(); + + Ok(RelAlgInstance { + structure: ctx.structure, + output_wire, + names: ctx.names, + sort_mapping, + elem_value_mapping, + }) +} + +/// Compile a single QueryOp, returning the output wire Slid. 
+fn compile_op(ctx: &mut CompileContext<'_>, op: &QueryOp) -> Result { + match op { + QueryOp::Scan { sort_idx } => compile_scan(ctx, *sort_idx), + + QueryOp::ScanRelation { rel_id } => { + // TODO: Add ScanRelationOp to RelAlgIR theory and implement + Err(format!("ScanRelation compilation not yet implemented (rel_id={})", rel_id)) + } + + QueryOp::Filter { input, pred } => { + let input_wire = compile_op(ctx, input)?; + compile_filter(ctx, input_wire, pred) + } + + QueryOp::Distinct { input } => { + let input_wire = compile_op(ctx, input)?; + compile_distinct(ctx, input_wire) + } + + QueryOp::Join { left, right, cond } => { + let left_wire = compile_op(ctx, left)?; + let right_wire = compile_op(ctx, right)?; + compile_join(ctx, left_wire, right_wire, cond) + } + + QueryOp::Union { left, right } => { + let left_wire = compile_op(ctx, left)?; + let right_wire = compile_op(ctx, right)?; + compile_union(ctx, left_wire, right_wire) + } + + // DBSP operators + QueryOp::Delay { input, state_id: _ } => { + let input_wire = compile_op(ctx, input)?; + compile_delay(ctx, input_wire) + } + + QueryOp::Diff { input, state_id: _ } => { + let input_wire = compile_op(ctx, input)?; + compile_diff(ctx, input_wire) + } + + QueryOp::Integrate { input, state_id: _ } => { + let input_wire = compile_op(ctx, input)?; + compile_integrate(ctx, input_wire) + } + + QueryOp::Negate { input } => { + let input_wire = compile_op(ctx, input)?; + compile_negate(ctx, input_wire) + } + + QueryOp::Empty => compile_empty(ctx), + + QueryOp::Project { input, columns } => { + let input_wire = compile_op(ctx, input)?; + compile_project(ctx, input_wire, columns) + } + + // Not yet implemented (require additional context) + QueryOp::Constant { .. } => Err("ConstantOp compilation not yet implemented (needs Elem)".to_string()), + QueryOp::Apply { .. } => Err("ApplyOp compilation not yet implemented (needs Func)".to_string()), + QueryOp::ApplyField { .. 
} => Err("ApplyFieldOp compilation not yet implemented".to_string()), + } +} + +fn compile_scan(ctx: &mut CompileContext<'_>, sort_idx: usize) -> Result { + // Get or create Srt element + let srt_elem = ctx.get_srt_elem(sort_idx)?; + + // Create schema for output + let (_, schema) = ctx.create_base_schema(srt_elem)?; + + // Create output wire + let out_wire = ctx.create_wire(schema)?; + + // Create ScanOp + let scan_name = ctx.fresh_name("scan"); + let scan = ctx.add_element(ctx.sort_ids.scan_op, &scan_name); + + // Create Op (sum type injection) + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + // Set function values + ctx.define_func("ScanOp/op", scan, op)?; + ctx.define_func("ScanOp/srt", scan, srt_elem)?; + ctx.define_func("ScanOp/out", scan, out_wire)?; + + Ok(out_wire) +} + +fn compile_filter( + ctx: &mut CompileContext<'_>, + input_wire: Slid, + predicate: &crate::query::backend::Predicate, +) -> Result { + // Compile the predicate + let pred = compile_predicate(ctx, input_wire, predicate)?; + + // Get input wire's schema for output + // In a full implementation, we'd look this up. For now, create a dummy schema. 
+ let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + + // Create output wire + let out_wire = ctx.create_wire(out_schema)?; + + // Create FilterOp + let filter_name = ctx.fresh_name("filter"); + let filter = ctx.add_element(ctx.sort_ids.filter_op, &filter_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("FilterOp/op", filter, op)?; + ctx.define_func("FilterOp/in", filter, input_wire)?; + ctx.define_func("FilterOp/out", filter, out_wire)?; + ctx.define_func("FilterOp/pred", filter, pred)?; + + Ok(out_wire) +} + +fn compile_distinct(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result { + // Create output schema (same as input) + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + // Create DistinctOp + let distinct_name = ctx.fresh_name("distinct"); + let distinct = ctx.add_element(ctx.sort_ids.distinct_op, &distinct_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("DistinctOp/op", distinct, op)?; + ctx.define_func("DistinctOp/in", distinct, input_wire)?; + ctx.define_func("DistinctOp/out", distinct, out_wire)?; + + Ok(out_wire) +} + +fn compile_join( + ctx: &mut CompileContext<'_>, + left_wire: Slid, + right_wire: Slid, + condition: &crate::query::backend::JoinCond, +) -> Result { + use crate::query::backend::JoinCond; + + // Create output schema (product of inputs) - simplified for now + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + // Create join condition based on type + let join_cond = match condition { + JoinCond::Cross => { + let cond_name = ctx.fresh_name("cross_join"); + let cross_join = 
ctx.add_element(ctx.sort_ids.cross_join_cond, &cond_name); + let join_cond_name = ctx.fresh_name("join_cond"); + let join_cond_elem = ctx.add_element(ctx.sort_ids.join_cond, &join_cond_name); + ctx.define_func("CrossJoinCond/cond", cross_join, join_cond_elem)?; + join_cond_elem + } + JoinCond::Equi { left_col, right_col } => { + // Create column references for the join keys + let left_ref = ctx.create_col_ref(left_wire, *left_col)?; + let right_ref = ctx.create_col_ref(right_wire, *right_col)?; + + let cond_name = ctx.fresh_name("equi_join"); + let equi_join = ctx.add_element(ctx.sort_ids.equi_join_cond, &cond_name); + let join_cond_name = ctx.fresh_name("join_cond"); + let join_cond_elem = ctx.add_element(ctx.sort_ids.join_cond, &join_cond_name); + + ctx.define_func("EquiJoinCond/cond", equi_join, join_cond_elem)?; + ctx.define_func("EquiJoinCond/left_col", equi_join, left_ref)?; + ctx.define_func("EquiJoinCond/right_col", equi_join, right_ref)?; + + join_cond_elem + } + }; + + // Create JoinOp + let join_name = ctx.fresh_name("join"); + let join = ctx.add_element(ctx.sort_ids.join_op, &join_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("JoinOp/op", join, op)?; + ctx.define_func("JoinOp/left_in", join, left_wire)?; + ctx.define_func("JoinOp/right_in", join, right_wire)?; + ctx.define_func("JoinOp/out", join, out_wire)?; + ctx.define_func("JoinOp/cond", join, join_cond)?; + + Ok(out_wire) +} + +fn compile_union( + ctx: &mut CompileContext<'_>, + left_wire: Slid, + right_wire: Slid, +) -> Result { + // Create output schema (same as inputs) + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + // Create UnionOp + let union_name = ctx.fresh_name("union"); + let union_op = ctx.add_element(ctx.sort_ids.union_op, &union_name); + + let op_name = ctx.fresh_name("op"); + let op = 
ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("UnionOp/op", union_op, op)?; + ctx.define_func("UnionOp/left_in", union_op, left_wire)?; + ctx.define_func("UnionOp/right_in", union_op, right_wire)?; + ctx.define_func("UnionOp/out", union_op, out_wire)?; + + Ok(out_wire) +} + +fn compile_delay(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result { + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + let delay_name = ctx.fresh_name("delay"); + let delay = ctx.add_element(ctx.sort_ids.delay_op, &delay_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("DelayOp/op", delay, op)?; + ctx.define_func("DelayOp/in", delay, input_wire)?; + ctx.define_func("DelayOp/out", delay, out_wire)?; + + Ok(out_wire) +} + +fn compile_diff(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result { + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + let diff_name = ctx.fresh_name("diff"); + let diff = ctx.add_element(ctx.sort_ids.diff_op, &diff_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("DiffOp/op", diff, op)?; + ctx.define_func("DiffOp/in", diff, input_wire)?; + ctx.define_func("DiffOp/out", diff, out_wire)?; + + Ok(out_wire) +} + +fn compile_integrate(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result { + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + let integrate_name = ctx.fresh_name("integrate"); + let integrate = ctx.add_element(ctx.sort_ids.integrate_op, &integrate_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, 
&op_name); + + ctx.define_func("IntegrateOp/op", integrate, op)?; + ctx.define_func("IntegrateOp/in", integrate, input_wire)?; + ctx.define_func("IntegrateOp/out", integrate, out_wire)?; + + Ok(out_wire) +} + +fn compile_negate(ctx: &mut CompileContext<'_>, input_wire: Slid) -> Result { + // Negate preserves schema (from wf/negate_schema axiom) + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + let negate_name = ctx.fresh_name("negate"); + let negate = ctx.add_element(ctx.sort_ids.negate_op, &negate_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("NegateOp/op", negate, op)?; + ctx.define_func("NegateOp/in", negate, input_wire)?; + ctx.define_func("NegateOp/out", negate, out_wire)?; + + Ok(out_wire) +} + +fn compile_empty(ctx: &mut CompileContext<'_>) -> Result { + // Empty produces a wire with some schema (we use a fresh placeholder) + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + let empty_name = ctx.fresh_name("empty"); + let empty = ctx.add_element(ctx.sort_ids.empty_op, &empty_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("EmptyOp/op", empty, op)?; + ctx.define_func("EmptyOp/out", empty, out_wire)?; + + Ok(out_wire) +} + +fn compile_project( + ctx: &mut CompileContext<'_>, + input_wire: Slid, + columns: &[usize], +) -> Result { + // Create output schema (different from input - projected schema) + let schema_name = ctx.fresh_name("schema"); + let out_schema = ctx.add_element(ctx.sort_ids.schema, &schema_name); + let out_wire = ctx.create_wire(out_schema)?; + + // Create ProjMapping + let mapping_name = ctx.fresh_name("proj_mapping"); + let proj_mapping = 
ctx.add_element(ctx.sort_ids.proj_mapping, &mapping_name); + + // Create ProjEntry for each column + for (target_idx, &source_col) in columns.iter().enumerate() { + let entry_name = ctx.fresh_name("proj_entry"); + let entry = ctx.add_element(ctx.sort_ids.proj_entry, &entry_name); + + // Source column reference (from input wire) + let source_ref = ctx.create_col_ref(input_wire, source_col)?; + + // Target path (simplified: just use HerePath for now) + // In a full implementation, we'd create proper paths for each output column + let target_path_name = ctx.fresh_name("col_path"); + let target_path = ctx.add_element(ctx.sort_ids.col_path, &target_path_name); + + // If this is not the first column, we'd need FstPath/SndPath navigation + // For now, we just use HerePath for all (placeholder behavior) + if target_idx == 0 { + let here_name = ctx.fresh_name("here_path"); + let here = ctx.add_element(ctx.sort_ids.here_path, &here_name); + ctx.define_func("HerePath/path", here, target_path)?; + } + + ctx.define_func("ProjEntry/mapping", entry, proj_mapping)?; + ctx.define_func("ProjEntry/source", entry, source_ref)?; + ctx.define_func("ProjEntry/target_path", entry, target_path)?; + } + + // Create ProjectOp + let project_name = ctx.fresh_name("project"); + let project = ctx.add_element(ctx.sort_ids.project_op, &project_name); + + let op_name = ctx.fresh_name("op"); + let op = ctx.add_element(ctx.sort_ids.op, &op_name); + + ctx.define_func("ProjectOp/op", project, op)?; + ctx.define_func("ProjectOp/in", project, input_wire)?; + ctx.define_func("ProjectOp/out", project, out_wire)?; + ctx.define_func("ProjectOp/mapping", project, proj_mapping)?; + + Ok(out_wire) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::repl::ReplState; + use egglog_numeric_id::NumericId; + + fn load_relalg_theory() -> Rc { + let meta_content = std::fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = 
std::fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + + let mut state = ReplState::new(); + state + .execute_geolog(&meta_content) + .expect("GeologMeta should load"); + state + .execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + + state + .theories + .get("RelAlgIR") + .expect("RelAlgIR should exist") + .clone() + } + + #[test] + fn test_compile_scan() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let plan = QueryOp::Scan { sort_idx: 0 }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Scan compilation should succeed"); + + let instance = result.unwrap(); + // Should have: Theory, Srt, BaseSchema, Schema, Wire, ScanOp, Op + assert!( + instance.structure.len() >= 7, + "Scan should create at least 7 elements" + ); + } + + #[test] + fn test_compile_filter_scan() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::True, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Filter(Scan) compilation should succeed"); + + let instance = result.unwrap(); + // Should have scan elements + filter elements + assert!( + instance.structure.len() >= 12, + "Filter(Scan) should create at least 12 elements" + ); + } + + #[test] + fn test_compile_join() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test cross join + let plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: crate::query::backend::JoinCond::Cross, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Cross join compilation should succeed"); + + // Test equi-join + let equi_plan = 
QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: crate::query::backend::JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let result = compile_to_relalg(&equi_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Equi-join compilation should succeed"); + } + + #[test] + fn test_compile_predicate() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test And predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::And( + Box::new(crate::query::backend::Predicate::True), + Box::new(crate::query::backend::Predicate::False), + ), + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "And predicate compilation should succeed"); + + // Test Or predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::Or( + Box::new(crate::query::backend::Predicate::True), + Box::new(crate::query::backend::Predicate::True), + ), + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Or predicate compilation should succeed"); + + // Test ColEqCol predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::ColEqCol { left: 0, right: 1 }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "ColEqCol predicate compilation should succeed"); + + // Test ColEqConst predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(42), + }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!( + result.is_ok(), + "ColEqConst predicate compilation 
should succeed" + ); + let instance = result.unwrap(); + // Should have created an Elem element for the constant + assert!( + instance.names.values().any(|n| n.starts_with("elem_")), + "Should create Elem element for constant" + ); + + // Test FuncEq predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::FuncEq { + func_idx: 0, + arg_col: 0, + result_col: 1, + }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "FuncEq predicate compilation should succeed"); + let instance = result.unwrap(); + // Should have created a Func element + assert!( + instance.names.values().any(|n| n.starts_with("func_")), + "Should create Func element for function reference" + ); + + // Test FuncEqConst predicate + let plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: crate::query::backend::Predicate::FuncEqConst { + func_idx: 0, + arg_col: 0, + expected: Slid::from_usize(99), + }, + }; + + let result = compile_to_relalg(&plan, &relalg_theory, &mut universe); + assert!( + result.is_ok(), + "FuncEqConst predicate compilation should succeed" + ); + let instance = result.unwrap(); + // Should have both Func and Elem elements + assert!( + instance.names.values().any(|n| n.starts_with("func_")), + "Should create Func element" + ); + assert!( + instance.names.values().any(|n| n.starts_with("elem_")), + "Should create Elem element for expected value" + ); + } + + #[test] + fn test_compile_dbsp_operators() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test Delay + let delay_plan = QueryOp::Delay { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + assert!( + compile_to_relalg(&delay_plan, &relalg_theory, &mut universe).is_ok(), + "Delay compilation should succeed" + ); + + // Test Diff + let diff_plan = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 
}), + state_id: 0, + }; + assert!( + compile_to_relalg(&diff_plan, &relalg_theory, &mut universe).is_ok(), + "Diff compilation should succeed" + ); + + // Test Integrate + let integrate_plan = QueryOp::Integrate { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + assert!( + compile_to_relalg(&integrate_plan, &relalg_theory, &mut universe).is_ok(), + "Integrate compilation should succeed" + ); + } + + #[test] + fn test_compile_negate_and_empty() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Test Negate + let negate_plan = QueryOp::Negate { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + let result = compile_to_relalg(&negate_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Negate compilation should succeed"); + + // Test Empty + let empty_plan = QueryOp::Empty; + let result = compile_to_relalg(&empty_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Empty compilation should succeed"); + + // Should have: Schema, Wire, EmptyOp, Op + let instance = result.unwrap(); + assert!( + instance.structure.len() >= 4, + "Empty should create at least 4 elements" + ); + + // Test Union with Empty (common pattern) + let union_empty_plan = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Empty), + }; + let result = compile_to_relalg(&union_empty_plan, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Union(Scan, Empty) compilation should succeed"); + } + + #[test] + fn test_compile_project() { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Project columns 0 and 2 from a join result + let project_plan = QueryOp::Project { + input: Box::new(QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: crate::query::backend::JoinCond::Cross, + }), + columns: vec![0, 2], + }; + + let result = compile_to_relalg(&project_plan, 
&relalg_theory, &mut universe); + assert!(result.is_ok(), "Project compilation should succeed: {:?}", result.err()); + + // Simple project: select single column + let simple_project = QueryOp::Project { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + columns: vec![0], + }; + let result = compile_to_relalg(&simple_project, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Single column project should succeed"); + + // Identity project (all columns in order) + let identity_project = QueryOp::Project { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + columns: vec![0, 1, 2], + }; + let result = compile_to_relalg(&identity_project, &relalg_theory, &mut universe); + assert!(result.is_ok(), "Identity project should succeed"); + } +} diff --git a/src/repl.rs b/src/repl.rs new file mode 100644 index 0000000..7482da8 --- /dev/null +++ b/src/repl.rs @@ -0,0 +1,1659 @@ +//! REPL (Read-Eval-Print Loop) for Geolog +//! +//! Provides an interactive environment for defining theories and instances, +//! inspecting structures, and managing workspaces. +//! +//! ## Architecture Note +//! +//! This module uses `Store` as the source of truth for all data. The `theories` +//! and `instances` HashMaps are TRANSITIONAL: they maintain runtime objects +//! needed for elaboration until the full GeologMeta-based elaboration is complete. + +use std::collections::HashMap; +use std::path::PathBuf; +use std::rc::Rc; + +use crate::ast; +use crate::core::{DerivedSort, ElaboratedTheory, Structure}; +use crate::elaborate::{Env, ElaborationContext, InstanceElaborationResult, elaborate_instance_ctx, elaborate_theory}; +use crate::id::{NumericId, Slid}; +use crate::store::Store; + +// Re-export InstanceEntry from elaborate for backwards compatibility +pub use crate::elaborate::InstanceEntry; + +/// REPL state - backed by Store with transitional runtime objects. +/// +/// The `store` is the source of truth for persistence and version control. 
+/// The `theories` and `instances` HashMaps are transitional: they hold +/// runtime objects needed for elaboration until we complete the migration +/// to fully GeologMeta-based elaboration. +pub struct ReplState { + /// The append-only store (source of truth for persistence) + pub store: Store, + + /// TRANSITIONAL: Runtime theories for elaboration + /// Will be removed once elaboration writes directly to Store + pub theories: HashMap>, + + /// TRANSITIONAL: Runtime instances for elaboration and display + /// Will be removed once elaboration and display use Store directly + pub instances: HashMap, + + /// Multi-line input buffer + pub input_buffer: String, + + /// Bracket depth for multi-line detection + pub bracket_depth: i32, +} + +impl Default for ReplState { + fn default() -> Self { + Self::new() + } +} + +impl ReplState { + /// Create a new REPL state with empty store + pub fn new() -> Self { + Self { + store: Store::new(), + theories: HashMap::new(), + instances: HashMap::new(), + input_buffer: String::new(), + bracket_depth: 0, + } + } + + /// Create a new REPL state with a persistence path + /// + /// If the path exists, loads the persisted Store and reconstructs runtime objects. 
+ pub fn with_path(path: impl Into) -> Self { + let path = path.into(); + let store = Store::load_or_create(&path); + + // Reconstruct theories from persisted GeologMeta + let theories = store.reconstruct_all_theories(); + + // Reconstruct instances from persisted GeologMeta + let reconstructed = store.reconstruct_all_instances(); + let instances: HashMap = reconstructed + .into_iter() + .map(|(name, ri)| { + // For now, use theory_name as theory_type too + // TODO: Store full theory_type in GeologMeta for proper reconstruction + let theory_type = ri.theory_name.clone(); + let mut entry = InstanceEntry::new(ri.structure, ri.theory_name, theory_type); + // Populate element names + for (slid, elem_name) in ri.element_names { + entry.register_element(elem_name, slid); + } + (name, entry) + }) + .collect(); + + Self { + store, + theories, + instances, + input_buffer: String::new(), + bracket_depth: 0, + } + } + + /// Reset to initial state (clear all theories and instances) + pub fn reset(&mut self) { + self.store = Store::new(); + self.theories.clear(); + self.instances.clear(); + self.input_buffer.clear(); + self.bracket_depth = 0; + } + + /// Get a structure by instance name + pub fn get_structure(&self, name: &str) -> Option<&Structure> { + self.instances.get(name).map(|e| &e.structure) + } + + /// Check if the state has uncommitted changes + pub fn is_dirty(&self) -> bool { + self.store.is_dirty() + } + + /// Commit current changes to the store + pub fn commit(&mut self, message: Option<&str>) -> Result { + self.store.commit(message) + } + + /// Get commit history + pub fn commit_history(&self) -> Vec { + self.store.commit_history() + } + + /// Process a line of input, handling multi-line bracket matching + pub fn process_line(&mut self, line: &str) -> InputResult { + let trimmed = line.trim(); + + // Empty line while buffering - submit incomplete input (will error) + if trimmed.is_empty() { + if self.input_buffer.is_empty() { + return InputResult::Empty; + } + // 
Double-empty-line to force submit + return InputResult::Incomplete; + } + + // Meta-command (only at start, not in continuation) + if trimmed.starts_with(':') && self.input_buffer.is_empty() { + return InputResult::MetaCommand(MetaCommand::parse(trimmed)); + } + + // Accumulate geolog input + if !self.input_buffer.is_empty() { + self.input_buffer.push('\n'); + } + self.input_buffer.push_str(line); + + // Update bracket depth (ignoring brackets in strings/comments) + for ch in line.chars() { + match ch { + '{' | '(' | '[' => self.bracket_depth += 1, + '}' | ')' | ']' => self.bracket_depth -= 1, + _ => {} + } + } + + // Complete when brackets balanced and line ends with } or ; + if self.bracket_depth <= 0 && (trimmed.ends_with('}') || trimmed.ends_with(';')) { + let input = std::mem::take(&mut self.input_buffer); + self.bracket_depth = 0; + InputResult::GeologInput(input) + } else { + InputResult::Incomplete + } + } + + /// Force submit current buffer (for Ctrl-D or double-empty-line) + pub fn force_submit(&mut self) -> Option { + if self.input_buffer.is_empty() { + None + } else { + self.bracket_depth = 0; + Some(std::mem::take(&mut self.input_buffer)) + } + } + + /// Execute geolog source code (theory or instance definitions) + /// + /// Returns a list of results, one for each declaration processed. + pub fn execute_geolog(&mut self, source: &str) -> Result, String> { + // Parse the input + let file = crate::parse(source).map_err(|e| format!("Parse error: {}", e))?; + + let mut results = Vec::new(); + + for decl in &file.declarations { + match &decl.node { + ast::Declaration::Namespace(name) => { + results.push(ExecuteResult::Namespace(name.clone())); + } + ast::Declaration::Theory(t) => { + // Check for duplicate theory name + if self.theories.contains_key(&t.name) { + return Err(format!( + "Theory '{}' already exists. 
Use a different name or :reset to clear.", + t.name + )); + } + + // TRANSITIONAL: Build an Env from self.theories for elaborate_theory + let mut env = Env::new(); + for (name, theory) in &self.theories { + env.theories.insert(name.clone(), theory.clone()); + } + + let elab = elaborate_theory(&mut env, t) + .map_err(|e| format!("Elaboration error: {}", e))?; + + let name = elab.theory.name.clone(); + let num_sorts = elab.theory.signature.sorts.len(); + let num_functions = elab.theory.signature.functions.len(); + let num_relations = elab.theory.signature.relations.len(); + let num_axioms = elab.theory.axioms.len(); + + // Register in Store with full signature + let theory_slid = self.store.create_theory(&name)?; + let sig_result = self.store.persist_signature(theory_slid, &elab.theory.signature)?; + + // Persist axioms + self.store.persist_axioms( + theory_slid, + &elab.theory.axioms, + &elab.theory.axiom_names, + &sig_result, + )?; + + // Store in transitional HashMap (will be removed once we query Store directly) + self.theories.insert(name.clone(), Rc::new(elab)); + + results.push(ExecuteResult::Theory { + name, + num_sorts, + num_functions, + num_relations, + num_axioms, + }); + } + ast::Declaration::Instance(inst) => { + // Check for duplicate instance name + if self.instances.contains_key(&inst.name) { + return Err(format!( + "Instance '{}' already exists. 
Use a different name or :reset to clear.", + inst.name + )); + } + + // Use the elaboration that works with our transitional state + // If totality check fails, try again with partial elaboration + let (elab_result, is_partial) = match self.elaborate_instance_internal(inst) { + Ok(result) => (result, false), + Err(e) if e.contains("partial function") => { + // Retry with partial elaboration + eprintln!("Note: Instance has partial functions, allowing for chase to complete them"); + let result = self.elaborate_instance_partial(inst) + .map_err(|e| format!("Elaboration error: {}", e))?; + (result, true) + } + Err(e) => return Err(format!("Elaboration error: {}", e)), + }; + let _ = is_partial; // Used for logging/warnings + + let instance_name = inst.name.clone(); + let theory_name = type_expr_to_theory_name(&inst.theory); + let theory_type = type_expr_to_full_string(&inst.theory); + let num_elements = elab_result.structure.len(); + + // Build InstanceEntry with element names from elaboration + // This includes BOTH imported elements AND locally declared elements + let mut entry = InstanceEntry::new(elab_result.structure, theory_name.clone(), theory_type); + + // Copy nested instance metadata for cross-instance references + entry.nested_meta = elab_result.nested_meta; + + // Register ALL element names from elaboration result + for (slid, elem_name) in elab_result.slid_to_name { + entry.register_element(elem_name.clone(), slid); + + // Register local (non-qualified) names in store's naming index + // Only register names that don't contain '/' (local to this instance) + if !elem_name.contains('/') { + let luid = entry.structure.get_luid(slid); + if let Some(uuid) = self.store.universe.get(luid) { + self.store.naming.insert( + uuid, + vec![instance_name.clone(), elem_name.clone()], + ); + } + } + } + + // Register in Store and persist instance data + if let Some((theory_slid, _)) = self.store.resolve_name(&theory_name) { + let instance_slid = 
self.store.create_instance(&instance_name, theory_slid)?; + + // Build element name map (Slid -> String) for persistence + let elem_names: HashMap = entry + .slid_to_name + .iter() + .map(|(&slid, name)| (slid, name.clone())) + .collect(); + + // Persist all instance data to GeologMeta + self.store.persist_instance_data( + instance_slid, + theory_slid, + &entry.structure, + &elem_names, + )?; + } + + // Store in transitional HashMap + self.instances.insert(instance_name.clone(), entry); + + results.push(ExecuteResult::Instance { + name: instance_name, + theory_name, + num_elements, + }); + } + ast::Declaration::Query(q) => { + let result = self.execute_query(q)?; + results.push(ExecuteResult::Query(result)); + } + } + } + + // Return all results + if results.is_empty() { + Err("No declarations found".to_string()) + } else { + Ok(results) + } + } + + /// Internal instance elaboration that works with our transitional state + fn elaborate_instance_internal(&mut self, inst: &ast::InstanceDecl) -> Result { + // Build elaboration context from our state + let mut ctx = ElaborationContext { + theories: &self.theories, + instances: &self.instances, + universe: &mut self.store.universe, + siblings: HashMap::new(), + }; + + elaborate_instance_ctx(&mut ctx, inst) + .map_err(|e| e.to_string()) + } + + /// Internal instance elaboration that skips totality validation. + /// Use this for instances that will be completed by the chase algorithm. + pub fn elaborate_instance_partial(&mut self, inst: &ast::InstanceDecl) -> Result { + use crate::elaborate::elaborate_instance_ctx_partial; + + // Build elaboration context from our state + let mut ctx = ElaborationContext { + theories: &self.theories, + instances: &self.instances, + universe: &mut self.store.universe, + siblings: HashMap::new(), + }; + + elaborate_instance_ctx_partial(&mut ctx, inst) + .map_err(|e| e.to_string()) + } + + /// Execute a query: find an instance satisfying the goal type. 
+ /// + /// For a query like `query q { ? : ExampleNet problem0 Solution instance; }`: + /// 1. Parse the goal type to get theory name and type arguments + /// 2. Look up the theory and param instances + /// 3. Build a base structure with imported elements from param instances + /// 4. Run the solver to find a satisfying extension + fn execute_query(&mut self, q: &ast::QueryDecl) -> Result { + use crate::solver::{query, Budget, EnumerationResult}; + + let start = std::time::Instant::now(); + + // The goal should be an Instance type: tokens ending with `instance` + if !q.goal.is_instance() { + return Err("Query goal must be an instance type (e.g., `T instance`)".to_string()); + } + let inner_type = q.goal.instance_inner() + .ok_or_else(|| "Failed to extract inner type from instance".to_string())?; + + // Resolve the instance type to get theory name and arguments + let resolved = self.resolve_query_type(&inner_type)?; + let theory = self.theories.get(&resolved.theory_name) + .ok_or_else(|| format!("Unknown theory: {}", resolved.theory_name))? + .clone(); + + // Build base structure from param instances + let (base_structure, universe) = self.build_query_base(&resolved, &theory)?; + + // Run the solver + let budget = Budget::new(5000, 10000); // 5 second timeout, 10k step limit + let result = query(base_structure, universe, theory.clone(), budget); + + let time_ms = start.elapsed().as_secs_f64() * 1000.0; + + match result { + EnumerationResult::Found { model, .. } => Ok(QueryResult::Found { + query_name: q.name.clone(), + theory_name: resolved.theory_name, + model, + time_ms, + }), + EnumerationResult::Unsat { .. } => Ok(QueryResult::Unsat { + query_name: q.name.clone(), + theory_name: resolved.theory_name, + time_ms, + }), + EnumerationResult::Incomplete { reason, .. 
} => Ok(QueryResult::Incomplete { + query_name: q.name.clone(), + theory_name: resolved.theory_name, + reason, + time_ms, + }), + } + } + + /// Resolve a query goal type expression to get theory name and param bindings. + fn resolve_query_type(&self, ty: &ast::TypeExpr) -> Result { + use crate::ast::TypeToken; + + // Collect all path tokens from the type expression + let all_paths: Vec = ty.tokens.iter() + .filter_map(|t| match t { + TypeToken::Path(p) => Some(p.to_string()), + _ => None, + }) + .collect(); + + if all_paths.is_empty() { + return Err("Query type has no path components".to_string()); + } + + // Simple case: just one path + if all_paths.len() == 1 { + return Ok(ResolvedQueryType { + theory_name: all_paths[0].clone(), + arguments: vec![], + }); + } + + // Multiple paths: rightmost is theory name, rest are args + let theory_name = all_paths.last().unwrap().clone(); + let args: Vec = all_paths[..all_paths.len() - 1].to_vec(); + + // Look up theory to match params + let theory = self.theories.get(&theory_name) + .ok_or_else(|| format!("Unknown theory: {}", theory_name))?; + + if args.len() != theory.params.len() { + return Err(format!( + "Theory {} expects {} parameters, got {}", + theory_name, theory.params.len(), args.len() + )); + } + + let arguments: Vec<(String, String)> = theory.params + .iter() + .zip(args.iter()) + .map(|(param, arg)| (param.name.clone(), arg.clone())) + .collect(); + + Ok(ResolvedQueryType { + theory_name, + arguments, + }) + } + + /// Build a base structure for a query by importing elements from param instances. 
+ fn build_query_base( + &self, + resolved: &ResolvedQueryType, + theory: &Rc, + ) -> Result<(Structure, crate::universe::Universe), String> { + use crate::core::FunctionDomainInfo; + + let sig = &theory.theory.signature; + let mut structure = Structure::new(sig.sorts.len()); + let mut universe = crate::universe::Universe::new(); + + // Initialize relation storage + let relation_arities: Vec = sig.relations + .iter() + .map(|rel| rel.domain.arity()) + .collect(); + structure.init_relations(&relation_arities); + + // Track imported UUIDs to avoid duplicates across params + let mut imported_uuids = std::collections::HashSet::new(); + // Track Luid -> new Slid mapping for importing function values later + let mut luid_to_new_slid: std::collections::HashMap = std::collections::HashMap::new(); + + // Import elements from each param instance + for (param_name, instance_name) in &resolved.arguments { + let param_entry = self.instances.get(instance_name) + .ok_or_else(|| format!("Unknown instance: {}", instance_name))?; + + let param_theory = self.theories.get(¶m_entry.theory_name) + .ok_or_else(|| format!("Unknown theory: {}", param_entry.theory_name))?; + + // Import each element from the param instance + for &slid in param_entry.slid_to_name.keys() { + let param_sort_id = param_entry.structure.sorts[slid.index()]; + let param_sort_name = ¶m_theory.theory.signature.sorts[param_sort_id]; + + // Try different mappings for the local sort name. + // The sort might be: + // 1. param_name/param_sort_name (e.g., "N/P" for a PetriNet param) + // 2. 
Just param_sort_name if it already has a prefix (for nested params) + let local_sort_id = if let Some(id) = sig.lookup_sort(&format!("{}/{}", param_name, param_sort_name)) { + id + } else if let Some(id) = sig.lookup_sort(param_sort_name) { + // The sort might already be prefixed from an earlier param in the chain + // (e.g., "N/P" in problem0 should map to "N/P" in Solution, not "RP/N/P") + id + } else { + // Sort not found - skip this element (might be from a nested instance + // that will be imported separately or doesn't map to this theory) + continue; + }; + + // Get the existing Luid and its Uuid + let luid = param_entry.structure.get_luid(slid); + let uuid = self.store.universe.get(luid) + .ok_or_else(|| format!("No Uuid for Luid {:?}", luid))?; + + // Skip if already imported from an earlier param + if imported_uuids.contains(&uuid) { + continue; + } + imported_uuids.insert(uuid); + + // Register in our new universe and add element + let new_luid = universe.intern(uuid); + let local_slid = structure.add_element_with_luid(new_luid, local_sort_id); + luid_to_new_slid.insert(luid, local_slid); + } + + // Import elements from nested structures (e.g., initial_marking, target_marking in ReachabilityProblem) + for (nested_name, nested_struct) in ¶m_entry.structure.nested { + // Nested structure elements have sorts like "initial_marking/token" in the param theory + // They map to sorts like "RP/initial_marking/token" in the target theory + for slid_idx in 0..nested_struct.sorts.len() { + let slid = crate::id::Slid::from_usize(slid_idx); + let _nested_sort_id = nested_struct.sorts[slid_idx]; + + // Get sort name from the nested theory (we don't have it directly, so reconstruct) + // The nested structure sorts are indexed locally starting from 0 + // We need to find the corresponding sort name in the target theory + let nested_sort_prefix = format!("{}/{}", param_name, nested_name); + + // Try to find a sort in the target theory that matches this nested element + let 
local_sort_id = sig.sorts.iter().position(|s| { + s.starts_with(&nested_sort_prefix) + }); + + if let Some(local_sort_id) = local_sort_id { + // Get the Luid and Uuid + let luid = nested_struct.get_luid(slid); + if let Some(uuid) = self.store.universe.get(luid) + && !imported_uuids.contains(&uuid) { + imported_uuids.insert(uuid); + let new_luid = universe.intern(uuid); + let local_slid = structure.add_element_with_luid(new_luid, local_sort_id); + luid_to_new_slid.insert(luid, local_slid); + } + } + } + } + } + + // Initialize function storage + let domains: Vec = sig.functions + .iter() + .map(|func| match &func.domain { + DerivedSort::Base(id) => FunctionDomainInfo::Base(*id), + DerivedSort::Product(fields) => { + let field_sorts: Vec = fields + .iter() + .filter_map(|(_, ds)| match ds { + DerivedSort::Base(id) => Some(*id), + DerivedSort::Product(_) => None, + }) + .collect(); + FunctionDomainInfo::Product(field_sorts) + } + }) + .collect(); + structure.init_functions_full(&domains); + + // Import function values from param instances + for (param_name, instance_name) in &resolved.arguments { + let param_entry = self.instances.get(instance_name).unwrap(); + let param_theory = self.theories.get(¶m_entry.theory_name).unwrap(); + let param_sig = ¶m_theory.theory.signature; + + // For each function in the param theory + for (param_func_id, param_func) in param_sig.functions.iter().enumerate() { + // Find the corresponding function in the target theory + // It should be named "{param_name}/{func_name}" + let target_func_name = format!("{}/{}", param_name, param_func.name); + let target_func_id = sig.functions.iter().position(|f| f.name == target_func_name); + + if let Some(target_func_id) = target_func_id { + // Copy function values + // Iterate over all elements in the domain of the param function + if let DerivedSort::Base(domain_sort) = ¶m_func.domain { + // Get all elements of the domain sort in the param instance + for (idx, &sort_id) in 
param_entry.structure.sorts.iter().enumerate() { + if sort_id == *domain_sort { + let domain_slid = Slid::from_usize(idx); + let domain_sort_slid = param_entry.structure.sort_local_id(domain_slid); + + // Get the function value in the param instance + if let Some(codomain_slid) = param_entry.structure.get_function(param_func_id, domain_sort_slid) { + // Map both domain and codomain to new Slids + let domain_luid = param_entry.structure.get_luid(domain_slid); + let codomain_luid = param_entry.structure.get_luid(codomain_slid); + + if let (Some(&new_domain_slid), Some(&new_codomain_slid)) = + (luid_to_new_slid.get(&domain_luid), luid_to_new_slid.get(&codomain_luid)) + { + // Define the function value in the new structure + let _ = structure.define_function(target_func_id, new_domain_slid, new_codomain_slid); + } + } + } + } + } + } + } + } + + Ok((structure, universe)) + } + + /// List all theories (runtime + persisted) + pub fn list_theories(&self) -> Vec { + use crate::store::BindingKind; + use std::collections::HashSet; + + let mut result: Vec = self.theories + .iter() + .map(|(name, theory)| TheoryInfo { + name: name.clone(), + num_sorts: theory.theory.signature.sorts.len(), + num_functions: theory.theory.signature.functions.len(), + num_relations: theory.theory.signature.relations.len(), + num_axioms: theory.theory.axioms.len(), + }) + .collect(); + + // Add persisted theories that aren't in runtime + let runtime_names: HashSet<_> = self.theories.keys().cloned().collect(); + for (name, kind, slid) in self.store.list_bindings() { + if kind == BindingKind::Theory && !runtime_names.contains(&name) { + // Query the Store for theory structure + let sorts = self.store.query_theory_sorts(slid); + let funcs = self.store.query_theory_funcs(slid); + let rels = self.store.query_theory_rels(slid); + let axioms = self.store.query_theory_sequents(slid); + result.push(TheoryInfo { + name, + num_sorts: sorts.len(), + num_functions: funcs.len(), + num_relations: rels.len(), + 
num_axioms: axioms.len(), + }); + } + } + + result + } + + /// List all instances (runtime + persisted) + pub fn list_instances(&self) -> Vec { + use crate::store::BindingKind; + use std::collections::HashSet; + + let mut result: Vec = self.instances + .iter() + .map(|(name, entry)| InstanceInfo { + name: name.clone(), + theory_name: entry.theory_name.clone(), + num_elements: entry.structure.len(), + }) + .collect(); + + // Add persisted instances that aren't in runtime + let runtime_names: HashSet<_> = self.instances.keys().cloned().collect(); + for (name, kind, _slid) in self.store.list_bindings() { + if kind == BindingKind::Instance && !runtime_names.contains(&name) { + result.push(InstanceInfo { + name, + theory_name: "(persisted)".to_string(), // Would need query to get + num_elements: 0, // Unknown + }); + } + } + + result + } + + /// Inspect a theory or instance by name + pub fn inspect(&self, name: &str) -> Option { + // Check theories first + if let Some(theory) = self.theories.get(name) { + return Some(InspectResult::Theory(TheoryDetail { + name: name.to_string(), + params: theory + .params + .iter() + .map(|p| (p.name.clone(), p.theory_name.clone())) + .collect(), + sorts: theory.theory.signature.sorts.clone(), + functions: theory + .theory + .signature + .functions + .iter() + .map(|f| { + let domain = format_derived_sort(&f.domain, &theory.theory.signature); + let codomain = format_derived_sort(&f.codomain, &theory.theory.signature); + (f.name.clone(), domain, codomain) + }) + .collect(), + relations: theory + .theory + .signature + .relations + .iter() + .map(|r| { + let domain = format_derived_sort(&r.domain, &theory.theory.signature); + (r.name.clone(), domain) + }) + .collect(), + instance_fields: theory + .theory + .signature + .instance_fields + .iter() + .map(|f| (f.name.clone(), f.theory_type.clone())) + .collect(), + axioms: theory + .theory + .axioms + .iter() + .map(|ax| format_axiom(ax, &theory.theory.signature)) + .collect(), + })); + } + 
+ // Check instances + if let Some(entry) = self.instances.get(name) { + let theory = self.theories.get(&entry.theory_name)?; + + // Collect nested instance info + let nested: Vec<(String, usize)> = entry + .structure + .nested + .iter() + .map(|(field_name, nested_struct)| { + (field_name.clone(), nested_struct.len()) + }) + .collect(); + + return Some(InspectResult::Instance(InstanceDetail { + name: name.to_string(), + theory_name: entry.theory_name.clone(), + elements: self.collect_elements(entry, &theory.theory.signature), + functions: self.collect_function_values(entry, &theory.theory.signature), + relations: self.collect_relation_tuples(entry, &theory.theory.signature), + nested, + })); + } + + None + } + + /// Collect elements grouped by sort + fn collect_elements( + &self, + entry: &InstanceEntry, + signature: &crate::core::Signature, + ) -> Vec<(String, Vec)> { + let mut result = Vec::new(); + for (sort_id, sort_name) in signature.sorts.iter().enumerate() { + let elements: Vec = entry + .structure + .carriers[sort_id] + .iter() + .map(|slid_u64| { + let slid = Slid::from_usize(slid_u64 as usize); + entry + .get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid_u64)) + }) + .collect(); + if !elements.is_empty() { + result.push((sort_name.clone(), elements)); + } + } + result + } + + /// Collect function values as "domain func = codomain" + fn collect_function_values( + &self, + entry: &InstanceEntry, + signature: &crate::core::Signature, + ) -> Vec<(String, Vec)> { + use crate::core::FunctionColumn; + + let mut result = Vec::new(); + for (func_id, func_sym) in signature.functions.iter().enumerate() { + if func_id >= entry.structure.functions.len() { + continue; + } + let mut values = Vec::new(); + + match &func_sym.domain { + DerivedSort::Base(domain_sort_id) => { + // Check if this is a product codomain function + if let FunctionColumn::ProductCodomain { field_columns, field_names, .. 
} = + &entry.structure.functions[func_id] + { + // Product codomain: format as `domain func = [field1: v1, ...]` + for slid_u64 in entry.structure.carriers[*domain_sort_id].iter() { + let slid = Slid::from_usize(slid_u64 as usize); + let sort_slid = entry.structure.sort_local_id(slid); + let idx = sort_slid.index(); + + // Check if all fields are defined for this element + let all_defined = field_columns.iter().all(|col| { + col.get(idx) + .and_then(|opt| crate::id::get_slid(*opt)) + .is_some() + }); + + if all_defined { + let domain_name = entry + .get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid_u64)); + + let field_strs: Vec = field_names + .iter() + .zip(field_columns.iter()) + .map(|(name, col)| { + let codomain_slid = crate::id::get_slid(col[idx]).unwrap(); + let codomain_name = entry + .get_name(codomain_slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", codomain_slid)); + format!("{}: {}", name, codomain_name) + }) + .collect(); + + values.push(format!( + "{} {} = [{}]", + domain_name, func_sym.name, field_strs.join(", ") + )); + } + } + } else { + // Base codomain: iterate over carrier elements + for slid_u64 in entry.structure.carriers[*domain_sort_id].iter() { + let slid = Slid::from_usize(slid_u64 as usize); + let sort_slid = entry.structure.sort_local_id(slid); + if let Some(codomain_slid) = + entry.structure.get_function(func_id, sort_slid) + { + let domain_name = entry + .get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid_u64)); + let codomain_name = entry + .get_name(codomain_slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", codomain_slid)); + values.push(format!( + "{} {} = {}", + domain_name, func_sym.name, codomain_name + )); + } + } + } + } + DerivedSort::Product(fields) => { + // Product domain: iterate over defined entries in storage + if let FunctionColumn::ProductLocal { storage, .. 
} = + &entry.structure.functions[func_id] + { + for (tuple_indices, codomain_slid) in storage.iter_defined() { + // Map sort-local indices back to Slids for name lookup + let tuple_strs: Vec = tuple_indices + .iter() + .zip(fields.iter()) + .map(|(&local_idx, (field_name, field_sort))| { + // Get the Slid at this sort-local position + let slid = if let DerivedSort::Base(sort_id) = field_sort { + entry.structure.carriers[*sort_id] + .iter() + .nth(local_idx) + .map(|u| Slid::from_usize(u as usize)) + } else { + None + }; + + let elem_name = slid + .and_then(|s| entry.get_name(s).map(|n| n.to_string())) + .unwrap_or_else(|| format!("#{}", local_idx)); + + format!("{}: {}", field_name, elem_name) + }) + .collect(); + + let codomain_name = entry + .get_name(codomain_slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", codomain_slid)); + + values.push(format!( + "[{}] {} = {}", + tuple_strs.join(", "), + func_sym.name, + codomain_name + )); + } + } + } + } + + if !values.is_empty() { + result.push((func_sym.name.clone(), values)); + } + } + result + } + + /// Collect relation tuples as vectors of element names + fn collect_relation_tuples( + &self, + entry: &InstanceEntry, + signature: &crate::core::Signature, + ) -> Vec<(String, Vec, Vec>)> { + let mut result = Vec::new(); + + for (rel_id, rel_sym) in signature.relations.iter().enumerate() { + if rel_id >= entry.structure.relations.len() { + continue; + } + + // Extract field names from the relation's domain type + let field_names: Vec = match &rel_sym.domain { + crate::core::DerivedSort::Base(_) => vec![], // Unary relation, no field names + crate::core::DerivedSort::Product(fields) => { + fields.iter().map(|(name, _)| name.clone()).collect() + } + }; + + let relation = &entry.structure.relations[rel_id]; + let mut tuples: Vec> = Vec::new(); + + for tuple in relation.iter() { + let tuple_names: Vec = tuple + .iter() + .map(|&slid| { + entry + .get_name(slid) + .map(|s| s.to_string()) + 
.unwrap_or_else(|| format!("#{}", slid)) + }) + .collect(); + tuples.push(tuple_names); + } + + if !tuples.is_empty() { + result.push((rel_sym.name.clone(), field_names, tuples)); + } + } + + result + } + + /// Execute a query on an instance. + /// + /// Returns all elements of the given sort in the instance. + pub fn query_sort(&self, instance_name: &str, sort_name: &str) -> Result, String> { + // Get the instance + let entry = self.instances.get(instance_name) + .ok_or_else(|| format!("Instance '{}' not found", instance_name))?; + + // Get the theory + let theory = self.theories.get(&entry.theory_name) + .ok_or_else(|| format!("Theory '{}' not found", entry.theory_name))?; + + // Find the sort index + let sort_idx = theory.theory.signature.sorts + .iter() + .position(|s| s == sort_name) + .ok_or_else(|| format!("Sort '{}' not found in theory '{}'", sort_name, entry.theory_name))?; + + // Use the query backend to scan all elements + use crate::query::{QueryOp, execute}; + + let plan = QueryOp::Scan { sort_idx }; + let result = execute(&plan, &entry.structure); + + // Convert results to element names + let elements: Vec = result.iter() + .filter_map(|(tuple, _)| tuple.first()) + .map(|&slid| { + entry.get_name(slid) + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("#{}", slid)) + }) + .collect(); + + Ok(elements) + } +} + +/// Helper to extract theory name from a type expression +/// +/// For parameterized types like `ExampleNet Trace`, the theory is the rightmost +/// path element, not the first argument. 
+fn type_expr_to_theory_name(type_expr: &ast::TypeExpr) -> String { + use crate::ast::TypeToken; + + // Handle special cases first + if type_expr.is_sort() { + return "Sort".to_string(); + } + if type_expr.is_prop() { + return "Prop".to_string(); + } + + // For instance types, recurse on the inner type + if let Some(inner) = type_expr.instance_inner() { + return type_expr_to_theory_name(&inner); + } + + // Find the last path token - that's the theory name + for token in type_expr.tokens.iter().rev() { + if let TypeToken::Path(path) = token { + return path.segments.join("/"); + } + } + + // Fallback for arrows, records, etc. + if type_expr.tokens.iter().any(|t| matches!(t, TypeToken::Arrow)) { + return "Arrow".to_string(); + } + if type_expr.as_record().is_some() { + return "Record".to_string(); + } + + "Unknown".to_string() +} + +/// Convert a type expression to its full string representation. +/// E.g., tokens [Path(ExampleNet), Path(problem0), Path(Solution)] -> "ExampleNet problem0 Solution" +fn type_expr_to_full_string(type_expr: &ast::TypeExpr) -> String { + use crate::ast::TypeToken; + + let mut parts: Vec = vec![]; + + for token in &type_expr.tokens { + match token { + TypeToken::Sort => parts.push("Sort".to_string()), + TypeToken::Prop => parts.push("Prop".to_string()), + TypeToken::Path(path) => parts.push(path.segments.join("/")), + TypeToken::Arrow => parts.push("->".to_string()), + TypeToken::Instance => parts.push("instance".to_string()), + TypeToken::Record(fields) => { + let field_strs: Vec = fields + .iter() + .map(|(name, ty)| format!("{}: {}", name, type_expr_to_full_string(ty))) + .collect(); + parts.push(format!("[{}]", field_strs.join(", "))); + } + } + } + + parts.join(" ") +} + +/// Format a DerivedSort as a string using sort names from the signature +fn format_derived_sort(ds: &DerivedSort, sig: &crate::core::Signature) -> String { + match ds { + DerivedSort::Base(sort_id) => sig + .sorts + .get(*sort_id) + .cloned() + .unwrap_or_else(|| 
format!("Sort#{}", sort_id)), + DerivedSort::Product(fields) => { + if fields.is_empty() { + "Unit".to_string() + } else { + let field_strs: Vec = fields + .iter() + .map(|(name, ds)| format!("{}: {}", name, format_derived_sort(ds, sig))) + .collect(); + format!("[{}]", field_strs.join(", ")) + } + } + } +} + +/// Format a core::Sequent (axiom) for display +fn format_axiom(ax: &crate::core::Sequent, sig: &crate::core::Signature) -> AxiomDetail { + let context: Vec<(String, String)> = ax + .context + .vars + .iter() + .map(|(name, sort)| (name.clone(), format_derived_sort(sort, sig))) + .collect(); + let premise = format_core_formula(&ax.premise, sig); + let conclusion = format_core_formula(&ax.conclusion, sig); + AxiomDetail { + context, + premise, + conclusion, + } +} + +/// Format a core::Term for display +fn format_core_term(term: &crate::core::Term, sig: &crate::core::Signature) -> String { + match term { + crate::core::Term::Var(name, _) => name.clone(), + crate::core::Term::App(func_id, arg) => { + let func_name = sig + .functions + .get(*func_id) + .map(|f| f.name.clone()) + .unwrap_or_else(|| format!("func#{}", func_id)); + format!("{} {}", format_core_term(arg, sig), func_name) + } + crate::core::Term::Record(fields) => { + let field_strs: Vec = fields + .iter() + .map(|(name, t)| format!("{}: {}", name, format_core_term(t, sig))) + .collect(); + format!("[{}]", field_strs.join(", ")) + } + crate::core::Term::Project(base, field) => { + format!("{} .{}", format_core_term(base, sig), field) + } + } +} + +/// Format a core::Formula for display +fn format_core_formula(formula: &crate::core::Formula, sig: &crate::core::Signature) -> String { + match formula { + crate::core::Formula::True => "true".to_string(), + crate::core::Formula::False => "false".to_string(), + crate::core::Formula::Eq(lhs, rhs) => { + format!( + "{} = {}", + format_core_term(lhs, sig), + format_core_term(rhs, sig) + ) + } + crate::core::Formula::Rel(rel_id, arg) => { + let rel_name = sig 
+ .relations + .get(*rel_id) + .map(|r| r.name.clone()) + .unwrap_or_else(|| format!("rel#{}", rel_id)); + format!("{} {}", format_core_term(arg, sig), rel_name) + } + crate::core::Formula::Conj(conjuncts) => { + if conjuncts.is_empty() { + "true".to_string() + } else { + conjuncts + .iter() + .map(|f| format_core_formula(f, sig)) + .collect::>() + .join(", ") + } + } + crate::core::Formula::Disj(disjuncts) => { + if disjuncts.is_empty() { + "false".to_string() + } else { + disjuncts + .iter() + .map(|f| { + let s = format_core_formula(f, sig); + if matches!( + f, + crate::core::Formula::Conj(_) | crate::core::Formula::Disj(_) + ) { + format!("({})", s) + } else { + s + } + }) + .collect::>() + .join(" \\/ ") + } + } + crate::core::Formula::Exists(name, sort, body) => { + format!( + "(exists {} : {}. {})", + name, + format_derived_sort(sort, sig), + format_core_formula(body, sig) + ) + } + } +} + +/// Resolved query type with theory name and argument bindings. +struct ResolvedQueryType { + /// The base theory name (e.g., "Solution") + theory_name: String, + /// Param bindings: (param_name, instance_name) pairs + /// e.g., [("N", "ExampleNet"), ("RP", "problem0")] + arguments: Vec<(String, String)>, +} + +/// Result of processing a line of input +#[derive(Debug)] +pub enum InputResult { + MetaCommand(MetaCommand), + GeologInput(String), + Incomplete, + Empty, +} + +/// Meta-commands supported by the REPL +#[derive(Debug)] +pub enum MetaCommand { + Help(Option), + Quit, + List(ListTarget), + Inspect(String), + Clear, + Reset, + Source(PathBuf), + /// Commit current changes with optional message + Commit(Option), + /// Show commit history + History, + /// Add element to instance: `:add ` + Add { instance: String, element: String, sort: String }, + /// Assert relation tuple: `:assert ` + Assert { instance: String, relation: String, args: Vec }, + /// Retract element from instance: `:retract ` + Retract { instance: String, element: String }, + /// Query instance: 
`:query [filter conditions]` + Query { instance: String, sort: String }, + /// Explain query plan: `:explain ` + Explain { instance: String, sort: String }, + /// Compile query to RelAlgIR: `:compile ` + Compile { instance: String, sort: String }, + /// Solve: find an instance of a theory using the geometric logic solver + /// `:solve [budget_ms]` + Solve { theory: String, budget_ms: Option }, + /// Extend: find extensions of an existing instance to a (larger) theory + /// `:extend [budget_ms]` + Extend { instance: String, theory: String, budget_ms: Option }, + /// Chase: run chase algorithm on instance to compute derived relations/functions + /// `:chase [max_iterations]` + Chase { instance: String, max_iterations: Option }, + Unknown(String), +} + +impl MetaCommand { + pub fn parse(input: &str) -> Self { + let input = input.trim_start_matches(':').trim(); + let mut parts = input.split_whitespace(); + let cmd = parts.next().unwrap_or(""); + let arg = parts.next(); + + match cmd { + "help" | "h" | "?" 
=> MetaCommand::Help(arg.map(String::from)), + "quit" | "q" | "exit" => MetaCommand::Quit, + "list" | "ls" | "l" => { + let target = match arg { + Some("theories" | "theory" | "t") => ListTarget::Theories, + Some("instances" | "instance" | "i") => ListTarget::Instances, + _ => ListTarget::All, + }; + MetaCommand::List(target) + } + "inspect" | "i" | "show" => { + if let Some(name) = arg { + MetaCommand::Inspect(name.to_string()) + } else { + MetaCommand::Unknown(":inspect requires a name".to_string()) + } + } + "clear" | "cls" => MetaCommand::Clear, + "reset" => MetaCommand::Reset, + "source" | "load" => { + if let Some(path) = arg { + MetaCommand::Source(PathBuf::from(path)) + } else { + MetaCommand::Unknown(":source requires a file path".to_string()) + } + } + "commit" | "ci" => { + // Collect remaining args as message + let message: Vec<&str> = parts.collect(); + let msg = if let Some(first) = arg { + let mut full_msg = first.to_string(); + for part in message { + full_msg.push(' '); + full_msg.push_str(part); + } + Some(full_msg) + } else { + None + }; + MetaCommand::Commit(msg) + } + "history" | "log" => MetaCommand::History, + "add" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 3 { + MetaCommand::Add { + instance: args[0].to_string(), + element: args[1].to_string(), + sort: args[2].to_string(), + } + } else { + MetaCommand::Unknown(":add requires ".to_string()) + } + } + "assert" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Assert { + instance: args[0].to_string(), + relation: args[1].to_string(), + args: args[2..].iter().map(|s| s.to_string()).collect(), + } + } else { + MetaCommand::Unknown(":assert requires [args...]".to_string()) + } + } + "retract" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Retract { + instance: args[0].to_string(), + element: 
args[1].to_string(), + } + } else { + MetaCommand::Unknown(":retract requires ".to_string()) + } + } + "query" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Query { + instance: args[0].to_string(), + sort: args[1].to_string(), + } + } else { + MetaCommand::Unknown(":query requires ".to_string()) + } + } + "explain" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Explain { + instance: args[0].to_string(), + sort: args[1].to_string(), + } + } else { + MetaCommand::Unknown(":explain requires ".to_string()) + } + } + "compile" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + MetaCommand::Compile { + instance: args[0].to_string(), + sort: args[1].to_string(), + } + } else { + MetaCommand::Unknown(":compile requires ".to_string()) + } + } + "solve" => { + if let Some(theory_name) = arg { + // Optional budget in ms + let budget_ms = parts.next().and_then(|s| s.parse().ok()); + MetaCommand::Solve { + theory: theory_name.to_string(), + budget_ms, + } + } else { + MetaCommand::Unknown(":solve requires [budget_ms]".to_string()) + } + } + "extend" => { + let args: Vec<&str> = std::iter::once(arg).flatten().chain(parts).collect(); + if args.len() >= 2 { + let budget_ms = args.get(2).and_then(|s| s.parse().ok()); + MetaCommand::Extend { + instance: args[0].to_string(), + theory: args[1].to_string(), + budget_ms, + } + } else { + MetaCommand::Unknown(":extend requires [budget_ms]".to_string()) + } + } + "chase" => { + if let Some(instance_name) = arg { + let max_iterations = parts.next().and_then(|s| s.parse().ok()); + MetaCommand::Chase { + instance: instance_name.to_string(), + max_iterations, + } + } else { + MetaCommand::Unknown(":chase requires [max_iterations]".to_string()) + } + } + other => MetaCommand::Unknown(format!("Unknown command: :{}", other)), + } + } +} + 
+#[derive(Debug)] +pub enum ListTarget { + Theories, + Instances, + All, +} + +#[derive(Debug)] +pub enum ExecuteResult { + Namespace(String), + Theory { + name: String, + num_sorts: usize, + num_functions: usize, + num_relations: usize, + num_axioms: usize, + }, + Instance { + name: String, + theory_name: String, + num_elements: usize, + }, + Query(QueryResult), +} + +/// Result of executing a query +#[derive(Debug)] +pub enum QueryResult { + /// Found a satisfying instance + Found { + query_name: String, + theory_name: String, + model: crate::core::Structure, + time_ms: f64, + }, + /// No solution exists + Unsat { + query_name: String, + theory_name: String, + time_ms: f64, + }, + /// Search incomplete (timeout or other reason) + Incomplete { + query_name: String, + theory_name: String, + reason: String, + time_ms: f64, + }, +} + +#[derive(Debug)] +pub struct TheoryInfo { + pub name: String, + pub num_sorts: usize, + pub num_functions: usize, + pub num_relations: usize, + pub num_axioms: usize, +} + +#[derive(Debug)] +pub struct InstanceInfo { + pub name: String, + pub theory_name: String, + pub num_elements: usize, +} + +#[derive(Debug)] +pub struct TheoryDetail { + pub name: String, + pub params: Vec<(String, String)>, + pub sorts: Vec, + pub functions: Vec<(String, String, String)>, + pub relations: Vec<(String, String)>, + /// Instance fields: (name, theory_type) + pub instance_fields: Vec<(String, String)>, + pub axioms: Vec, +} + +#[derive(Debug)] +pub struct AxiomDetail { + pub context: Vec<(String, String)>, + pub premise: String, + pub conclusion: String, +} + +#[derive(Debug)] +pub struct InstanceDetail { + pub name: String, + pub theory_name: String, + pub elements: Vec<(String, Vec)>, + pub functions: Vec<(String, Vec)>, + /// Relations: (name, field_names, list of tuples-as-element-names) + pub relations: Vec<(String, Vec, Vec>)>, + /// Nested instances: (field_name, element_count) + pub nested: Vec<(String, usize)>, +} + +#[derive(Debug)] +pub enum 
InspectResult { + Theory(TheoryDetail), + Instance(InstanceDetail), +} + +/// Format instance detail as geolog-like syntax +pub fn format_instance_detail(detail: &InstanceDetail) -> String { + let mut out = String::new(); + out.push_str(&format!( + "instance {} : {} = {{\n", + detail.name, detail.theory_name + )); + + for (sort_name, elements) in &detail.elements { + out.push_str(&format!(" // {} ({}):\n", sort_name, elements.len())); + for elem in elements { + out.push_str(&format!(" {} : {};\n", elem, sort_name)); + } + } + + for (func_name, values) in &detail.functions { + if !values.is_empty() { + out.push_str(&format!(" // {}:\n", func_name)); + for value in values { + out.push_str(&format!(" {};\n", value)); + } + } + } + + // Relations + for (rel_name, field_names, tuples) in &detail.relations { + if !tuples.is_empty() { + out.push_str(&format!(" // {} ({} tuples):\n", rel_name, tuples.len())); + for tuple in tuples { + if field_names.is_empty() { + // Unary relation: just the element name + out.push_str(&format!(" {} {};\n", tuple.join(", "), rel_name)); + } else { + // Multi-ary relation: format as [field1: val1, field2: val2] rel_name; + let formatted: Vec = field_names + .iter() + .zip(tuple.iter()) + .map(|(fname, val)| format!("{}: {}", fname, val)) + .collect(); + out.push_str(&format!(" [{}] {};\n", formatted.join(", "), rel_name)); + } + } + } + } + + // Nested instances + if !detail.nested.is_empty() { + out.push_str(" // Nested instances:\n"); + for (field_name, element_count) in &detail.nested { + out.push_str(&format!(" {} = {{ /* {} elements */ }};\n", field_name, element_count)); + } + } + + out.push_str("}\n"); + out +} + +/// Format theory detail +pub fn format_theory_detail(detail: &TheoryDetail) -> String { + let mut out = String::new(); + + out.push_str("theory "); + for (param_name, theory_name) in &detail.params { + if theory_name == "Sort" { + out.push_str(&format!("({} : Sort) ", param_name)); + } else { + out.push_str(&format!("({} : 
{} instance) ", param_name, theory_name)); + } + } + out.push_str(&format!("{} {{\n", detail.name)); + + for sort in &detail.sorts { + out.push_str(&format!(" {} : Sort;\n", sort)); + } + + for (name, domain, codomain) in &detail.functions { + out.push_str(&format!(" {} : {} -> {};\n", name, domain, codomain)); + } + + for (name, domain) in &detail.relations { + out.push_str(&format!(" {} : {} -> Prop;\n", name, domain)); + } + + for (name, theory_type) in &detail.instance_fields { + out.push_str(&format!(" {} : {} instance;\n", name, theory_type)); + } + + for axiom in &detail.axioms { + let quantified: Vec = axiom + .context + .iter() + .map(|(name, sort)| format!("{} : {}", name, sort)) + .collect(); + + if axiom.premise == "true" { + out.push_str(&format!( + " forall {}. |- {};\n", + quantified.join(", "), + axiom.conclusion + )); + } else { + out.push_str(&format!( + " forall {}. {} |- {};\n", + quantified.join(", "), + axiom.premise, + axiom.conclusion + )); + } + } + + out.push_str("}\n"); + out +} diff --git a/src/serialize.rs b/src/serialize.rs new file mode 100644 index 0000000..fbcea7b --- /dev/null +++ b/src/serialize.rs @@ -0,0 +1,294 @@ +//! Structure serialization and deserialization. +//! +//! Provides rkyv-based serialization for `Structure` with both: +//! - `save_structure` / `load_structure`: heap-allocated deserialization +//! 
- `load_structure_mapped`: zero-copy memory-mapped access + +use std::fs::{self, File}; +use std::io::Write; +use std::path::Path; + +use memmap2::Mmap; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::ser::Serializer; +use rkyv::{check_archived_root, Archive, Deserialize, Serialize}; + +use crate::core::{FunctionColumn, ProductStorage, RelationStorage, SortId, Structure, TupleId, VecRelation}; +use crate::id::{get_luid, get_slid, some_luid, some_slid, Luid, NumericId, Slid}; + +// ============================================================================ +// SERIALIZABLE DATA TYPES +// ============================================================================ + +/// Serializable form of a relation +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct RelationData { + pub arity: usize, + pub tuples: Vec>, + pub extent: Vec, +} + +/// Serializable form of a function column +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub enum FunctionColumnData { + Local(Vec>), + External(Vec>), + /// Product domain: maps tuples of sort-local indices to result Slid index, + /// along with the field sort IDs for reconstruction + ProductLocal { + entries: Vec<(Vec, usize)>, + field_sorts: Vec, + }, + /// Product codomain: base domain maps to multiple fields + ProductCodomain { + /// One column per field - each Vec> is indexed by domain sort-local index + field_columns: Vec>>, + field_names: Vec, + field_sorts: Vec, + domain_sort: usize, + }, +} + +/// Serializable form of a Structure +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +pub struct StructureData { + pub num_sorts: usize, + pub luids: Vec, + pub sorts: Vec, + pub functions: Vec, + pub relations: Vec, +} + +impl StructureData { + pub fn from_structure(structure: &Structure) -> Self { + let functions = structure + .functions + .iter() + .map(|func_col| match func_col { + FunctionColumn::Local(col) => FunctionColumnData::Local( + col.iter() + 
.map(|&opt| get_slid(opt).map(|s| s.index())) + .collect(), + ), + FunctionColumn::External(col) => FunctionColumnData::External( + col.iter() + .map(|&opt| get_luid(opt).map(|l| l.index())) + .collect(), + ), + FunctionColumn::ProductLocal { + storage, + field_sorts, + } => { + let entries: Vec<(Vec, usize)> = storage + .iter_defined() + .map(|(tuple, result)| (tuple, result.index())) + .collect(); + FunctionColumnData::ProductLocal { + entries, + field_sorts: field_sorts.clone(), + } + } + FunctionColumn::ProductCodomain { + field_columns, + field_names, + field_sorts, + domain_sort, + } => { + let serialized_columns: Vec>> = field_columns + .iter() + .map(|col| { + col.iter() + .map(|&opt| get_slid(opt).map(|s| s.index())) + .collect() + }) + .collect(); + FunctionColumnData::ProductCodomain { + field_columns: serialized_columns, + field_names: field_names.clone(), + field_sorts: field_sorts.clone(), + domain_sort: *domain_sort, + } + } + }) + .collect(); + + let relations = structure + .relations + .iter() + .map(|rel| RelationData { + arity: rel.arity(), + tuples: rel.tuples.clone(), + extent: rel.iter_ids().collect(), + }) + .collect(); + + Self { + num_sorts: structure.num_sorts(), + luids: structure.luids.clone(), + sorts: structure.sorts.clone(), + functions, + relations, + } + } + + pub fn to_structure(&self) -> Structure { + use crate::id::NumericId; + + let mut structure = Structure::new(self.num_sorts); + + for (slid_idx, (&luid, &sort_id)) in self.luids.iter().zip(self.sorts.iter()).enumerate() { + let added_slid = structure.add_element_with_luid(luid, sort_id); + debug_assert_eq!(added_slid, Slid::from_usize(slid_idx)); + } + + structure.functions = self + .functions + .iter() + .map(|func_data| match func_data { + FunctionColumnData::Local(col) => FunctionColumn::Local( + col.iter() + .map(|&opt| opt.map(Slid::from_usize).and_then(some_slid)) + .collect(), + ), + FunctionColumnData::External(col) => FunctionColumn::External( + col.iter() + 
.map(|&opt| opt.map(Luid::from_usize).and_then(some_luid)) + .collect(), + ), + FunctionColumnData::ProductLocal { + entries, + field_sorts, + } => { + let mut storage = ProductStorage::new_general(); + for (tuple, result) in entries { + storage + .set(tuple, Slid::from_usize(*result)) + .expect("no conflicts in serialized data"); + } + FunctionColumn::ProductLocal { + storage, + field_sorts: field_sorts.clone(), + } + } + FunctionColumnData::ProductCodomain { + field_columns, + field_names, + field_sorts, + domain_sort, + } => { + let restored_columns: Vec> = field_columns + .iter() + .map(|col| { + col.iter() + .map(|&opt| opt.map(Slid::from_usize).and_then(some_slid)) + .collect() + }) + .collect(); + FunctionColumn::ProductCodomain { + field_columns: restored_columns, + field_names: field_names.clone(), + field_sorts: field_sorts.clone(), + domain_sort: *domain_sort, + } + } + }) + .collect(); + + structure.relations = self + .relations + .iter() + .map(|rel_data| { + let mut rel = VecRelation::new(rel_data.arity); + for tuple in &rel_data.tuples { + rel.tuple_to_id.insert(tuple.clone(), rel.tuples.len()); + rel.tuples.push(tuple.clone()); + } + for &tuple_id in &rel_data.extent { + rel.extent.insert(tuple_id as u64); + } + rel + }) + .collect(); + + structure + } +} + +// ============================================================================ +// SAVE / LOAD FUNCTIONS +// ============================================================================ + +/// Save a Structure to a file +pub fn save_structure(structure: &Structure, path: &Path) -> Result<(), String> { + let data = StructureData::from_structure(structure); + + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).map_err(|e| format!("Failed to create directory: {}", e))?; + } + + let mut serializer = AllocSerializer::<4096>::default(); + serializer + .serialize_value(&data) + .map_err(|e| format!("Failed to serialize structure: {}", e))?; + let bytes = 
serializer.into_serializer().into_inner(); + + let temp_path = path.with_extension("tmp"); + { + let mut file = + File::create(&temp_path).map_err(|e| format!("Failed to create temp file: {}", e))?; + file.write_all(&bytes) + .map_err(|e| format!("Failed to write file: {}", e))?; + file.sync_all() + .map_err(|e| format!("Failed to sync file: {}", e))?; + } + + fs::rename(&temp_path, path).map_err(|e| format!("Failed to rename file: {}", e))?; + + Ok(()) +} + +/// Load a Structure from a file (deserializes into heap-allocated Structure) +/// +/// Use this when you need a mutable Structure or when access patterns involve +/// heavy computation on the data. For read-only access to large structures, +/// prefer `load_structure_mapped` which is ~100-400x faster. +pub fn load_structure(path: &Path) -> Result { + let file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?; + + let mmap = unsafe { Mmap::map(&file) }.map_err(|e| format!("Failed to mmap file: {}", e))?; + + if mmap.is_empty() { + return Err("Empty structure file".to_string()); + } + + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Failed to validate archive: {}", e))?; + + let data: StructureData = archived + .deserialize(&mut rkyv::Infallible) + .map_err(|_| "Failed to deserialize structure")?; + + Ok(data.to_structure()) +} + +/// Load a Structure from a file with zero-copy access (memory-mapped) +/// +/// This is ~100-400x faster than `load_structure` for large structures because +/// it doesn't deserialize the data - it accesses the archived format directly +/// from the memory map. 
+/// +/// Use this for: +/// - Read-only access to large structures +/// - Fast startup when you just need to query existing data +/// - Reducing memory footprint (only the mmap exists, no heap copies) +/// +/// Trade-offs: +/// - Read-only (cannot modify the structure) +/// - Slightly different API (returns `MappedStructure` instead of `Structure`) +/// - File must remain valid for lifetime of `MappedStructure` +pub fn load_structure_mapped(path: &Path) -> Result { + crate::zerocopy::MappedStructure::open(path) +} diff --git a/src/solver/mod.rs b/src/solver/mod.rs new file mode 100644 index 0000000..3cc87cc --- /dev/null +++ b/src/solver/mod.rs @@ -0,0 +1,415 @@ +//! Solver infrastructure for instance synthesis +//! +//! This module provides the search tree and tactics for finding instances +//! of geometric theories. The architecture is designed for AI-agent-driven +//! search: the agent manipulates an explicit search tree, running automated +//! tactics for bounded time and providing strategic guidance. +//! +//! # Key Concepts +//! +//! - **Search Tree**: Explicit tree of partial models, not implicit in call stack +//! - **Partial Model**: A `Structure` where carriers can grow, functions can become +//! more defined, and relations can have more tuples asserted +//! - **Refinement**: Natural preorder on Structures (really a category of partial +//! models with refinement morphisms) +//! - **Obligation**: When an axiom's premise is satisfied but conclusion isn't, +//! we have an obligation to witness the conclusion (not a failure!) +//! - **Tactic**: Automated search strategy that runs for bounded time +//! - **Agent Loop**: AI decides which nodes to explore, provides hints, estimates +//! success probabilities, allocates resources +//! +//! # The Refinement Order +//! +//! A Structure S₁ refines to S₂ (S₁ ≤ S₂) when: +//! - All carriers in S₁ are subsets of corresponding carriers in S₂ +//! - All defined function values in S₁ are preserved in S₂ +//! 
- All asserted relation tuples in S₁ are preserved in S₂ +//! +//! A search node conjectures: "∃ complete, axiom-satisfying Structure above this one" +//! +//! # Obligations, Equations, and Derivations +//! +//! In geometric logic, axiom consequents are always positive (existentials, +//! disjunctions, atomic relations, equations). The refinement order on partial +//! models includes not just adding facts, but also **quotienting by equations** +//! (merging elements). This means: +//! +//! - **Obligation**: Premise satisfied, conclusion not yet witnessed → need to +//! witness. Can always potentially be done by refinement. +//! +//! - **Pending Equation**: Two terms must be equal. Resolved by merging elements +//! and propagating consequences (congruence closure). +//! +//! - **Unsat**: The ONLY way to get true unsatisfiability is if there exists a +//! **Derivation** of `⊢ False` from the instantiated axioms. This is +//! proof-theoretic: we need to actually derive False, not just have "conflicts". +//! +//! For example, "function f already maps a to b, but we need f(a) = c" is NOT +//! unsat—it's a pending equation `b = c`. We merge b and c, propagate, and only +//! if this leads to deriving False (via some axiom like `R(x), S(x) ⊢ False`) +//! do we have true unsatisfiability. +//! +//! This is essentially SMT solving with EUF (equality + uninterpreted functions) +//! plus geometric theory axioms, where the goal is to either: +//! 1. Find a complete model satisfying all axioms, or +//! 2. Derive `⊢ False` proving no such model exists +//! +//! # Unified Model Enumeration API +//! +//! The high-level API unifies `:solve` and `:query` under a common abstraction: +//! finding maximal elements of the posetal reflection of the category of models. +//! +//! - [`solve`]: Find models from scratch (base = empty structure) +//! - [`query`]: Find extensions of an existing model to a larger theory +//! 
- [`enumerate_models`]: Core unified function (both above are wrappers) +//! +//! ```ignore +//! // Find any model of a theory +//! let result = solve(theory, Budget::quick()); +//! +//! // Extend an existing model to satisfy additional axioms +//! let result = query(base_structure, universe, extended_theory, budget); +//! ``` + +mod tactics; +mod tree; +mod types; + +// Re-export main types +pub use tactics::{AutoTactic, Budget, CheckTactic, EnumerateFunctionTactic, ForwardChainingTactic, PropagateEquationsTactic, Tactic, TacticResult}; +pub use tree::SearchTree; +pub use types::{ + AxiomCheckResult, ConflictClause, CongruenceClosure, EquationReason, NodeDetail, NodeId, + NodeStatus, Obligation, PendingEquation, SearchNode, SearchSummary, +}; + +// Unified model enumeration API (see below) +// - enumerate_models: core unified function +// - solve: convenience for :solve (find models from scratch) +// - query: convenience for :query (extend existing models) +// - EnumerationResult: result type + +// Re-export union-find for convenience +pub use egglog_union_find::UnionFind; + +// ============================================================================ +// UNIFIED MODEL ENUMERATION API +// ============================================================================ + +use std::rc::Rc; +use crate::core::{DerivedSort, ElaboratedTheory, Structure}; +use crate::universe::Universe; + +/// Result of model enumeration. +#[derive(Debug, Clone)] +pub enum EnumerationResult { + /// Found a complete model satisfying all axioms. + Found { + /// The witness structure (model). + model: Structure, + /// Time taken in milliseconds. + time_ms: f64, + }, + /// Proved no model exists (derived False). + Unsat { + /// Time taken in milliseconds. + time_ms: f64, + }, + /// Search incomplete (budget exhausted or still has obligations). + Incomplete { + /// Partial structure so far. + partial: Structure, + /// Time taken in milliseconds. 
// NOTE(review): continuation of `EnumerationResult::Incomplete` — the enum's
// opening lines are above this chunk.
        time_ms: f64,
        /// Description of why incomplete.
        reason: String,
    },
}

/// Unified model enumeration: find models of `theory` extending `base`.
///
/// This is the core API that unifies `:solve` and `:query`:
/// - `:solve T` = `enumerate_models(empty, T, budget)`
/// - `:query M T'` = `enumerate_models(M, T', budget)` where T' extends M's theory
///
/// # Arguments
/// - `base`: Starting structure (empty for `:solve`, existing model for `:query`)
/// - `universe`: Universe for Luid allocation (should contain Luids from base)
/// - `theory`: The theory to satisfy
/// - `budget`: Resource limits for the search
///
/// # Returns
/// - `Found` if a complete model was found
/// - `Unsat` if no model exists (derived False)
/// - `Incomplete` if budget exhausted or search blocked
pub fn enumerate_models(
    base: Structure,
    universe: Universe,
    theory: Rc<ElaboratedTheory>,
    budget: Budget,
) -> EnumerationResult {
    let start = std::time::Instant::now();
    let sig = &theory.theory.signature;

    // Create search tree from base
    let mut tree = SearchTree::from_base(theory.clone(), base, universe);

    // Initialize function and relation storage at root (if not already initialized).
    // This preserves any function values that were imported from param instances.
    let num_funcs = sig.functions.len();
    let num_rels = sig.relations.len();

    // Only init functions if not already initialized (or wrong size)
    if tree.nodes[0].structure.functions.len() != num_funcs {
        // NOTE(review): the element type was lost in transit; `Vec<Option<_>>`
        // lets the compiler infer the sort-id type `init_functions` expects —
        // confirm against its signature.
        let domain_sort_ids: Vec<Option<_>> = sig
            .functions
            .iter()
            .map(|f| match &f.domain {
                DerivedSort::Base(sid) => Some(*sid),
                DerivedSort::Product(_) => None,
            })
            .collect();

        if tree.init_functions(0, &domain_sort_ids).is_err() {
            return EnumerationResult::Incomplete {
                partial: tree.nodes[0].structure.clone(),
                time_ms: start.elapsed().as_secs_f64() * 1000.0,
                reason: "Failed to initialize function storage".to_string(),
            };
        }
    }

    // Only init relations if not already initialized (or wrong size)
    if tree.nodes[0].structure.relations.len() != num_rels {
        // A base-sorted relation has arity 1; a product-sorted one has one
        // column per field.
        let arities: Vec<usize> = sig
            .relations
            .iter()
            .map(|r| match &r.domain {
                DerivedSort::Base(_) => 1,
                DerivedSort::Product(fields) => fields.len(),
            })
            .collect();

        if tree.init_relations(0, &arities).is_err() {
            return EnumerationResult::Incomplete {
                partial: tree.nodes[0].structure.clone(),
                time_ms: start.elapsed().as_secs_f64() * 1000.0,
                reason: "Failed to initialize relation storage".to_string(),
            };
        }
    }

    // Run AutoTactic
    let result = AutoTactic.run(&mut tree, 0, &budget);
    let time_ms = start.elapsed().as_secs_f64() * 1000.0;

    match result {
        TacticResult::Solved => EnumerationResult::Found {
            model: tree.nodes[0].structure.clone(),
            time_ms,
        },
        TacticResult::Unsat(_) => EnumerationResult::Unsat { time_ms },
        TacticResult::HasObligations(obs) => EnumerationResult::Incomplete {
            partial: tree.nodes[0].structure.clone(),
            time_ms,
            reason: format!("Has {} unfulfilled obligations", obs.len()),
        },
        TacticResult::Progress { steps_taken, .. } => EnumerationResult::Incomplete {
            partial: tree.nodes[0].structure.clone(),
            time_ms,
            reason: format!("Made progress ({} steps) but not complete", steps_taken),
        },
        TacticResult::Timeout { steps_taken } => EnumerationResult::Incomplete {
            partial: tree.nodes[0].structure.clone(),
            time_ms,
            reason: format!("Timeout after {} steps", steps_taken),
        },
        TacticResult::Error(e) => EnumerationResult::Incomplete {
            partial: tree.nodes[0].structure.clone(),
            time_ms,
            reason: format!("Error: {}", e),
        },
    }
}

/// Convenience: solve a theory from scratch (find any model).
///
/// Equivalent to `enumerate_models(empty_structure, Universe::new(), theory, budget)`.
pub fn solve(theory: Rc<ElaboratedTheory>, budget: Budget) -> EnumerationResult {
    let num_sorts = theory.theory.signature.sorts.len();
    let base = Structure::new(num_sorts);
    enumerate_models(base, Universe::new(), theory, budget)
}

/// Convenience: query/extend an existing model.
///
/// Equivalent to `enumerate_models(base, universe, extension_theory, budget)`.
pub fn query(
    base: Structure,
    universe: Universe,
    extension_theory: Rc<ElaboratedTheory>,
    budget: Budget,
) -> EnumerationResult {
    enumerate_models(base, universe, extension_theory, budget)
}

#[cfg(test)]
mod unified_api_tests {
    use super::*;
    use crate::core::{Context, Formula, RelationStorage, Sequent, Signature, Term, Theory};

    /// Theory: Node sort, R relation, axiom `True |- ∃x:Node. R(x)`.
    fn make_existential_theory() -> Rc<ElaboratedTheory> {
        let mut sig = Signature::new();
        let node = sig.add_sort("Node".to_string());
        sig.add_relation("R".to_string(), DerivedSort::Base(node));

        let axiom = Sequent {
            context: Context::new(),
            premise: Formula::True,
            conclusion: Formula::Exists(
                "x".to_string(),
                DerivedSort::Base(node),
                Box::new(Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(node)))),
            ),
        };

        Rc::new(ElaboratedTheory {
            params: vec![],
            theory: Theory {
                name: "ExistsR".to_string(),
                signature: sig,
                axioms: vec![axiom],
                axiom_names: vec!["ax/exists_r".to_string()],
            },
        })
    }

    #[test]
    fn test_solve_finds_model() {
        // solve = enumerate_models with empty base
        let theory = make_existential_theory();
        let result = solve(theory, Budget::quick());

        match result {
            EnumerationResult::Found { model, .. } => {
                // Should have at least one element with R
                assert!(model.carrier_size(0) >= 1);
                assert!(!model.relations[0].is_empty());
            }
            other => panic!("Expected Found, got {:?}", other),
        }
    }

    #[test]
    fn test_query_extends_base() {
        // query = enumerate_models with existing base
        let theory = make_existential_theory();

        // Create base with one element, R not yet holding
        let mut universe = Universe::new();
        let mut base = Structure::new(1);
        let (_elem, _) = base.add_element(&mut universe, 0);
        base.init_relations(&[1]);

        // query should extend the base to satisfy the axiom
        let result = query(base, universe, theory, Budget::quick());

        match result {
            EnumerationResult::Found { model, .. } => {
                // R should now have at least one tuple
                assert!(!model.relations[0].is_empty());
            }
            other => panic!("Expected Found, got {:?}", other),
        }
    }

    #[test]
    fn test_unification_equivalence() {
        // Demonstrate: solve(T) = enumerate_models(empty, T)
        let theory = make_existential_theory();
        let budget = Budget::quick();

        // Method 1: solve
        let result1 = solve(theory.clone(), budget.clone());

        // Method 2: enumerate_models with empty base
        let num_sorts = theory.theory.signature.sorts.len();
        let empty_base = Structure::new(num_sorts);
        let result2 = enumerate_models(empty_base, Universe::new(), theory, budget);

        // Both should succeed (find a model)
        match (&result1, &result2) {
            (EnumerationResult::Found { .. }, EnumerationResult::Found { .. }) => {
                // Both found models - the unification works!
            }
            _ => panic!(
                "Expected both to find models, got {:?} and {:?}",
                result1, result2
            ),
        }
    }

    #[test]
    fn test_solve_unsat_theory() {
        // Theory that derives False: True |- False
        let mut sig = Signature::new();
        let _sort_a = sig.add_sort("A".to_string());

        let axiom = Sequent {
            context: Context::new(),
            premise: Formula::True,
            conclusion: Formula::False,
        };

        let theory = Rc::new(ElaboratedTheory {
            params: vec![],
            theory: Theory {
                name: "Inconsistent".to_string(),
                signature: sig,
                axioms: vec![axiom],
                axiom_names: vec!["ax/inconsistent".to_string()],
            },
        });

        let result = solve(theory, Budget::quick());

        match result {
            EnumerationResult::Unsat { .. } => {
                // Correctly detected UNSAT
            }
            other => panic!("Expected Unsat, got {:?}", other),
        }
    }

    #[test]
    fn test_solve_trivial_theory() {
        // Theory with no axioms - should be trivially satisfied by empty structure
        let mut sig = Signature::new();
        sig.add_sort("A".to_string());
        sig.add_sort("B".to_string());

        let theory = Rc::new(ElaboratedTheory {
            params: vec![],
            theory: Theory {
                name: "Trivial".to_string(),
                signature: sig,
                axioms: vec![],
                axiom_names: vec![],
            },
        });

        let result = solve(theory, Budget::quick());

        match result {
            EnumerationResult::Found { model, .. } => {
                // Empty structure is a valid model
                assert_eq!(model.carrier_size(0), 0);
                assert_eq!(model.carrier_size(1), 0);
            }
            other => panic!("Expected Found with empty model, got {:?}", other),
        }
    }
}

diff --git a/src/solver/tactics.rs b/src/solver/tactics.rs
new file mode 100644
index 0000000..9a25eca
--- /dev/null
+++ b/src/solver/tactics.rs
//! Tactics for automated search.
+ +use crate::id::{NumericId, Slid}; + +use super::tree::SearchTree; +use super::types::{ConflictClause, NodeId, Obligation}; + +/// Budget for tactic execution +#[derive(Clone, Debug)] +pub struct Budget { + /// Maximum wall-clock time in milliseconds + pub time_ms: u64, + /// Maximum number of refinement steps + pub steps: usize, +} + +impl Budget { + pub fn new(time_ms: u64, steps: usize) -> Self { + Self { time_ms, steps } + } + + /// A short budget for quick checks + pub fn quick() -> Self { + Self { + time_ms: 100, + steps: 100, + } + } + + /// A medium budget for exploratory search + pub fn medium() -> Self { + Self { + time_ms: 1000, + steps: 1000, + } + } + + /// A longer budget for deeper search + pub fn long() -> Self { + Self { + time_ms: 5000, + steps: 10000, + } + } +} + +/// Result of running a tactic +#[derive(Clone, Debug)] +pub enum TacticResult { + /// Found a valid complete instance at this node + Solved, + /// Proved this node has no solution (with optional conflict clause) + /// This only happens when fulfilling obligations would CONFLICT with + /// existing commitments, not merely because axioms aren't yet satisfied. + Unsat(Option), + /// Axioms have unsatisfied consequents that need to be witnessed. + /// This is NOT failure—the agent should fulfill these obligations + /// (add elements, define functions, assert relations) to make progress. + HasObligations(Vec), + /// Made progress, can continue with more budget + Progress { + /// Number of refinement steps taken + steps_taken: usize, + /// Number of branches created + branches_created: usize, + }, + /// Budget exhausted without conclusive result + Timeout { + /// Where we got to + steps_taken: usize, + }, + /// Error during execution + Error(String), +} + +/// A tactic for automated search +/// +/// Tactics implement specific search strategies. They run for bounded time/steps +/// and return a result. The agent orchestrates tactics across the search tree. 
+pub trait Tactic { + /// Run the tactic on a node with the given budget + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult; + + /// Human-readable name for this tactic + fn name(&self) -> &str; +} + +// ============================================================================ +// BUILT-IN TACTICS +// ============================================================================ + +/// Check tactic: check axioms and report obligations +/// +/// In geometric logic, axiom "violations" are really OBLIGATIONS to fulfill. +/// The consequent is always positive, so we can potentially satisfy it by +/// adding elements, defining functions, or asserting relations. +/// +/// This tactic checks current state and returns: +/// - Solved if complete and all axioms satisfied +/// - HasObligations if there are consequents to witness +/// - Progress if incomplete but no current obligations +pub struct CheckTactic; + +impl Tactic for CheckTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, _budget: &Budget) -> TacticResult { + match tree.check_axioms(node) { + Ok(()) => { + // No violations - check if complete + match tree.is_complete(node) { + Ok(true) => { + tree.mark_solved(node); + TacticResult::Solved + } + Ok(false) => { + // Model is incomplete (e.g., undefined functions) but no axiom violations. + // Don't report progress - we need external help (function enumeration) + // or the model can be considered valid with partial functions. + TacticResult::Progress { + steps_taken: 0, + branches_created: 0, + } + } + Err(e) => TacticResult::Error(e), + } + } + Err(violations) => { + // Convert violations to obligations + // Violations mean: premise satisfied, conclusion not yet satisfied + // This is an OBLIGATION to witness the conclusion, not unsat! 
+ let obligations: Vec = violations + .iter() + .flat_map(|(axiom_idx, viols)| { + viols.iter().map(move |v| Obligation { + axiom_idx: *axiom_idx, + // Convert variable assignment to (name, sort_id, slid) + // For now, we don't have sort info in Violation, so approximate + assignment: v + .variable_names + .iter() + .zip(v.assignment.iter()) + .map(|(name, &idx)| (name.clone(), 0, Slid::from_usize(idx))) + // sort_id=0 is placeholder + .collect(), + description: format!( + "Axiom {} needs consequent witnessed for assignment {:?}", + axiom_idx, v.assignment + ), + }) + }) + .collect(); + + TacticResult::HasObligations(obligations) + } + } + } + + fn name(&self) -> &str { + "check" + } +} + +/// Enumeration tactic: try all values for an undefined function +pub struct EnumerateFunctionTactic { + pub func_id: usize, +} + +impl Tactic for EnumerateFunctionTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + let start = std::time::Instant::now(); + let mut steps = 0; + let mut branches = 0; + + let sig = tree.signature().clone(); + let func_sym = match sig.functions.get(self.func_id) { + Some(f) => f, + None => return TacticResult::Error("Invalid function ID".to_string()), + }; + + // Get domain and codomain sorts + let (domain_sort, codomain_sort) = match (&func_sym.domain, &func_sym.codomain) { + (crate::core::DerivedSort::Base(d), crate::core::DerivedSort::Base(c)) => (*d, *c), + _ => return TacticResult::Error("Only base sorts supported for now".to_string()), + }; + + // Find undefined function applications + let node_ref = match tree.get(node) { + Some(n) => n, + None => return TacticResult::Error("Invalid node ID".to_string()), + }; + + if self.func_id >= node_ref.structure.functions.len() { + return TacticResult::Error("Function storage not initialized".to_string()); + } + + // Find first undefined domain element (for local functions) + let mut undefined_domain: Option = None; + for slid_u64 in 
node_ref.structure.carriers[domain_sort].iter() { + let slid = Slid::from_usize(slid_u64 as usize); + let sort_slid = node_ref.structure.sort_local_id(slid); + if node_ref + .structure + .get_function(self.func_id, sort_slid) + .is_none() + { + undefined_domain = Some(slid); + break; + } + } + + let domain_slid = match undefined_domain { + Some(d) => d, + None => { + // All defined - check current state + return CheckTactic.run(tree, node, budget); + } + }; + + // Enumerate codomain values + let codomain_elements: Vec = node_ref.structure.carriers[codomain_sort] + .iter() + .map(|x| Slid::from_usize(x as usize)) + .collect(); + + if codomain_elements.is_empty() { + return TacticResult::Error("Empty codomain - need to add elements first".to_string()); + } + + // Create a branch for each possible value + for &codomain_slid in &codomain_elements { + if start.elapsed().as_millis() as u64 > budget.time_ms || steps >= budget.steps { + return TacticResult::Timeout { steps_taken: steps }; + } + + let child = tree.branch( + node, + Some(format!( + "f{}({})={}", + self.func_id, domain_slid, codomain_slid + )), + ); + + if let Err(e) = tree.define_function(child, self.func_id, domain_slid, codomain_slid) { + tree.mark_unsat( + child, + Some(ConflictClause { + required_elements: vec![], + required_functions: vec![], + required_relations: vec![], + violated_axiom: None, + explanation: Some(e), + }), + ); + } + + steps += 1; + branches += 1; + } + + // Mark parent as non-leaf (it has children now) + // Parent stays Open but is no longer on frontier + + TacticResult::Progress { + steps_taken: steps, + branches_created: branches, + } + } + + fn name(&self) -> &str { + "enumerate_function" + } +} + +/// Forward chaining tactic: automatically fulfill simple obligations. +/// +/// When an axiom's premise is satisfied but conclusion isn't, we have an obligation. 
+/// This tactic automatically fulfills simple obligations: +/// - **Relation assertions**: assert the relation tuple +/// - **Equations**: add to pending equations in congruence closure +/// - **Existentials**: add a fresh witness element (then recurse) +/// - **Disjunctions**: create branches (one per disjunct) +/// - **False**: mark as unsat (derivation of False found!) +/// +/// This is Datalog-style forward chaining for geometric logic. +pub struct ForwardChainingTactic; + +impl Tactic for ForwardChainingTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + use crate::core::Formula; + use crate::tensor::check_theory_axioms; + + let start = std::time::Instant::now(); + let mut steps = 0; + let mut branches = 0; + + // Get current structure and axioms + let axioms = tree.theory.theory.axioms.clone(); + let sig = tree.theory.theory.signature.clone(); + + // Check axioms and get violations + let violations = { + let node_ref = match tree.get(node) { + Some(n) => n, + None => return TacticResult::Error("Invalid node ID".to_string()), + }; + check_theory_axioms(&axioms, &node_ref.structure, &sig) + }; + + if violations.is_empty() { + // No violations - check if complete + return CheckTactic.run(tree, node, budget); + } + + // Process each violation + for (axiom_idx, viols) in violations { + for viol in viols { + if start.elapsed().as_millis() as u64 > budget.time_ms || steps >= budget.steps { + return TacticResult::Timeout { steps_taken: steps }; + } + + let axiom = &axioms[axiom_idx]; + let conclusion = &axiom.conclusion; + + // Build variable assignment map from violation + let assignment: std::collections::HashMap = viol + .variable_names + .iter() + .zip(viol.assignment.iter()) + .map(|(name, &idx)| (name.clone(), idx)) + .collect(); + + // Process the conclusion based on its type + match conclusion { + Formula::False => { + // Found a derivation of False! + // This is true unsatisfiability. 
+ tree.mark_unsat( + node, + Some(ConflictClause { + required_elements: vec![], + required_functions: vec![], + required_relations: vec![], + violated_axiom: Some(axiom_idx), + explanation: Some(format!( + "Axiom {} derives False for assignment {:?}", + axiom_idx, assignment + )), + }), + ); + return TacticResult::Unsat(None); + } + + Formula::Disj(disjuncts) if !disjuncts.is_empty() => { + // Create a branch for each disjunct and process in that branch + for (i, disjunct) in disjuncts.iter().enumerate() { + let child = tree.branch( + node, + Some(format!("axiom{}:disj{}", axiom_idx, i)), + ); + branches += 1; + // Process the disjunct in the child branch + let mut processor = FormulaProcessor::new( + tree, + child, + assignment.clone(), + axiom_idx, + ); + if let Err(e) = processor.process(disjunct) { + return TacticResult::Error(e); + } + steps += processor.steps; + } + } + + Formula::Disj(_) => { + // Empty disjunction - should be handled as False + // For now, skip (shouldn't happen in well-formed theories) + } + + // For Rel, Eq, Exists, Conj, True - use recursive processor + other_formula => { + let mut processor = FormulaProcessor::new( + tree, + node, + assignment.clone(), + axiom_idx, + ); + if let Err(e) = processor.process(other_formula) { + return TacticResult::Error(e); + } + steps += processor.steps; + } + } + } + } + + if steps > 0 || branches > 0 { + TacticResult::Progress { + steps_taken: steps, + branches_created: branches, + } + } else { + // No progress made - return obligations for agent + CheckTactic.run(tree, node, budget) + } + } + + fn name(&self) -> &str { + "forward_chaining" + } +} + +/// Equation propagation tactic: process pending equations in the congruence closure. +/// +/// This tactic: +/// 1. Pops pending equations from the CC queue +/// 2. Merges the equivalence classes +/// 3. Checks for function conflicts (f(a) = x and f(b) = y where a = b implies x = y) +/// 4. 
Adds any new equations discovered via congruence +/// +/// This is a simplified version that doesn't do full congruence closure, +/// but handles the basic case of merging and detecting function conflicts. +pub struct PropagateEquationsTactic; + +impl Tactic for PropagateEquationsTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + let start = std::time::Instant::now(); + let mut steps = 0; + let mut _new_equations = 0; + + // Process pending equations + loop { + if start.elapsed().as_millis() as u64 > budget.time_ms || steps >= budget.steps { + return TacticResult::Timeout { steps_taken: steps }; + } + + // Pop next equation + let eq = { + let node = match tree.get_mut(node) { + Some(n) => n, + None => return TacticResult::Error("Invalid node ID".to_string()), + }; + node.cc.pop_pending() + }; + + let eq = match eq { + Some(e) => e, + None => break, // No more pending equations + }; + + // Merge the equivalence classes + let merged = { + let node = tree.get_mut(node).unwrap(); + node.cc.merge(eq.lhs, eq.rhs) + }; + + if merged { + steps += 1; + + // Check for function conflicts + // For each function f, if f(a) and f(b) are both defined and a = b, + // then we need f(a) = f(b) (congruence) + let sig = tree.signature().clone(); + let conflicts: Vec<(Slid, Slid, usize)> = { + let node = tree.get(node).unwrap(); + let mut conflicts = Vec::new(); + + for func_id in 0..sig.functions.len() { + if func_id >= node.structure.functions.len() { + continue; + } + + // Get values for eq.lhs and eq.rhs + let lhs_sort_slid = node.structure.sort_local_id(eq.lhs); + let rhs_sort_slid = node.structure.sort_local_id(eq.rhs); + + let lhs_val = node.structure.get_function(func_id, lhs_sort_slid); + let rhs_val = node.structure.get_function(func_id, rhs_sort_slid); + + if let (Some(lv), Some(rv)) = (lhs_val, rhs_val) + && lv != rv + { + // Function conflict: f(a) = lv and f(b) = rv, but a = b + // Add equation lv = rv with func_id for 
debugging + conflicts.push((lv, rv, func_id)); + } + } + conflicts + }; + + // Add conflict-induced equations + for (lv, rv, func_id) in conflicts { + tree.add_pending_equation( + node, + lv, + rv, + super::types::EquationReason::Congruence { func_id }, + ); + _new_equations += 1; + } + } + } + + if steps > 0 { + TacticResult::Progress { + steps_taken: steps, + branches_created: 0, + } + } else { + // No pending equations - fall through to check + CheckTactic.run(tree, node, budget) + } + } + + fn name(&self) -> &str { + "propagate_equations" + } +} + +/// Automatic solving tactic: runs forward chaining and equation propagation to fixpoint. +/// +/// This composite tactic: +/// 1. Runs ForwardChainingTactic until no progress +/// 2. Runs PropagateEquationsTactic until no progress +/// 3. Repeats until fixpoint (no more progress from either) +/// +/// This is the main "auto-solve" tactic for geometric logic. +pub struct AutoTactic; + +impl Tactic for AutoTactic { + fn run(&mut self, tree: &mut SearchTree, node: NodeId, budget: &Budget) -> TacticResult { + let start = std::time::Instant::now(); + let mut total_steps = 0; + let mut total_branches = 0; + let mut iterations = 0; + + loop { + if start.elapsed().as_millis() as u64 > budget.time_ms { + return TacticResult::Timeout { steps_taken: total_steps }; + } + + iterations += 1; + let mut made_progress = false; + + // Run forward chaining + let remaining_budget = Budget { + time_ms: budget.time_ms.saturating_sub(start.elapsed().as_millis() as u64), + steps: budget.steps.saturating_sub(total_steps), + }; + + match ForwardChainingTactic.run(tree, node, &remaining_budget) { + TacticResult::Progress { steps_taken, branches_created } => { + total_steps += steps_taken; + total_branches += branches_created; + if steps_taken > 0 || branches_created > 0 { + made_progress = true; + } + } + TacticResult::Solved => return TacticResult::Solved, + TacticResult::Unsat(clause) => return TacticResult::Unsat(clause), + 
TacticResult::Timeout { steps_taken } => { + total_steps += steps_taken; + return TacticResult::Timeout { steps_taken: total_steps }; + } + TacticResult::Error(e) => return TacticResult::Error(e), + TacticResult::HasObligations(_) => { + // Has obligations but made no progress - continue to propagation + } + } + + // Run equation propagation + let remaining_budget = Budget { + time_ms: budget.time_ms.saturating_sub(start.elapsed().as_millis() as u64), + steps: budget.steps.saturating_sub(total_steps), + }; + + match PropagateEquationsTactic.run(tree, node, &remaining_budget) { + TacticResult::Progress { steps_taken, .. } => { + total_steps += steps_taken; + if steps_taken > 0 { + made_progress = true; + } + } + TacticResult::Solved => return TacticResult::Solved, + TacticResult::Unsat(clause) => return TacticResult::Unsat(clause), + TacticResult::Timeout { steps_taken } => { + total_steps += steps_taken; + return TacticResult::Timeout { steps_taken: total_steps }; + } + TacticResult::Error(e) => return TacticResult::Error(e), + TacticResult::HasObligations(_) => { + // Has obligations but made no progress - continue to next iteration + } + } + + // Check for fixpoint + if !made_progress { + break; + } + + // Safety limit on iterations + if iterations > 1000 { + return TacticResult::Error("AutoTactic exceeded iteration limit".to_string()); + } + } + + TacticResult::Progress { + steps_taken: total_steps, + branches_created: total_branches, + } + } + + fn name(&self) -> &str { + "auto" + } +} + +/// Recursive formula processor for forward chaining. 
+/// +/// Processes a positive geometric formula by: +/// - Asserting relation tuples +/// - Adding pending equations +/// - Adding witness elements for existentials (and recursively processing bodies) +/// - Processing conjuncts +/// - NOT handling disjunctions (those need branching at a higher level) +struct FormulaProcessor<'a> { + tree: &'a mut SearchTree, + node: NodeId, + assignment: std::collections::HashMap, + axiom_idx: usize, + steps: usize, +} + +impl<'a> FormulaProcessor<'a> { + fn new( + tree: &'a mut SearchTree, + node: NodeId, + assignment: std::collections::HashMap, + axiom_idx: usize, + ) -> Self { + Self { + tree, + node, + assignment, + axiom_idx, + steps: 0, + } + } + + /// Process a formula, accumulating steps. Returns Err on failure. + fn process(&mut self, formula: &crate::core::Formula) -> Result<(), String> { + use crate::core::Formula; + + match formula { + Formula::True => { + // Nothing to do + Ok(()) + } + + Formula::Rel(rel_id, term) => { + // Assert relation tuple + let tuple = self.tree.get(self.node).and_then(|n| { + eval_term_to_tuple(term, &self.assignment, &n.structure) + }); + if let Some(tuple) = tuple { + self.tree.assert_relation(self.node, *rel_id, tuple)?; + self.steps += 1; + } + Ok(()) + } + + Formula::Eq(t1, t2) => { + // Add equation to congruence closure if not already equal + let eq_slids = self.tree.get(self.node).and_then(|n| { + let lhs = eval_term_to_slid(t1, &self.assignment, &n.structure)?; + let rhs = eval_term_to_slid(t2, &self.assignment, &n.structure)?; + Some((lhs, rhs)) + }); + if let Some((lhs, rhs)) = eq_slids { + let search_node = self.tree.get_mut(self.node).ok_or("Invalid node")?; + if !search_node.cc.are_equal(lhs, rhs) { + search_node.cc.add_equation( + lhs, + rhs, + super::types::EquationReason::AxiomConsequent { + axiom_idx: self.axiom_idx, + }, + ); + self.steps += 1; + } + } + Ok(()) + } + + Formula::Conj(conjuncts) => { + // Process each conjunct recursively + for conjunct in conjuncts { + 
self.process(conjunct)?; + } + Ok(()) + } + + Formula::Exists(var_name, sort, body) => { + // Add fresh witness and recursively process body + if let crate::core::DerivedSort::Base(sort_id) = sort { + match self.tree.add_element(self.node, *sort_id) { + Ok((slid, _luid)) => { + // Add witness to assignment + self.assignment.insert(var_name.clone(), slid.index()); + self.steps += 1; + // Recursively process body with updated assignment + self.process(body) + } + Err(e) => Err(format!("Failed to add witness: {}", e)), + } + } else { + // Product sort witness - not yet implemented + Ok(()) + } + } + + Formula::False | Formula::Disj(_) => { + // These should be handled at a higher level + // False triggers unsat, Disj triggers branching + Ok(()) + } + } + } +} + +/// Helper: evaluate a term to a single Slid given variable assignment and structure. +/// Returns None if the term contains constructs we can't handle or if evaluation fails. +fn eval_term_to_slid( + term: &crate::core::Term, + assignment: &std::collections::HashMap, + structure: &crate::core::Structure, +) -> Option { + use crate::core::Term; + + match term { + Term::Var(name, _sort) => { + // Simple variable - look up in assignment + assignment.get(name).map(|&idx| Slid::from_usize(idx)) + } + Term::App(func_id, arg) => { + // Function application: evaluate arg, then look up function value + let arg_slid = eval_term_to_slid(arg, assignment, structure)?; + let sort_slid = structure.sort_local_id(arg_slid); + structure.get_function(*func_id, sort_slid) + } + Term::Record(_fields) => { + // Records evaluate to product elements - not a single Slid + // Would need product element lookup + None + } + Term::Project(base, field_name) => { + // Projection: evaluate base (must be a record element), then project + // This would require looking up the product element's components + let _ = (base, field_name); + None // Not yet implemented - needs product element storage + } + } +} + +/// Helper: evaluate a term to a 
tuple of Slids given variable assignment. +/// Used for relation assertions where the domain may be a product. +fn eval_term_to_tuple( + term: &crate::core::Term, + assignment: &std::collections::HashMap, + structure: &crate::core::Structure, +) -> Option> { + use crate::core::Term; + + match term { + Term::Var(name, _sort) => { + // Simple variable - look up in assignment + assignment.get(name).map(|&idx| vec![Slid::from_usize(idx)]) + } + Term::Record(fields) => { + // Record term - collect all field values + let mut tuple = Vec::new(); + for (_, field_term) in fields { + match eval_term_to_tuple(field_term, assignment, structure) { + Some(mut field_tuple) => tuple.append(&mut field_tuple), + None => return None, + } + } + Some(tuple) + } + Term::App(_func_id, _arg) => { + // Function application - evaluate to single value, wrap in vec + eval_term_to_slid(term, assignment, structure).map(|s| vec![s]) + } + Term::Project(_, _) => { + // Projection - would need product element storage + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{DerivedSort, ElaboratedTheory, Signature, Theory}; + use crate::id::Slid; + use std::rc::Rc; + + fn make_simple_theory() -> Rc { + // A simple theory with one sort and one function + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + sig.add_function( + "f".to_string(), + DerivedSort::Base(node_id), + DerivedSort::Base(node_id), + ); + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Simple".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }) + } + + #[test] + fn test_search_tree_creation() { + let theory = make_simple_theory(); + let tree = SearchTree::new(theory); + + assert_eq!(tree.nodes.len(), 1); + assert_eq!(tree.root(), 0); + assert_eq!(tree.frontier(), vec![0]); + } + + #[test] + fn test_branching() { + let theory = make_simple_theory(); + let mut tree = SearchTree::new(theory); + + let child1 = 
tree.branch(0, Some("child1".to_string()));
        let child2 = tree.branch(0, Some("child2".to_string()));

        assert_eq!(tree.nodes.len(), 3);
        assert_eq!(tree.nodes[0].children, vec![child1, child2]);
        assert_eq!(tree.nodes[child1].parent, Some(0));
        assert_eq!(tree.nodes[child2].parent, Some(0));

        // Frontier should now be the children
        let frontier = tree.frontier();
        assert!(frontier.contains(&child1));
        assert!(frontier.contains(&child2));
        assert!(!frontier.contains(&0)); // Parent no longer on frontier (has children)
    }

    #[test]
    fn test_add_elements() {
        let theory = make_simple_theory();
        let mut tree = SearchTree::new(theory);

        // Add elements to root
        let (slid1, _luid1) = tree.add_element(0, 0).unwrap();
        let (slid2, _luid2) = tree.add_element(0, 0).unwrap();

        assert_eq!(slid1, Slid::from_usize(0));
        assert_eq!(slid2, Slid::from_usize(1));
        assert_eq!(tree.nodes[0].structure.carrier_size(0), 2);
    }

    #[test]
    fn test_check_tactic() {
        let theory = make_simple_theory();
        let mut tree = SearchTree::new(theory);

        // Empty structure should be "incomplete" but no obligations (no axioms)
        let result = CheckTactic.run(&mut tree, 0, &Budget::quick());

        // No axioms means no obligations, but also not complete (no elements, no function values)
        match result {
            TacticResult::Progress { .. } => {} // Expected
            other => panic!("Unexpected result: {:?}", other),
        }
    }

    #[test]
    fn test_summary() {
        let theory = make_simple_theory();
        let mut tree = SearchTree::new(theory);

        tree.branch(0, Some("a".to_string()));
        tree.branch(0, Some("b".to_string()));

        let summary = tree.summary(5);
        assert_eq!(summary.total_nodes, 3);
        assert_eq!(summary.frontier_size, 2);
        assert_eq!(summary.solved_count, 0);
    }

    #[test]
    fn test_union_find_with_slid() {
        use egglog_union_find::UnionFind;

        // Helper for cleaner syntax
        fn s(n: usize) -> Slid {
            Slid::from_usize(n)
        }

        // Verify egglog's union-find works with our Slid type (which is usize).
        // NOTE(review): the type parameter was lost in transit; `UnionFind<Slid>`
        // is inferred from the surrounding comment and usage — confirm.
        let mut uf: UnionFind<Slid> = UnionFind::default();

        // Union some elements
        let (parent, child) = uf.union(s(0), s(1));
        assert_eq!(parent, s(0)); // union-by-min: smaller id becomes parent
        assert_eq!(child, s(1));

        // Find should return canonical representative
        assert_eq!(uf.find(s(0)), s(0));
        assert_eq!(uf.find(s(1)), s(0));

        // Add more elements and union
        let (parent2, child2) = uf.union(s(2), s(3));
        assert_eq!(parent2, s(2));
        assert_eq!(child2, s(3));

        // Union the two equivalence classes
        let (parent3, child3) = uf.union(s(1), s(3));
        // Now 0, 1, 2, 3 should all be in same class with 0 as root
        assert_eq!(parent3, s(0)); // find(1) = 0, find(3) = 2, min(0,2) = 0
        assert_eq!(child3, s(2));

        assert_eq!(uf.find(s(0)), s(0));
        assert_eq!(uf.find(s(1)), s(0));
        assert_eq!(uf.find(s(2)), s(0));
        assert_eq!(uf.find(s(3)), s(0));
    }

    #[test]
    fn test_forward_chaining_tactic() {
        // Create a theory with no axioms - forward chaining should just fall through
        let theory = make_simple_theory();
        let mut tree = SearchTree::new(theory);

        // On an empty structure with no axioms, forward chaining should report progress
        let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick());

        // No axioms means no violations, should fall through to CheckTactic
        match result {
            TacticResult::Progress { .. } => {} // Expected - incomplete but no obligations
            other => panic!("Expected Progress, got {:?}", other),
        }
    }

    #[test]
    fn test_forward_chaining_detects_false() {
        use crate::core::{Context, Formula, Sequent};

        // Create a theory with an axiom: True |- False
        // This means any model is immediately unsat
        let mut sig = Signature::new();
        sig.add_sort("Node".to_string());

        let axiom = Sequent {
            context: Context::new(),
            premise: Formula::True,
            conclusion: Formula::False,
        };

        let theory = Rc::new(ElaboratedTheory {
            params: vec![],
            theory: Theory {
                name: "Inconsistent".to_string(),
                signature: sig,
                axioms: vec![axiom],
                axiom_names: vec!["ax/inconsistent".to_string()],
            },
        });

        let mut tree = SearchTree::new(theory);

        // Forward chaining should detect the derivation of False
        let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick());

        match result {
            TacticResult::Unsat(_) => {} // Expected - True |- False is violated
            other => panic!("Expected Unsat, got {:?}", other),
        }
    }

    #[test]
    fn test_forward_chaining_adds_equations() {
        use crate::core::{Context, Formula, Sequent, Term};

        // Create a theory with an axiom: ∀x:Node, y:Node. True |- x = y
        // (Every two nodes are equal)
        let mut sig = Signature::new();
        sig.add_sort("Node".to_string());

        let ctx = Context::new()
            .extend("x".to_string(), DerivedSort::Base(0))
            .extend("y".to_string(), DerivedSort::Base(0));

        let axiom = Sequent {
            context: ctx,
            premise: Formula::True,
            conclusion: Formula::Eq(
                Term::Var("x".to_string(), DerivedSort::Base(0)),
                Term::Var("y".to_string(), DerivedSort::Base(0)),
            ),
        };

        let theory = Rc::new(ElaboratedTheory {
            params: vec![],
            theory: Theory {
                name: "AllEqual".to_string(),
                signature: sig,
                axioms: vec![axiom],
                axiom_names: vec!["ax/all_equal".to_string()],
            },
        });

        let mut tree = SearchTree::new(theory);

        // Add two elements
        let (a, _) = tree.add_element(0, 0).unwrap();
        let (b, _) = tree.add_element(0, 0).unwrap();
        assert_ne!(a, b);

        // Forward chaining should detect the equation obligation and add pending equations
        let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick());

        match result {
            TacticResult::Progress { steps_taken, .. } => {
                assert!(steps_taken > 0, "Should have made progress");
            }
            other => panic!("Expected Progress, got {:?}", other),
        }

        // Check that pending equations were added to congruence closure
        let node = tree.get(0).unwrap();
        assert!(!node.cc.pending.is_empty(), "Should have pending equations");
    }

    #[test]
    fn test_propagate_equations_merges() {
        use crate::core::{Context, Formula, Sequent, Term};

        // Create a theory with an axiom: ∀x:Node, y:Node.
True |- x = y + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + + let ctx = Context::new() + .extend("x".to_string(), DerivedSort::Base(0)) + .extend("y".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: ctx, + premise: Formula::True, + conclusion: Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "AllEqual".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/all_equal".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + + // Add two elements + let (a, _) = tree.add_element(0, 0).unwrap(); + let (b, _) = tree.add_element(0, 0).unwrap(); + + // First run forward chaining to add equations + ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + // Verify equations are pending + assert!(!tree.get(0).unwrap().cc.pending.is_empty()); + + // Run equation propagation + let result = PropagateEquationsTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { steps_taken, .. 
} => { + assert!(steps_taken > 0, "Should have processed equations"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // Check that a and b are now in the same equivalence class + let node = tree.get_mut(0).unwrap(); + assert!(node.cc.are_equal(a, b), "a and b should be equal after propagation"); + } + + #[test] + fn test_auto_tactic() { + use crate::core::{Context, Formula, Sequent, Term}; + + // Create a theory where all elements are equal + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + + let ctx = Context::new() + .extend("x".to_string(), DerivedSort::Base(0)) + .extend("y".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: ctx, + premise: Formula::True, + conclusion: Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "AllEqual".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/all_equal".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + + // Add three elements + let (a, _) = tree.add_element(0, 0).unwrap(); + let (b, _) = tree.add_element(0, 0).unwrap(); + let (c, _) = tree.add_element(0, 0).unwrap(); + + // Run AutoTactic - should do forward chaining + propagation to fixpoint + let result = AutoTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { steps_taken, .. 
} => { + assert!(steps_taken > 0, "Should have made progress"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // All three should be in the same equivalence class + let node = tree.get_mut(0).unwrap(); + assert!(node.cc.are_equal(a, b), "a and b should be equal"); + assert!(node.cc.are_equal(b, c), "b and c should be equal"); + assert!(node.cc.are_equal(a, c), "a and c should be equal (transitively)"); + } + + #[test] + fn test_existential_body_processing() { + use crate::core::{Context, Formula, RelationStorage, Sequent, Term}; + + // Create a theory with: + // - Sort: Node + // - Relation: R : Node -> Prop + // - Axiom: True |- ∃x:Node. R(x) + // This should add a witness and assert R(witness) + + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel( + 0, // R + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "ExistsR".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/exists_r".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + tree.init_relations(0, &[1]).unwrap(); // R has arity 1 + + // Initially no elements + assert_eq!(tree.get(0).unwrap().structure.carrier_size(0), 0); + + // Run forward chaining + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { steps_taken, .. 
} => { + assert!(steps_taken >= 2, "Should have added witness AND asserted R"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // Should now have one element (the witness) + let node = tree.get(0).unwrap(); + assert_eq!(node.structure.carrier_size(0), 1, "Should have one witness"); + + // R(witness) should be asserted + let witness = Slid::from_usize(0); + assert!( + node.structure.relations[0].contains(&[witness]), + "R(witness) should be asserted" + ); + } + + #[test] + fn test_nested_existential_body() { + use crate::core::{Context, Formula, RelationStorage, Sequent, Term}; + + // Create a theory with: + // - Sort: Node + // - Relation: E : Node × Node -> Prop + // - Axiom: True |- ∃x:Node. ∃y:Node. E(x, y) + // This should add two witnesses and assert E(w1, w2) + + let mut sig = Signature::new(); + sig.add_sort("Node".to_string()); + // E : Node × Node -> Prop (binary relation as product domain) + sig.add_relation( + "E".to_string(), + DerivedSort::Product(vec![ + ("0".to_string(), DerivedSort::Base(0)), + ("1".to_string(), DerivedSort::Base(0)), + ]), + ); + + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Exists( + "y".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel( + 0, // E + Term::Record(vec![ + ("0".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("1".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + )), + )), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "ExistsEdge".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/exists_edge".to_string()], + }, + }); + + let mut tree = SearchTree::new(theory); + tree.init_relations(0, &[2]).unwrap(); // E has arity 2 + + // Run forward chaining + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + 
TacticResult::Progress { steps_taken, .. } => { + assert!(steps_taken >= 3, "Should have added 2 witnesses AND asserted E"); + } + other => panic!("Expected Progress, got {:?}", other), + } + + // Should have two elements + let node = tree.get(0).unwrap(); + assert_eq!(node.structure.carrier_size(0), 2, "Should have two witnesses"); + + // E(w1, w2) should be asserted + let w1 = Slid::from_usize(0); + let w2 = Slid::from_usize(1); + assert!( + node.structure.relations[0].contains(&[w1, w2]), + "E(w1, w2) should be asserted" + ); + } + + #[test] + fn test_from_base_preserves_structure() { + // Test that from_base preserves the base structure's elements and facts + use crate::core::Structure; + use crate::universe::Universe; + + let theory = make_simple_theory(); + + // Create a base structure with some elements + let mut universe = Universe::new(); + let mut base = Structure::new(1); + let (elem_a, _) = base.add_element(&mut universe, 0); + let (elem_b, _) = base.add_element(&mut universe, 0); + + // Initialize function storage and define f(a) = b + base.init_functions(&[Some(0)]); + base.define_function(0, elem_a, elem_b).unwrap(); + + // Create search tree from base + let tree = SearchTree::from_base(theory, base, universe); + + // The root should preserve the base structure + let root = tree.get(0).unwrap(); + assert_eq!(root.structure.carrier_size(0), 2, "Should have 2 elements from base"); + let sort_slid_a = root.structure.sort_local_id(elem_a); + assert_eq!( + root.structure.get_function(0, sort_slid_a), + Some(elem_b), + "f(a) = b should be preserved" + ); + } + + #[test] + fn test_from_base_solver_can_extend() { + // Test that the solver can extend a base structure to satisfy axioms + use crate::core::{Context, Formula, RelationStorage, Sequent, Structure, Term}; + use crate::universe::Universe; + + // Theory: Node sort with relation R : Node -> Prop + // Axiom: ∀x:Node. ∃y:Node. 
R(y) + // (every existing element implies existence of some R-element) + let mut sig = Signature::new(); + let node = sig.add_sort("Node".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(node)); + + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(node))], + }, + premise: Formula::True, + conclusion: Formula::Exists( + "y".to_string(), + DerivedSort::Base(node), + Box::new(Formula::Rel( + 0, // R + Term::Var("y".to_string(), DerivedSort::Base(node)), + )), + ), + }; + + let theory = Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "ExistsR".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/exists_r".to_string()], + }, + }); + + // Create base structure with one element, R not yet holding + let mut universe = Universe::new(); + let mut base = Structure::new(1); + let (_elem_a, _) = base.add_element(&mut universe, 0); + base.init_relations(&[1]); // R has arity 1 + + // Create search tree from base + let mut tree = SearchTree::from_base(theory, base, universe); + + // Verify starting state: one element, R is empty + assert_eq!(tree.get(0).unwrap().structure.carrier_size(0), 1); + assert!(tree.get(0).unwrap().structure.relations[0].is_empty()); + + // Run forward chaining - should create witness for R(y) + let result = ForwardChainingTactic.run(&mut tree, 0, &Budget::quick()); + + match result { + TacticResult::Progress { .. } => { + let node = tree.get(0).unwrap(); + // Should have at least one R-element now + assert!( + !node.structure.relations[0].is_empty(), + "R should have at least one tuple after forward chaining" + ); + } + other => panic!("Expected Progress, got {:?}", other), + } + } +} diff --git a/src/solver/tree.rs b/src/solver/tree.rs new file mode 100644 index 0000000..d2fb15b --- /dev/null +++ b/src/solver/tree.rs @@ -0,0 +1,465 @@ +//! Search tree for instance synthesis. 
+ +use std::rc::Rc; + +use crate::core::{ElaboratedTheory, RelationStorage, Signature, Structure}; +use crate::id::{Luid, Slid, Uuid}; +use crate::tensor::{CheckResult, Violation}; +use crate::universe::Universe; + +use super::types::{ + ConflictClause, CongruenceClosure, NodeDetail, NodeId, NodeStatus, SearchNode, SearchSummary, +}; + +/// The search tree +#[derive(Debug)] +pub struct SearchTree { + /// All nodes, indexed by NodeId + pub(crate) nodes: Vec, + /// The theory we're trying to instantiate + pub theory: Rc, + /// Universe for Luid allocation + pub universe: Universe, +} + +impl SearchTree { + /// Create a new search tree for instantiating a theory + /// + /// The root node contains an empty Structure with the right number of + /// sorts but no elements. + /// + /// This is equivalent to `SearchTree::from_base(theory, empty_structure)`. + /// Use this for `:solve` (finding models from scratch). + pub fn new(theory: Rc) -> Self { + let num_sorts = theory.theory.signature.sorts.len(); + let root_structure = Structure::new(num_sorts); + Self::from_base_inner(theory, root_structure, Universe::new()) + } + + /// Create a search tree starting from an existing base structure. + /// + /// This enables the unified model-finding API: + /// - `:solve T` = `SearchTree::new(T)` = find models of T from scratch + /// - `:query M T'` = `SearchTree::from_base(T', M)` = find extensions of M to T' + /// + /// The base structure's elements, function values, and relation tuples are + /// preserved as "frozen" facts. The solver will only add new facts, not remove + /// existing ones (the refinement order). 
+ /// + /// # Arguments + /// - `theory`: The theory to satisfy (may extend the base structure's theory) + /// - `base`: The starting structure (may already have elements, functions, relations) + /// - `universe`: The universe for Luid allocation (should contain Luids from base) + /// + /// # Panics + /// Panics if the base structure has more sorts than the theory signature. + pub fn from_base(theory: Rc, base: Structure, universe: Universe) -> Self { + let num_sorts = theory.theory.signature.sorts.len(); + assert!( + base.carriers.len() <= num_sorts, + "Base structure has {} sorts but theory only has {}", + base.carriers.len(), + num_sorts + ); + Self::from_base_inner(theory, base, universe) + } + + /// Internal constructor shared by `new` and `from_base`. + fn from_base_inner(theory: Rc, root_structure: Structure, universe: Universe) -> Self { + let root = SearchNode { + id: 0, + parent: None, + children: Vec::new(), + structure: root_structure, + cc: CongruenceClosure::new(), + status: NodeStatus::Open, + p_success: 0.5, // Prior: 50% chance of solution existing + conflicts: Vec::new(), + label: Some("root".to_string()), + }; + + Self { + nodes: vec![root], + theory, + universe, + } + } + + /// Get the root node ID + pub fn root(&self) -> NodeId { + 0 + } + + /// Get a node by ID + pub fn get(&self, id: NodeId) -> Option<&SearchNode> { + self.nodes.get(id) + } + + /// Get a mutable reference to a node + pub fn get_mut(&mut self, id: NodeId) -> Option<&mut SearchNode> { + self.nodes.get_mut(id) + } + + /// Get the signature of the theory + pub fn signature(&self) -> &Signature { + &self.theory.theory.signature + } + + /// Get all open frontier nodes + pub fn frontier(&self) -> Vec { + self.nodes + .iter() + .filter(|n| n.status == NodeStatus::Open && n.children.is_empty()) + .map(|n| n.id) + .collect() + } + + /// Get frontier nodes sorted by p_success (descending) + pub fn frontier_by_probability(&self) -> Vec { + let mut frontier = self.frontier(); + 
frontier.sort_by(|&a, &b| { + let pa = self.nodes[a].p_success; + let pb = self.nodes[b].p_success; + pb.partial_cmp(&pa).unwrap_or(std::cmp::Ordering::Equal) + }); + frontier + } + + /// Create a child node by cloning the parent's structure + /// + /// Returns the new node's ID. The child starts with the same structure + /// as the parent (will be refined by subsequent operations). + pub fn branch(&mut self, parent: NodeId, label: Option) -> NodeId { + let parent_node = &self.nodes[parent]; + let child_structure = parent_node.structure.clone(); + let child_cc = parent_node.cc.clone(); + let child_p = parent_node.p_success; + + let child_id = self.nodes.len(); + let child = SearchNode { + id: child_id, + parent: Some(parent), + children: Vec::new(), + structure: child_structure, + cc: child_cc, + status: NodeStatus::Open, + p_success: child_p, + conflicts: Vec::new(), + label, + }; + + self.nodes.push(child); + self.nodes[parent].children.push(child_id); + child_id + } + + /// Mark a node as solved (found valid instance) + pub fn mark_solved(&mut self, id: NodeId) { + if let Some(node) = self.nodes.get_mut(id) { + node.status = NodeStatus::Solved; + } + } + + /// Mark a node as unsatisfiable + pub fn mark_unsat(&mut self, id: NodeId, conflict: Option) { + if let Some(node) = self.nodes.get_mut(id) { + node.status = NodeStatus::Unsat; + if let Some(c) = conflict { + node.conflicts.push(c); + } + } + } + + /// Mark a node as pruned (agent decided not to explore) + pub fn mark_pruned(&mut self, id: NodeId) { + if let Some(node) = self.nodes.get_mut(id) { + node.status = NodeStatus::Pruned; + } + } + + /// Update a node's success probability estimate + pub fn set_probability(&mut self, id: NodeId, p: f64) { + if let Some(node) = self.nodes.get_mut(id) { + node.p_success = p.clamp(0.0, 1.0); + } + } + + /// Check if any node has been solved + pub fn has_solution(&self) -> Option { + self.nodes + .iter() + .find(|n| n.status == NodeStatus::Solved) + .map(|n| n.id) + } + 
+ /// Get the path from root to a node (list of NodeIds) + pub fn path_to(&self, id: NodeId) -> Vec { + let mut path = Vec::new(); + let mut current = Some(id); + while let Some(nid) = current { + path.push(nid); + current = self.nodes[nid].parent; + } + path.reverse(); + path + } +} + +// ============================================================================ +// REFINEMENT OPERATIONS +// ============================================================================ + +/// Operations for refining a partial model (moving up in the refinement order) +impl SearchTree { + /// Add a new element to a sort in a node's structure + /// + /// Returns the (Slid, Luid) of the new element. + pub fn add_element(&mut self, node: NodeId, sort_id: usize) -> Result<(Slid, Luid), String> { + let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?; + if node.status != NodeStatus::Open { + return Err("Cannot refine a non-open node".to_string()); + } + Ok(node.structure.add_element(&mut self.universe, sort_id)) + } + + /// Add a new element with a specific UUID + pub fn add_element_with_uuid( + &mut self, + node: NodeId, + uuid: Uuid, + sort_id: usize, + ) -> Result<(Slid, Luid), String> { + let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?; + if node.status != NodeStatus::Open { + return Err("Cannot refine a non-open node".to_string()); + } + Ok(node + .structure + .add_element_with_uuid(&mut self.universe, uuid, sort_id)) + } + + /// Define a function value: f(domain) = codomain + /// + /// The function must not already be defined at this domain element + /// (that would be a conflict, not a refinement). 
+ pub fn define_function( + &mut self, + node: NodeId, + func_id: usize, + domain_slid: Slid, + codomain_slid: Slid, + ) -> Result<(), String> { + let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?; + if node.status != NodeStatus::Open { + return Err("Cannot refine a non-open node".to_string()); + } + node.structure + .define_function(func_id, domain_slid, codomain_slid) + } + + /// Assert a relation tuple: R(tuple) = true + pub fn assert_relation( + &mut self, + node: NodeId, + rel_id: usize, + tuple: Vec, + ) -> Result { + let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?; + if node.status != NodeStatus::Open { + return Err("Cannot refine a non-open node".to_string()); + } + Ok(node.structure.assert_relation(rel_id, tuple)) + } + + /// Initialize function storage for a node (call after adding elements) + pub fn init_functions( + &mut self, + node: NodeId, + domain_sort_ids: &[Option], + ) -> Result<(), String> { + let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?; + node.structure.init_functions(domain_sort_ids); + Ok(()) + } + + /// Initialize relation storage for a node + pub fn init_relations(&mut self, node: NodeId, arities: &[usize]) -> Result<(), String> { + let node = self.nodes.get_mut(node).ok_or("Invalid node ID")?; + node.structure.init_relations(arities); + Ok(()) + } + + /// Add a pending equation to a node's congruence closure + /// + /// Equations arise from axiom consequents, function conflicts, etc. + /// They are processed later during propagation. 
    pub fn add_pending_equation(
        &mut self,
        node: NodeId,
        lhs: Slid,
        rhs: Slid,
        reason: super::types::EquationReason,
    ) {
        // An invalid node ID is silently ignored: no equation is recorded.
        if let Some(node) = self.nodes.get_mut(node) {
            node.cc.add_equation(lhs, rhs, reason);
        }
    }
}

// ============================================================================
// CONSTRAINT CHECKING
// ============================================================================

impl SearchTree {
    /// Check all axioms against a node's current structure
    ///
    /// Returns Ok(()) if all axioms are satisfied, or Err with the list of
    /// (axiom index, violations) pairs otherwise.
    ///
    /// NOTE(review): an invalid node ID also returns Err, but with an EMPTY
    /// violation list (`ok_or_else(Vec::new)`), so callers cannot distinguish
    /// "bad node ID" from "axioms violated" without inspecting the vec —
    /// confirm this encoding is intended.
    pub fn check_axioms(&self, node: NodeId) -> Result<(), Vec<(usize, Vec<Violation>)>> {
        let node = self.nodes.get(node).ok_or_else(Vec::new)?;
        let violations = crate::tensor::check_theory_axioms(
            &self.theory.theory.axioms,
            &node.structure,
            &self.theory.theory.signature,
        );
        if violations.is_empty() {
            Ok(())
        } else {
            Err(violations)
        }
    }

    /// Check a single axiom
    ///
    /// An invalid node ID or out-of-range axiom index yields `Satisfied`
    /// (treated as vacuously true rather than reported as an error).
    pub fn check_axiom(&self, node: NodeId, axiom_idx: usize) -> CheckResult {
        let node = match self.nodes.get(node) {
            Some(n) => n,
            None => return CheckResult::Satisfied, // Invalid node = vacuously true?
        };
        let axiom = match self.theory.theory.axioms.get(axiom_idx) {
            Some(a) => a,
            None => return CheckResult::Satisfied,
        };
        // Return Satisfied on compile error (unsupported patterns handled elsewhere)
        crate::tensor::check_sequent(axiom, &node.structure, &self.theory.theory.signature)
            .unwrap_or(CheckResult::Satisfied)
    }

    /// Check if a structure is "complete" (all functions total, all axioms satisfied)
    ///
    /// A complete structure is a valid model of the theory.
+ pub fn is_complete(&self, node: NodeId) -> Result { + let node = self.nodes.get(node).ok_or("Invalid node ID")?; + let sig = &self.theory.theory.signature; + + // Check all functions are total + for (func_id, func_sym) in sig.functions.iter().enumerate() { + if func_id >= node.structure.functions.len() { + return Ok(false); // Function storage not initialized + } + + // Get domain cardinality (works for base and product sorts) + let domain_size = func_sym.domain.cardinality(&node.structure); + + // Check all domain elements have values (local functions only for now) + let func_col = &node.structure.functions[func_id]; + if func_col.len() < domain_size { + return Ok(false); + } + if let Some(local_col) = func_col.as_local() { + for opt in local_col { + if opt.is_none() { + return Ok(false); + } + } + } + } + + // Check all axioms + match self.check_axioms(node.id) { + Ok(()) => Ok(true), + Err(_) => Ok(false), + } + } +} + +// ============================================================================ +// AGENT INTERFACE +// ============================================================================ + +impl SearchTree { + /// Get a summary of the search state + pub fn summary(&self, top_k: usize) -> SearchSummary { + let frontier = self.frontier_by_probability(); + let top_frontier: Vec<_> = frontier + .iter() + .take(top_k) + .map(|&id| { + let node = &self.nodes[id]; + (id, node.p_success, node.label.clone()) + }) + .collect(); + + SearchSummary { + total_nodes: self.nodes.len(), + frontier_size: frontier.len(), + solved_count: self + .nodes + .iter() + .filter(|n| n.status == NodeStatus::Solved) + .count(), + unsat_count: self + .nodes + .iter() + .filter(|n| n.status == NodeStatus::Unsat) + .count(), + top_frontier, + } + } + + /// Get detailed info about a node (for agent inspection) + pub fn node_detail(&self, id: NodeId) -> Option { + let node = self.nodes.get(id)?; + Some(NodeDetail { + id: node.id, + parent: node.parent, + children: 
node.children.clone(), + status: node.status.clone(), + p_success: node.p_success, + label: node.label.clone(), + carrier_sizes: node + .structure + .carriers + .iter() + .map(|c| c.len() as usize) + .collect(), + num_function_values: node + .structure + .functions + .iter() + .map(|f| match f { + crate::core::FunctionColumn::Local(col) => { + col.iter().filter(|opt| opt.is_some()).count() + } + crate::core::FunctionColumn::External(col) => { + col.iter().filter(|opt| opt.is_some()).count() + } + crate::core::FunctionColumn::ProductLocal { storage, .. } => { + storage.defined_count() + } + crate::core::FunctionColumn::ProductCodomain { field_columns, .. } => { + // Count elements where ALL fields are defined + if field_columns.is_empty() { + 0 + } else { + let len = field_columns[0].len(); + (0..len) + .filter(|&i| field_columns.iter().all(|col| col.get(i).is_some_and(|opt| opt.is_some()))) + .count() + } + } + }) + .collect(), + num_relation_tuples: node.structure.relations.iter().map(|r| r.len()).collect(), + conflicts: node.conflicts.clone(), + }) + } +} diff --git a/src/solver/types.rs b/src/solver/types.rs new file mode 100644 index 0000000..7c3f37e --- /dev/null +++ b/src/solver/types.rs @@ -0,0 +1,131 @@ +//! Core types for the solver infrastructure. 
+ +use crate::core::Structure; +use crate::id::{Luid, Slid}; + +// Re-export congruence closure types from shared module +pub use crate::cc::{CongruenceClosure, EquationReason, PendingEquation}; + +/// Unique identifier for a search node +pub type NodeId = usize; + +/// A node in the search tree +#[derive(Clone, Debug)] +pub struct SearchNode { + /// Unique ID for this node + pub id: NodeId, + /// Parent node (None for root) + pub parent: Option, + /// Children (branches from this node) + pub children: Vec, + /// The partial model at this node + pub structure: Structure, + /// Congruence closure for tracking element equivalences + pub cc: CongruenceClosure, + /// Status of this node + pub status: NodeStatus, + /// Agent's estimate of success probability (0.0 to 1.0) + pub p_success: f64, + /// Conflict clauses learned at or below this node + pub conflicts: Vec, + /// Debug/display name for this node + pub label: Option, +} + +/// Status of a search node +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum NodeStatus { + /// Still exploring (frontier node) + Open, + /// Found a valid complete instance + Solved, + /// Proved unsatisfiable from this point + Unsat, + /// Agent decided not to explore further + Pruned, +} + +/// A learned conflict clause (derivation of False) +/// +/// Records a combination of commitments from which `⊢ False` was derived. +/// Used for CDCL-style pruning: if a node's commitments subsume a conflict +/// clause, that node can be immediately marked Unsat (since False is derivable). +/// +/// Note: This represents a PROOF of unsatisfiability, not mere "conflicts". +/// Even apparent conflicts (like function defined with two different values) +/// just create pending equations—only if propagating those equations leads +/// to deriving False do we have a true conflict clause. 
#[derive(Clone, Debug)]
pub struct ConflictClause {
    /// Elements that must exist, as (sort_id, luid) pairs
    pub required_elements: Vec<(usize, Luid)>,
    /// Function values that must hold, as (func_id, domain_luid, codomain_luid)
    pub required_functions: Vec<(usize, Luid, Luid)>,
    /// Relation tuples that must be asserted, as (rel_id, tuple of Luids)
    pub required_relations: Vec<(usize, Vec<Luid>)>,
    /// Which axiom was violated (index into theory's axiom list), if known
    pub violated_axiom: Option<usize>,
    /// Human-readable explanation of the conflict, if available
    pub explanation: Option<String>,
}

/// An obligation to fulfill
///
/// Geometric logic consequents are positive (existentials, disjunctions, relations).
/// When an axiom's premise is satisfied but its conclusion isn't, we have an
/// OBLIGATION to make the conclusion true. This can always potentially be done by
/// refinement (adding elements, defining functions, asserting relations).
///
/// Only when fulfilling the obligation would CONFLICT with existing commitments
/// is the node truly unsatisfiable.
+#[derive(Clone, Debug)] +pub struct Obligation { + /// Which axiom generated this obligation + pub axiom_idx: usize, + /// The variable assignment where premise holds but conclusion doesn't + /// Maps variable name to (sort_id, slid) in the current structure + pub assignment: Vec<(String, usize, Slid)>, + /// Human-readable description of what needs to be witnessed + pub description: String, +} + +/// Result of checking axioms: either all satisfied, or obligations remain +#[derive(Clone, Debug)] +pub enum AxiomCheckResult { + /// All axioms satisfied for all substitutions + AllSatisfied, + /// Some axioms have unsatisfied consequents (obligations to fulfill) + Obligations(Vec), +} + +/// Summary of the current search state (for agent inspection) +#[derive(Debug)] +pub struct SearchSummary { + /// Total nodes in tree + pub total_nodes: usize, + /// Open frontier nodes + pub frontier_size: usize, + /// Solved nodes + pub solved_count: usize, + /// Unsat nodes + pub unsat_count: usize, + /// Top-k frontier nodes by probability + pub top_frontier: Vec<(NodeId, f64, Option)>, +} + +/// Detailed information about a search node +#[derive(Debug)] +pub struct NodeDetail { + pub id: NodeId, + pub parent: Option, + pub children: Vec, + pub status: NodeStatus, + pub p_success: f64, + pub label: Option, + pub carrier_sizes: Vec, + pub num_function_values: Vec, + pub num_relation_tuples: Vec, + pub conflicts: Vec, +} + +// Congruence closure types and tests are now in crate::cc diff --git a/src/store/append.rs b/src/store/append.rs new file mode 100644 index 0000000..4d705c6 --- /dev/null +++ b/src/store/append.rs @@ -0,0 +1,31 @@ +//! Low-level append operations for the Store. +//! +//! These are the primitive operations that all higher-level operations use. +//! Note: We use a trait to document the interface, but the actual implementations +//! are on Store directly to avoid borrow checker issues. + +use crate::id::Slid; + +/// Low-level operations on the meta structure. 
+/// +/// This trait documents the interface that Store implements for low-level +/// element manipulation. The actual implementations are on Store directly. +pub trait AppendOps { + /// Add an element to a sort in the meta structure with a simple name + fn add_element(&mut self, sort_id: usize, name: &str) -> Slid; + + /// Add an element with a qualified name path + fn add_element_qualified(&mut self, sort_id: usize, path: Vec) -> Slid; + + /// Define a function value in the meta structure + fn define_func(&mut self, func_id: usize, domain: Slid, codomain: Slid) -> Result<(), String>; + + /// Get a function value from the meta structure + fn get_func(&self, func_id: usize, domain: Slid) -> Option; + + /// Get all elements of a sort + fn elements_of_sort(&self, sort_id: usize) -> Vec; + + /// Get the name of an element + fn get_element_name(&self, slid: Slid) -> String; +} diff --git a/src/store/batch.rs b/src/store/batch.rs new file mode 100644 index 0000000..bb550a8 --- /dev/null +++ b/src/store/batch.rs @@ -0,0 +1,355 @@ +//! Atomic batch creation for elements. +//! +//! This module enforces the Monotonic Submodel Property by requiring all facts +//! involving an element to be defined atomically at element creation time. +//! +//! # Design Principles +//! +//! 1. **All facts defined at creation**: When element `a` is created, all facts +//! involving `a` (function values `f(a)=b`, relation tuples `R(a,c)`) must be +//! defined in the same atomic batch. +//! +//! 2. **No post-hoc fact addition**: After an element's batch is committed, no new +//! facts involving that element can be added. This ensures existing submodels +//! remain valid as new elements are added. +//! +//! 3. **Relations are boolean functions**: Relations `R: A × B → Bool` are treated +//! as total functions. When element `a` is created, all `R(a, _)` and `R(_, a)` +//! values are implicitly `false` unless explicitly asserted as `true`. 
+ +use crate::id::{NumericId, Slid}; + +use super::Store; + +/// An atomic batch of changes for creating a single new element. +/// +/// All facts involving the new element must be defined in this batch. +/// After the batch is committed, no new facts can be added. +#[derive(Debug, Clone)] +pub struct ElementBatch { + /// The instance this element belongs to + pub instance: Slid, + + /// The sort (from the theory) of this element + pub sort: Slid, + + /// Human-readable name for the element + pub name: String, + + /// Function values where this element is in the domain: f(elem) = value + pub func_vals: Vec<(Slid, Slid)>, // (func, codomain_value) + + /// Relation assertions where this element appears: R(..., elem, ...) = true + /// Only the TRUE tuples are listed; everything else is implicitly false. + pub rel_tuples: Vec<(Slid, Slid)>, // (rel, arg) - for unary relations or when elem is the arg +} + +impl ElementBatch { + /// Create an empty/invalid batch (for use with mem::replace) + fn empty() -> Self { + Self { + instance: Slid::from_usize(0), + sort: Slid::from_usize(0), + name: String::new(), + func_vals: Vec::new(), + rel_tuples: Vec::new(), + } + } +} + +impl ElementBatch { + /// Create a new element batch + pub fn new(instance: Slid, sort: Slid, name: impl Into) -> Self { + Self { + instance, + sort, + name: name.into(), + func_vals: Vec::new(), + rel_tuples: Vec::new(), + } + } + + /// Add a function value: f(this_element) = value + pub fn with_func(mut self, func: Slid, value: Slid) -> Self { + self.func_vals.push((func, value)); + self + } + + /// Add a relation tuple: R(this_element) = true (for unary relations) + /// or R(arg) = true where this element is part of arg + pub fn with_rel(mut self, rel: Slid, arg: Slid) -> Self { + self.rel_tuples.push((rel, arg)); + self + } + + /// Define a function value: f(this_element) = value + pub fn define_func(&mut self, func: Slid, value: Slid) { + self.func_vals.push((func, value)); + } + + /// Assert a 
relation tuple as true + pub fn assert_rel(&mut self, rel: Slid, arg: Slid) { + self.rel_tuples.push((rel, arg)); + } +} + +/// Builder for creating elements with all their facts defined atomically. +/// +/// This enforces the Monotonic Submodel Property by ensuring all facts +/// are defined before the element is committed. +pub struct ElementBuilder<'a> { + store: &'a mut Store, + batch: ElementBatch, + committed: bool, +} + +impl<'a> ElementBuilder<'a> { + /// Create a new element builder + pub fn new(store: &'a mut Store, instance: Slid, sort: Slid, name: impl Into) -> Self { + Self { + store, + batch: ElementBatch::new(instance, sort, name), + committed: false, + } + } + + /// Define a function value: f(this_element) = value + pub fn define_func(&mut self, func: Slid, value: Slid) -> &mut Self { + self.batch.define_func(func, value); + self + } + + /// Assert a relation tuple as true: R(arg) = true + pub fn assert_rel(&mut self, rel: Slid, arg: Slid) -> &mut Self { + self.batch.assert_rel(rel, arg); + self + } + + /// Commit the element batch and return the new element's Slid. + /// + /// This atomically creates the element and all its facts. + /// After this, no new facts involving this element can be added. + pub fn commit(mut self) -> Result { + self.committed = true; + let batch = std::mem::replace(&mut self.batch, ElementBatch::empty()); + self.store.add_element_batch(batch) + } +} + +impl<'a> Drop for ElementBuilder<'a> { + fn drop(&mut self) { + if !self.committed { + // Log a warning if the builder was dropped without committing + // In debug builds, this could panic to catch bugs + #[cfg(debug_assertions)] + eprintln!( + "Warning: ElementBuilder for '{}' was dropped without committing", + self.batch.name + ); + } + } +} + +impl Store { + /// Create an element builder for atomic element creation. 
+ /// + /// # Example + /// + /// ```ignore + /// let elem = store.build_element(instance, sort, "my_element") + /// .define_func(f, target) + /// .assert_rel(r, arg) + /// .commit()?; + /// ``` + pub fn build_element( + &mut self, + instance: Slid, + sort: Slid, + name: impl Into, + ) -> ElementBuilder<'_> { + ElementBuilder::new(self, instance, sort, name) + } + + /// Add an element with all its facts atomically. + /// + /// This is the low-level API; prefer `build_element()` for a builder pattern. + pub fn add_element_batch(&mut self, batch: ElementBatch) -> Result { + // 1. Create the element + let elem_slid = self.add_elem(batch.instance, batch.sort, &batch.name)?; + + // 2. Add all function values + for (func, value) in batch.func_vals { + self.add_func_val(batch.instance, func, elem_slid, value)?; + } + + // 3. Add all relation tuples (sparse: only the true ones) + for (rel, arg) in batch.rel_tuples { + self.add_rel_tuple(batch.instance, rel, arg)?; + } + + Ok(elem_slid) + } + + /// Create multiple elements atomically within a closure. + /// + /// This allows defining elements that reference each other within the same batch. + /// + /// # Example + /// + /// ```ignore + /// store.create_elements(instance, |ctx| { + /// let a = ctx.add_element(sort_a, "a")?; + /// let b = ctx.add_element(sort_b, "b")?; + /// + /// ctx.define_func(f, a, b)?; // f(a) = b + /// ctx.assert_rel(r, a)?; // R(a) = true + /// + /// Ok(vec![a, b]) + /// })?; + /// ``` + pub fn create_elements(&mut self, instance: Slid, f: F) -> Result + where + F: FnOnce(&mut ElementCreationContext<'_>) -> Result, + { + let mut ctx = ElementCreationContext::new(self, instance); + let result = f(&mut ctx)?; + ctx.commit()?; + Ok(result) + } +} + +/// Context for creating multiple elements atomically. +/// +/// All elements and facts created within this context are committed together. 
+pub struct ElementCreationContext<'a> { + store: &'a mut Store, + instance: Slid, + /// Elements created but not yet committed to GeologMeta + pending_elements: Vec<(Slid, Slid, String)>, // (sort, slid, name) + /// Function values to add + pending_func_vals: Vec<(Slid, Slid, Slid)>, // (func, arg, result) + /// Relation tuples to add + pending_rel_tuples: Vec<(Slid, Slid)>, // (rel, arg) + committed: bool, +} + +impl<'a> ElementCreationContext<'a> { + fn new(store: &'a mut Store, instance: Slid) -> Self { + Self { + store, + instance, + pending_elements: Vec::new(), + pending_func_vals: Vec::new(), + pending_rel_tuples: Vec::new(), + committed: false, + } + } + + /// Add a new element (returns Slid immediately for use in defining facts) + pub fn add_element(&mut self, sort: Slid, name: impl Into) -> Result { + let name = name.into(); + let elem_slid = self.store.add_elem(self.instance, sort, &name)?; + self.pending_elements.push((sort, elem_slid, name)); + Ok(elem_slid) + } + + /// Define a function value: f(arg) = result + pub fn define_func(&mut self, func: Slid, arg: Slid, result: Slid) -> Result<(), String> { + self.pending_func_vals.push((func, arg, result)); + Ok(()) + } + + /// Assert a relation tuple as true: R(arg) = true + pub fn assert_rel(&mut self, rel: Slid, arg: Slid) -> Result<(), String> { + self.pending_rel_tuples.push((rel, arg)); + Ok(()) + } + + /// Commit all pending elements and facts + fn commit(&mut self) -> Result<(), String> { + // Add all function values + for (func, arg, result) in std::mem::take(&mut self.pending_func_vals) { + self.store.add_func_val(self.instance, func, arg, result)?; + } + + // Add all relation tuples + for (rel, arg) in std::mem::take(&mut self.pending_rel_tuples) { + self.store.add_rel_tuple(self.instance, rel, arg)?; + } + + self.committed = true; + Ok(()) + } +} + +impl<'a> Drop for ElementCreationContext<'a> { + fn drop(&mut self) { + if !self.committed && !self.pending_elements.is_empty() { + 
#[cfg(debug_assertions)] + eprintln!( + "Warning: ElementCreationContext with {} pending elements was dropped without committing", + self.pending_elements.len() + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_element_batch_builder() { + let mut store = Store::new(); + + // Create a theory with a sort + let theory = store.create_theory("TestTheory").unwrap(); + let sort = store.add_sort(theory, "Node").unwrap(); + let sort_ds = store.make_base_dsort(sort).unwrap(); + + // Create a function + let _func = store.add_function(theory, "label", sort_ds, sort_ds).unwrap(); + + // Create an instance + let instance = store.create_instance("TestInstance", theory).unwrap(); + + // Create an element using the batch API + let elem = store + .build_element(instance, sort, "node1") + .commit() + .unwrap(); + + // Verify element was created + let view = store.materialize(instance); + assert!(view.elements.contains(&elem)); + } + + #[test] + fn test_create_elements_context() { + let mut store = Store::new(); + + // Create a theory with a sort and relation + let theory = store.create_theory("TestTheory").unwrap(); + let sort = store.add_sort(theory, "Node").unwrap(); + let sort_ds = store.make_base_dsort(sort).unwrap(); + let rel = store.add_relation(theory, "connected", sort_ds).unwrap(); + + // Create an instance + let instance = store.create_instance("TestInstance", theory).unwrap(); + + // Create multiple elements atomically + let (a, b) = store + .create_elements(instance, |ctx| { + let a = ctx.add_element(sort, "a")?; + let b = ctx.add_element(sort, "b")?; + ctx.assert_rel(rel, a)?; + Ok((a, b)) + }) + .unwrap(); + + // Verify elements were created + let view = store.materialize(instance); + assert!(view.elements.contains(&a)); + assert!(view.elements.contains(&b)); + } +} diff --git a/src/store/bootstrap_queries.rs b/src/store/bootstrap_queries.rs new file mode 100644 index 0000000..91bbfa1 --- /dev/null +++ b/src/store/bootstrap_queries.rs 
@@ -0,0 +1,1017 @@ +//! Bootstrap query methods for GeologMeta. +//! +//! These methods provide typed query APIs for GeologMeta. They now delegate +//! to the compiled query engine (see query/store_queries.rs) for the core +//! scan+filter operations, with additional lookups for complex fields. +//! +//! TODO(geolog-ubi): Further integrate with the full query engine. + +use std::collections::HashMap; + +use crate::core::{Context, DerivedSort, ElaboratedTheory, Formula, Sequent, Signature, Term, Theory}; +use crate::id::{NumericId, Slid}; + +use super::append::AppendOps; +use super::Store; + +/// Remap a DerivedSort from Slid indices to sort indices. +/// +/// During reconstruction, DerivedSort::Base contains Slid.index() values +/// (from resolve_dsort). This function maps them to proper sort indices +/// using the provided mapping. +fn remap_derived_sort( + ds: &DerivedSort, + srt_slid_to_idx: &HashMap, +) -> DerivedSort { + match ds { + DerivedSort::Base(slid_idx) => { + // The slid_idx is a Slid.index() from resolve_dsort + // Map it to a sort index + if let Some(&sort_idx) = srt_slid_to_idx.get(slid_idx) { + DerivedSort::Base(sort_idx) + } else { + // Fallback: assume it's already a sort index + DerivedSort::Base(*slid_idx) + } + } + DerivedSort::Product(fields) => { + let remapped: Vec<_> = fields + .iter() + .map(|(name, field_ds)| { + (name.clone(), remap_derived_sort(field_ds, srt_slid_to_idx)) + }) + .collect(); + DerivedSort::Product(remapped) + } + } +} + +/// Information about a sort in a theory +#[derive(Debug, Clone)] +pub struct SortInfo { + pub name: String, + pub slid: Slid, +} + +/// Information about a function in a theory +#[derive(Debug, Clone)] +pub struct FuncInfo { + pub name: String, + pub slid: Slid, + pub domain: DerivedSort, + pub codomain: DerivedSort, +} + +/// Information about a relation in a theory +#[derive(Debug, Clone)] +pub struct RelInfo { + pub name: String, + pub slid: Slid, + pub domain: DerivedSort, +} + +/// Information 
about a sequent (axiom) in a theory +#[derive(Debug, Clone)] +pub struct SequentInfo { + pub name: String, + pub slid: Slid, + pub premise_slid: Option, + pub conclusion_slid: Option, +} + +/// Information about a context variable in a sequent +#[derive(Debug, Clone)] +pub struct CtxVarInfo { + pub slid: Slid, + pub binder_slid: Option, +} + +impl Store { + /// Query all sorts belonging to a theory. + /// + /// Returns (name, slid) for each Srt where Srt/theory == theory_slid. + /// Delegates to the compiled query engine. + pub fn query_theory_sorts(&self, theory_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_theory_sorts_compiled(theory_slid) + } + + /// Query all functions belonging to a theory. + /// + /// Returns FuncInfo for each Func where Func/theory == theory_slid. + /// Delegates to the compiled query engine. + pub fn query_theory_funcs(&self, theory_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_theory_funcs_compiled(theory_slid) + } + + /// Query all relations belonging to a theory. + /// + /// Returns RelInfo for each Rel where Rel/theory == theory_slid. + /// Delegates to the compiled query engine. + pub fn query_theory_rels(&self, theory_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_theory_rels_compiled(theory_slid) + } + + /// Look up a sort by name within a theory. + pub fn lookup_sort_by_name(&self, theory_slid: Slid, name: &str) -> Option { + self.query_theory_sorts(theory_slid) + .into_iter() + .find(|s| s.name == name) + .map(|s| s.slid) + } + + /// Look up a function by name within a theory. + pub fn lookup_func_by_name(&self, theory_slid: Slid, name: &str) -> Option { + self.query_theory_funcs(theory_slid) + .into_iter() + .find(|f| f.name == name) + .map(|f| f.slid) + } + + /// Look up a relation by name within a theory. 
+ pub fn lookup_rel_by_name(&self, theory_slid: Slid, name: &str) -> Option { + self.query_theory_rels(theory_slid) + .into_iter() + .find(|r| r.name == name) + .map(|r| r.slid) + } + + /// Query all sequents (axioms) belonging to a theory. + pub fn query_theory_sequents(&self, theory_slid: Slid) -> Vec { + let Some(sequent_sort) = self.sort_ids.sequent else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.sequent_theory else { + return vec![]; + }; + + let mut results = Vec::new(); + for sequent_slid in self.elements_of_sort(sequent_sort) { + if self.get_func(theory_func, sequent_slid) == Some(theory_slid) { + let name = self.get_element_name(sequent_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + let premise_slid = self + .func_ids + .sequent_premise + .and_then(|f| self.get_func(f, sequent_slid)); + let conclusion_slid = self + .func_ids + .sequent_conclusion + .and_then(|f| self.get_func(f, sequent_slid)); + + results.push(SequentInfo { + name: short_name, + slid: sequent_slid, + premise_slid, + conclusion_slid, + }); + } + } + results + } + + /// Query context variables for a sequent. + fn query_sequent_ctx_vars(&self, sequent_slid: Slid) -> Vec { + let Some(ctx_var_sort) = self.sort_ids.ctx_var else { + return vec![]; + }; + let Some(sequent_func) = self.func_ids.ctx_var_sequent else { + return vec![]; + }; + + let mut results = Vec::new(); + for ctx_var_slid in self.elements_of_sort(ctx_var_sort) { + if self.get_func(sequent_func, ctx_var_slid) == Some(sequent_slid) { + let binder_slid = self + .func_ids + .ctx_var_binder + .and_then(|f| self.get_func(f, ctx_var_slid)); + + results.push(CtxVarInfo { + slid: ctx_var_slid, + binder_slid, + }); + } + } + results + } + + /// Get the binder's type (DSort slid). + fn get_binder_type(&self, binder_slid: Slid) -> Option { + self.func_ids + .binder_type + .and_then(|f| self.get_func(f, binder_slid)) + } + + /// Reconstruct a Term from its Term slid. 
+ fn reconstruct_term( + &self, + term_slid: Slid, + binder_to_var: &HashMap, + func_to_idx: &HashMap, + srt_slid_to_idx: &HashMap, + ) -> Option { + // Check VarT + if let Some(var_t_sort) = self.sort_ids.var_t { + for var_t_slid in self.elements_of_sort(var_t_sort) { + if let Some(term_func) = self.func_ids.var_t_term + && self.get_func(term_func, var_t_slid) == Some(term_slid) { + // Found a VarT for this term + if let Some(binder_func) = self.func_ids.var_t_binder + && let Some(binder_slid) = self.get_func(binder_func, var_t_slid) + && let Some((var_name, var_sort)) = binder_to_var.get(&binder_slid) { + return Some(Term::Var(var_name.clone(), var_sort.clone())); + } + } + } + } + + // Check AppT + if let Some(app_t_sort) = self.sort_ids.app_t { + for app_t_slid in self.elements_of_sort(app_t_sort) { + if let Some(term_func) = self.func_ids.app_t_term + && self.get_func(term_func, app_t_slid) == Some(term_slid) { + // Found an AppT for this term + let func_slid = self + .func_ids + .app_t_func + .and_then(|f| self.get_func(f, app_t_slid))?; + let func_idx = *func_to_idx.get(&func_slid)?; + + let arg_term_slid = self + .func_ids + .app_t_arg + .and_then(|f| self.get_func(f, app_t_slid))?; + let arg = self.reconstruct_term( + arg_term_slid, + binder_to_var, + func_to_idx, + srt_slid_to_idx, + )?; + + return Some(Term::App(func_idx, Box::new(arg))); + } + } + } + + // Check RecordT + if let Some(record_t_sort) = self.sort_ids.record_t { + for record_t_slid in self.elements_of_sort(record_t_sort) { + if let Some(term_func) = self.func_ids.record_t_term + && self.get_func(term_func, record_t_slid) == Some(term_slid) { + // Found a RecordT for this term - collect entries + let mut fields = Vec::new(); + if let Some(rec_entry_sort) = self.sort_ids.rec_entry { + for rec_entry_slid in self.elements_of_sort(rec_entry_sort) { + if let Some(record_func) = self.func_ids.rec_entry_record + && self.get_func(record_func, rec_entry_slid) + == Some(record_t_slid) + { + // Get 
field name (from Field) + let field_name = self + .func_ids + .rec_entry_field + .and_then(|f| self.get_func(f, rec_entry_slid)) + .map(|field_slid| { + let name = self.get_element_name(field_slid); + name.rsplit('/').next().unwrap_or(&name).to_string() + }) + .unwrap_or_default(); + + // Get value term + if let Some(val_slid) = self + .func_ids + .rec_entry_val + .and_then(|f| self.get_func(f, rec_entry_slid)) + && let Some(val_term) = self.reconstruct_term( + val_slid, + binder_to_var, + func_to_idx, + srt_slid_to_idx, + ) { + fields.push((field_name, val_term)); + } + } + } + } + return Some(Term::Record(fields)); + } + } + } + + // Check ProjT + if let Some(proj_t_sort) = self.sort_ids.proj_t { + for proj_t_slid in self.elements_of_sort(proj_t_sort) { + if let Some(term_func) = self.func_ids.proj_t_term + && self.get_func(term_func, proj_t_slid) == Some(term_slid) { + // Get base term + let base_slid = self + .func_ids + .proj_t_base + .and_then(|f| self.get_func(f, proj_t_slid))?; + let base = + self.reconstruct_term(base_slid, binder_to_var, func_to_idx, srt_slid_to_idx)?; + + // Get field name + let field_name = self + .func_ids + .proj_t_field + .and_then(|f| self.get_func(f, proj_t_slid)) + .map(|field_slid| { + let name = self.get_element_name(field_slid); + name.rsplit('/').next().unwrap_or(&name).to_string() + }) + .unwrap_or_default(); + + return Some(Term::Project(Box::new(base), field_name)); + } + } + } + + None + } + + /// Reconstruct a Formula from its Formula slid. 
+ fn reconstruct_formula( + &self, + formula_slid: Slid, + binder_to_var: &mut HashMap, + func_to_idx: &HashMap, + rel_to_idx: &HashMap, + srt_slid_to_idx: &HashMap, + ) -> Option { + // Check TrueF + if let Some(true_f_sort) = self.sort_ids.true_f { + for true_f_slid in self.elements_of_sort(true_f_sort) { + if let Some(formula_func) = self.func_ids.true_f_formula + && self.get_func(formula_func, true_f_slid) == Some(formula_slid) { + return Some(Formula::True); + } + } + } + + // Check FalseF + if let Some(false_f_sort) = self.sort_ids.false_f { + for false_f_slid in self.elements_of_sort(false_f_sort) { + if let Some(formula_func) = self.func_ids.false_f_formula + && self.get_func(formula_func, false_f_slid) == Some(formula_slid) { + return Some(Formula::False); + } + } + } + + // Check EqF + if let Some(eq_f_sort) = self.sort_ids.eq_f { + for eq_f_slid in self.elements_of_sort(eq_f_sort) { + if let Some(formula_func) = self.func_ids.eq_f_formula + && self.get_func(formula_func, eq_f_slid) == Some(formula_slid) { + let lhs_slid = self + .func_ids + .eq_f_lhs + .and_then(|f| self.get_func(f, eq_f_slid))?; + let rhs_slid = self + .func_ids + .eq_f_rhs + .and_then(|f| self.get_func(f, eq_f_slid))?; + + let lhs = + self.reconstruct_term(lhs_slid, binder_to_var, func_to_idx, srt_slid_to_idx)?; + let rhs = + self.reconstruct_term(rhs_slid, binder_to_var, func_to_idx, srt_slid_to_idx)?; + + return Some(Formula::Eq(lhs, rhs)); + } + } + } + + // Check RelF + if let Some(rel_f_sort) = self.sort_ids.rel_f { + for rel_f_slid in self.elements_of_sort(rel_f_sort) { + if let Some(formula_func) = self.func_ids.rel_f_formula + && self.get_func(formula_func, rel_f_slid) == Some(formula_slid) { + let rel_slid = self + .func_ids + .rel_f_rel + .and_then(|f| self.get_func(f, rel_f_slid))?; + let rel_idx = *rel_to_idx.get(&rel_slid)?; + + let arg_slid = self + .func_ids + .rel_f_arg + .and_then(|f| self.get_func(f, rel_f_slid))?; + let arg = + self.reconstruct_term(arg_slid, 
binder_to_var, func_to_idx, srt_slid_to_idx)?; + + return Some(Formula::Rel(rel_idx, arg)); + } + } + } + + // Check ConjF + if let Some(conj_f_sort) = self.sort_ids.conj_f { + for conj_f_slid in self.elements_of_sort(conj_f_sort) { + if let Some(formula_func) = self.func_ids.conj_f_formula + && self.get_func(formula_func, conj_f_slid) == Some(formula_slid) { + // Collect conjuncts from ConjArm + let mut conjuncts = Vec::new(); + if let Some(conj_arm_sort) = self.sort_ids.conj_arm { + for arm_slid in self.elements_of_sort(conj_arm_sort) { + if let Some(conj_func) = self.func_ids.conj_arm_conj + && self.get_func(conj_func, arm_slid) == Some(conj_f_slid) + && let Some(child_slid) = self + .func_ids + .conj_arm_child + .and_then(|f| self.get_func(f, arm_slid)) + && let Some(child) = self.reconstruct_formula( + child_slid, + binder_to_var, + func_to_idx, + rel_to_idx, + srt_slid_to_idx, + ) { + conjuncts.push(child); + } + } + } + return Some(Formula::Conj(conjuncts)); + } + } + } + + // Check DisjF + if let Some(disj_f_sort) = self.sort_ids.disj_f { + for disj_f_slid in self.elements_of_sort(disj_f_sort) { + if let Some(formula_func) = self.func_ids.disj_f_formula + && self.get_func(formula_func, disj_f_slid) == Some(formula_slid) { + // Collect disjuncts from DisjArm + let mut disjuncts = Vec::new(); + if let Some(disj_arm_sort) = self.sort_ids.disj_arm { + for arm_slid in self.elements_of_sort(disj_arm_sort) { + if let Some(disj_func) = self.func_ids.disj_arm_disj + && self.get_func(disj_func, arm_slid) == Some(disj_f_slid) + && let Some(child_slid) = self + .func_ids + .disj_arm_child + .and_then(|f| self.get_func(f, arm_slid)) + && let Some(child) = self.reconstruct_formula( + child_slid, + binder_to_var, + func_to_idx, + rel_to_idx, + srt_slid_to_idx, + ) { + disjuncts.push(child); + } + } + } + return Some(Formula::Disj(disjuncts)); + } + } + } + + // Check ExistsF + if let Some(exists_f_sort) = self.sort_ids.exists_f { + for exists_f_slid in 
self.elements_of_sort(exists_f_sort) { + if let Some(formula_func) = self.func_ids.exists_f_formula + && self.get_func(formula_func, exists_f_slid) == Some(formula_slid) { + // Get the binder + let binder_slid = self + .func_ids + .exists_f_binder + .and_then(|f| self.get_func(f, exists_f_slid))?; + + // Get binder type + let dsort_slid = self.get_binder_type(binder_slid)?; + let dsort_raw = self.resolve_dsort(dsort_slid); + let dsort = remap_derived_sort(&dsort_raw, srt_slid_to_idx); + + // Get var name from binder element name + let binder_name = self.get_element_name(binder_slid); + let var_name = binder_name + .strip_prefix("binder_") + .unwrap_or(&binder_name) + .to_string(); + + // Add to binder mapping for body reconstruction + binder_to_var.insert(binder_slid, (var_name.clone(), dsort.clone())); + + // Reconstruct body + let body_slid = self + .func_ids + .exists_f_body + .and_then(|f| self.get_func(f, exists_f_slid))?; + let body = self.reconstruct_formula( + body_slid, + binder_to_var, + func_to_idx, + rel_to_idx, + srt_slid_to_idx, + )?; + + return Some(Formula::Exists(var_name, dsort, Box::new(body))); + } + } + } + + None + } + + /// Reconstruct an axiom (Sequent) from its SequentInfo. 
+ fn reconstruct_axiom( + &self, + info: &SequentInfo, + func_to_idx: &HashMap, + rel_to_idx: &HashMap, + srt_slid_to_idx: &HashMap, + ) -> Option { + // Build binder mapping from context variables + let mut binder_to_var: HashMap = HashMap::new(); + let mut context = Context::new(); + + let ctx_vars = self.query_sequent_ctx_vars(info.slid); + for cv in ctx_vars { + if let Some(binder_slid) = cv.binder_slid { + // Get binder type + if let Some(dsort_slid) = self.get_binder_type(binder_slid) { + let dsort_raw = self.resolve_dsort(dsort_slid); + let dsort = remap_derived_sort(&dsort_raw, srt_slid_to_idx); + + // Get var name from binder element name + let binder_name = self.get_element_name(binder_slid); + let var_name = binder_name + .strip_prefix("binder_") + .unwrap_or(&binder_name) + .to_string(); + + binder_to_var.insert(binder_slid, (var_name.clone(), dsort.clone())); + context = context.extend(var_name, dsort); + } + } + } + + // Reconstruct premise + let premise = info.premise_slid.and_then(|slid| { + self.reconstruct_formula(slid, &mut binder_to_var, func_to_idx, rel_to_idx, srt_slid_to_idx) + })?; + + // Reconstruct conclusion + let conclusion = info.conclusion_slid.and_then(|slid| { + self.reconstruct_formula(slid, &mut binder_to_var, func_to_idx, rel_to_idx, srt_slid_to_idx) + })?; + + Some(Sequent { + context, + premise, + conclusion, + }) + } + + /// Resolve a DSort slid to a DerivedSort. + /// + /// DSorts in GeologMeta are represented as either BaseDS or ProdDS elements. + /// This traverses the structure to build the corresponding DerivedSort. 
+ pub fn resolve_dsort(&self, dsort_slid: Slid) -> DerivedSort { + // Check if it's a BaseDS + if let Some(base_ds_sort) = self.sort_ids.base_ds + && let Some(srt_func) = self.func_ids.base_ds_srt { + // Check all BaseDS elements to find one whose dsort matches + for base_slid in self.elements_of_sort(base_ds_sort) { + if let Some(dsort_func) = self.func_ids.base_ds_dsort + && self.get_func(dsort_func, base_slid) == Some(dsort_slid) { + // Found the BaseDS, get its Srt + if let Some(srt_slid) = self.get_func(srt_func, base_slid) { + // We need to map srt_slid to a sort index... + // This is tricky without knowing the theory context. + // For bootstrap, we store the slid index and resolve later. + return DerivedSort::Base(srt_slid.index()); + } + } + } + } + + // Check if it's a ProdDS + if let Some(prod_ds_sort) = self.sort_ids.prod_ds { + for prod_slid in self.elements_of_sort(prod_ds_sort) { + if let Some(dsort_func) = self.func_ids.prod_ds_dsort + && self.get_func(dsort_func, prod_slid) == Some(dsort_slid) { + // Found the ProdDS, get its fields + let fields = self.query_prod_fields(prod_slid); + return DerivedSort::Product(fields); + } + } + } + + // Fallback: empty product (unit type) + DerivedSort::Product(vec![]) + } + + /// Query the fields of a product DSort. 
+ fn query_prod_fields(&self, prod_slid: Slid) -> Vec<(String, DerivedSort)> { + let Some(field_sort) = self.sort_ids.field else { + return vec![]; + }; + let Some(prod_func) = self.func_ids.field_prod else { + return vec![]; + }; + let Some(type_func) = self.func_ids.field_type else { + return vec![]; + }; + + let mut fields = Vec::new(); + for field_slid in self.elements_of_sort(field_sort) { + if self.get_func(prod_func, field_slid) == Some(prod_slid) { + let name = self.get_element_name(field_slid); + let short_name = name.rsplit('/').next().unwrap_or(&name).to_string(); + + let field_type = self + .get_func(type_func, field_slid) + .map(|ds| self.resolve_dsort(ds)) + .unwrap_or(DerivedSort::Product(vec![])); + + fields.push((short_name, field_type)); + } + } + fields + } + + /// Get all theory names that are committed (visible from HEAD). + pub fn query_committed_theories(&self) -> Vec<(String, Slid)> { + use super::BindingKind; + self.list_bindings() + .into_iter() + .filter_map(|(name, kind, slid)| { + if kind == BindingKind::Theory { + Some((name, slid)) + } else { + None + } + }) + .collect() + } + + /// Get all instance names that are committed (visible from HEAD). + pub fn query_committed_instances(&self) -> Vec<(String, Slid)> { + use super::BindingKind; + self.list_bindings() + .into_iter() + .filter_map(|(name, kind, slid)| { + if kind == BindingKind::Instance { + Some((name, slid)) + } else { + None + } + }) + .collect() + } + + /// Get all theories in GeologMeta (regardless of commit status). + /// + /// This is useful for reconstruction when loading from disk, + /// where we want to restore all data, not just committed data. 
+ pub fn query_all_theories(&self) -> Vec<(String, Slid)> { + let Some(theory_sort) = self.sort_ids.theory else { + return vec![]; + }; + + self.elements_of_sort(theory_sort) + .into_iter() + .map(|slid| { + let name = self.get_element_name(slid); + (name, slid) + }) + .collect() + } + + /// Get all instances in GeologMeta (regardless of commit status). + /// + /// This is useful for reconstruction when loading from disk, + /// where we want to restore all data, not just committed data. + pub fn query_all_instances(&self) -> Vec<(String, Slid)> { + let Some(instance_sort) = self.sort_ids.instance else { + return vec![]; + }; + + self.elements_of_sort(instance_sort) + .into_iter() + .map(|slid| { + let name = self.get_element_name(slid); + (name, slid) + }) + .collect() + } + + /// Reconstruct an ElaboratedTheory from persisted GeologMeta data. + /// + /// This is a bootstrap method that will be replaced by proper query engine. + /// It rebuilds the in-memory ElaboratedTheory representation from the + /// persisted sorts, functions, and relations. 
+ pub fn reconstruct_theory(&self, theory_slid: Slid) -> Option { + let theory_name = self.get_element_name(theory_slid); + + // Query sorts, functions, relations + let sort_infos = self.query_theory_sorts(theory_slid); + let func_infos = self.query_theory_funcs(theory_slid); + let rel_infos = self.query_theory_rels(theory_slid); + + // Build Srt Slid -> sort index mapping for resolving DerivedSorts + let mut srt_slid_to_idx: std::collections::HashMap = + std::collections::HashMap::new(); + for (idx, info) in sort_infos.iter().enumerate() { + srt_slid_to_idx.insert(info.slid.index(), idx); + } + + // Build signature using its constructor methods + let mut signature = Signature::new(); + + // Add sorts + for info in &sort_infos { + signature.add_sort(info.name.clone()); + } + + // Add functions with remapped DerivedSorts + for info in &func_infos { + let domain = remap_derived_sort(&info.domain, &srt_slid_to_idx); + let codomain = remap_derived_sort(&info.codomain, &srt_slid_to_idx); + signature.add_function(info.name.clone(), domain, codomain); + } + + // Add relations with remapped DerivedSorts + for info in &rel_infos { + let domain = remap_derived_sort(&info.domain, &srt_slid_to_idx); + signature.add_relation(info.name.clone(), domain); + } + + // Build Func Slid -> func index mapping + let func_to_idx: HashMap = func_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Build Rel Slid -> rel index mapping + let rel_to_idx: HashMap = rel_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Query and reconstruct axioms + let sequent_infos = self.query_theory_sequents(theory_slid); + let mut axioms = Vec::new(); + let mut axiom_names = Vec::new(); + + for info in &sequent_infos { + if let Some(axiom) = self.reconstruct_axiom(info, &func_to_idx, &rel_to_idx, &srt_slid_to_idx) { + axiom_names.push(info.name.clone()); + axioms.push(axiom); + } + } + + let theory = Theory { + name: theory_name, 
+ signature, + axioms, + axiom_names, + }; + + Some(ElaboratedTheory { + params: vec![], // TODO: persist and reconstruct params + theory, + }) + } + + /// Reconstruct all persisted theories. + /// + /// Returns a map from theory name to ElaboratedTheory. + pub fn reconstruct_all_theories( + &self, + ) -> std::collections::HashMap> { + let mut result = std::collections::HashMap::new(); + // Use query_all_theories to restore ALL theories from disk, + // not just committed ones + for (name, slid) in self.query_all_theories() { + if let Some(theory) = self.reconstruct_theory(slid) { + result.insert(name, std::rc::Rc::new(theory)); + } + } + result + } + + // ======================================================================== + // Instance queries and reconstruction + // ======================================================================== + + /// Query all elements belonging to an instance. + /// Delegates to the compiled query engine. + pub fn query_instance_elems(&self, instance_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_instance_elems_compiled(instance_slid) + } + + /// Query all function values in an instance. + /// Delegates to the compiled query engine. + pub fn query_instance_func_vals(&self, instance_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_instance_func_vals_compiled(instance_slid) + } + + /// Query all relation tuples in an instance. + /// Delegates to the compiled query engine. + pub fn query_instance_rel_tuples(&self, instance_slid: Slid) -> Vec { + // Delegate to compiled query engine + self.query_instance_rel_tuples_compiled(instance_slid) + } + + /// Reconstruct an instance (Structure + metadata) from persisted GeologMeta data. 
+ pub fn reconstruct_instance( + &self, + instance_slid: Slid, + ) -> Option { + let theory_slid = self.get_instance_theory(instance_slid)?; + let theory = self.reconstruct_theory(theory_slid)?; + + let instance_name = self.get_element_name(instance_slid); + let num_sorts = theory.theory.signature.sorts.len(); + + // Query elements + let elem_infos = self.query_instance_elems(instance_slid); + let sort_infos = self.query_theory_sorts(theory_slid); + + // Build Srt Slid -> sort index mapping + let srt_to_idx: HashMap = sort_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Build Elem Slid -> Structure Slid mapping + // Structure Slids are assigned sequentially as we add elements + let mut elem_to_structure_slid: HashMap = HashMap::new(); + let mut structure = crate::core::Structure::new(num_sorts); + let mut element_names: HashMap = HashMap::new(); + + // Group elements by sort and add to structure + for elem_info in &elem_infos { + if let Some(srt_slid) = elem_info.srt_slid + && let Some(&sort_idx) = srt_to_idx.get(&srt_slid) { + // Add element to structure + let (structure_slid, _luid) = + structure.add_element(&mut crate::universe::Universe::new(), sort_idx); + elem_to_structure_slid.insert(elem_info.slid, structure_slid); + element_names.insert(structure_slid, elem_info.name.clone()); + } + } + + // Build srt_slid -> sort index mapping for remapping DerivedSorts + let srt_slid_to_idx: HashMap = sort_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid.index(), idx)) + .collect(); + + // Initialize functions + let func_infos = self.query_theory_funcs(theory_slid); + let domain_sort_ids: Vec> = func_infos + .iter() + .map(|f| { + // Remap the domain from Slid indices to sort indices + let remapped = remap_derived_sort(&f.domain, &srt_slid_to_idx); + match remapped { + DerivedSort::Base(idx) => Some(idx), + DerivedSort::Product(_) => None, + } + }) + .collect(); + structure.init_functions(&domain_sort_ids); + + // 
Initialize relations + let rel_infos = self.query_theory_rels(theory_slid); + let arities: Vec = rel_infos + .iter() + .map(|r| { + // Remap to get correct arity + let remapped = remap_derived_sort(&r.domain, &srt_slid_to_idx); + remapped.arity() + }) + .collect(); + structure.init_relations(&arities); + + // Build Func Slid -> func index mapping + let func_to_idx: HashMap = func_infos + .iter() + .enumerate() + .map(|(idx, info)| (info.slid, idx)) + .collect(); + + // Populate function values + let func_vals = self.query_instance_func_vals(instance_slid); + for fv in func_vals { + if let (Some(func_slid), Some(arg_slid), Some(result_slid)) = + (fv.func_slid, fv.arg_slid, fv.result_slid) + && let Some(&func_idx) = func_to_idx.get(&func_slid) + && let (Some(&arg_struct), Some(&result_struct)) = ( + elem_to_structure_slid.get(&arg_slid), + elem_to_structure_slid.get(&result_slid), + ) { + let _ = structure.define_function(func_idx, arg_struct, result_struct); + } + } + + // Populate relation tuples from columnar batches + // Build UUID -> Structure Slid mapping for elements + let elem_uuid_to_structure: HashMap = elem_infos + .iter() + .filter_map(|info| { + let uuid = self.get_element_uuid(info.slid); + elem_to_structure_slid.get(&info.slid).map(|&s| (uuid, s)) + }) + .collect(); + + // Build Rel UUID -> rel index mapping + let rel_uuid_to_idx: HashMap = rel_infos + .iter() + .enumerate() + .map(|(idx, info)| (self.get_element_uuid(info.slid), idx)) + .collect(); + + // Load columnar batches for this instance + let instance_uuid = self.get_element_uuid(instance_slid); + if let Ok(batches) = self.load_instance_data_batches(instance_uuid) { + for batch in batches { + for rel_batch in &batch.relation_tuples { + if let Some(&rel_idx) = rel_uuid_to_idx.get(&rel_batch.rel) { + // Convert each tuple's UUIDs to Structure Slids + for tuple_uuids in rel_batch.iter() { + let tuple_slids: Vec = tuple_uuids + .iter() + .filter_map(|uuid| 
elem_uuid_to_structure.get(uuid).copied()) + .collect(); + + // Only assert if all elements were found + if tuple_slids.len() == tuple_uuids.len() { + structure.assert_relation(rel_idx, tuple_slids); + } + } + } + } + } + } + + Some(ReconstructedInstance { + name: instance_name, + theory_name: theory.theory.name.clone(), + structure, + element_names, + }) + } + + /// Reconstruct all persisted instances. + pub fn reconstruct_all_instances(&self) -> HashMap { + let mut result = HashMap::new(); + // Use query_all_instances to restore ALL instances from disk, + // not just committed ones + for (name, slid) in self.query_all_instances() { + if let Some(instance) = self.reconstruct_instance(slid) { + result.insert(name, instance); + } + } + result + } +} + +/// Information about an element in an instance +#[derive(Debug, Clone)] +pub struct ElemInfo { + pub name: String, + pub slid: Slid, + pub srt_slid: Option, +} + +/// Information about a function value +#[derive(Debug, Clone)] +pub struct FuncValInfo { + pub slid: Slid, + pub func_slid: Option, + pub arg_slid: Option, + pub result_slid: Option, +} + +/// Information about a relation tuple +#[derive(Debug, Clone)] +pub struct RelTupleInfo { + pub slid: Slid, + pub rel_slid: Option, + pub arg_slid: Option, +} + +/// A reconstructed instance with its structure and metadata +#[derive(Debug)] +pub struct ReconstructedInstance { + pub name: String, + pub theory_name: String, + pub structure: crate::core::Structure, + pub element_names: HashMap, +} diff --git a/src/store/columnar.rs b/src/store/columnar.rs new file mode 100644 index 0000000..f160e46 --- /dev/null +++ b/src/store/columnar.rs @@ -0,0 +1,208 @@ +//! Columnar batch format for efficient storage and wire transfer. +//! +//! This module defines the physical representation for instance-level data +//! (elements, function values, relation tuples). The logical model is still +//! GeologMeta (with Elem, FuncVal, RelTupleArg sorts), but the physical +//! 
encoding uses columnar batches for efficiency. +//! +//! # EDB vs IDB Batches +//! +//! Batches are tagged as either EDB (extensional) or IDB (intensional): +//! +//! - **EDB batches**: User-declared facts. Persisted locally AND transmitted over wire. +//! - **IDB batches**: Chase-derived facts. Persisted locally but NOT transmitted over wire. +//! +//! When receiving patches over the network, only EDB batches are included. +//! The recipient runs the chase locally to regenerate IDB tuples. +//! +//! Each patch can have up to 2 batches per instance: +//! - One EDB batch (if user manually added tuples) +//! - One IDB batch (if chase produced conclusions) + +use rkyv::{Archive, Deserialize, Serialize}; + +use crate::id::Uuid; + +/// Distinguishes between user-declared (EDB) and chase-derived (IDB) data. +/// +/// This determines whether the batch is transmitted over the wire during sync. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)] +#[archive(check_bytes)] +pub enum BatchKind { + /// Extensional database: user-declared facts. + /// Persisted locally AND transmitted over wire. + #[default] + Edb, + /// Intensional database: chase-derived facts. + /// Persisted locally but NOT transmitted over wire. + Idb, +} + +/// A batch of elements added to an instance. +/// +/// Logically equivalent to a collection of Elem elements in GeologMeta. +#[derive(Archive, Serialize, Deserialize, Debug, Clone)] +#[archive(check_bytes)] +pub struct ElementBatch { + /// Which instance these elements belong to + pub instance: Uuid, + /// Sort UUID for each element (parallel array) + pub sorts: Vec, + /// UUID for each element (parallel array, same length as sorts) + pub elements: Vec, +} + +/// A batch of function values in an instance. +/// +/// Logically equivalent to a collection of FuncVal elements in GeologMeta. 
+#[derive(Archive, Serialize, Deserialize, Debug, Clone)] +#[archive(check_bytes)] +pub struct FunctionValueBatch { + /// Which instance these function values belong to + pub instance: Uuid, + /// Which function + pub func: Uuid, + /// Domain elements (parallel array) + pub args: Vec, + /// Codomain elements (parallel array, same length as args) + pub results: Vec, +} + +/// A batch of relation tuples in an instance. +/// +/// Logically equivalent to a collection of RelTuple + RelTupleArg elements +/// in GeologMeta, but stored columnar for efficiency. +/// +/// For a relation `R : [from: A, to: B] -> Prop`, this stores: +/// - columns[0] = all "from" field values (UUIDs of A elements) +/// - columns[1] = all "to" field values (UUIDs of B elements) +/// +/// Row i represents the tuple (columns[0][i], columns[1][i]). +#[derive(Archive, Serialize, Deserialize, Debug, Clone)] +#[archive(check_bytes)] +pub struct RelationTupleBatch { + /// Which instance these tuples belong to + pub instance: Uuid, + /// Which relation + pub rel: Uuid, + /// Field UUIDs for each column (from the relation's domain ProdDS/Field) + pub field_ids: Vec, + /// Columnar data: columns[field_idx][row_idx] = element UUID + /// All columns have the same length (number of tuples). 
+ pub columns: Vec>, +} + +impl RelationTupleBatch { + /// Create a new empty batch for a relation + pub fn new(instance: Uuid, rel: Uuid, field_ids: Vec) -> Self { + let num_fields = field_ids.len(); + Self { + instance, + rel, + field_ids, + columns: vec![Vec::new(); num_fields], + } + } + + /// Add a tuple to the batch + pub fn push(&mut self, tuple: &[Uuid]) { + assert_eq!(tuple.len(), self.columns.len(), "tuple arity mismatch"); + for (col, &val) in self.columns.iter_mut().zip(tuple.iter()) { + col.push(val); + } + } + + /// Number of tuples in this batch + pub fn len(&self) -> usize { + self.columns.first().map(|c| c.len()).unwrap_or(0) + } + + /// Whether the batch is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Iterate over tuples as slices + pub fn iter(&self) -> impl Iterator> + '_ { + (0..self.len()).map(|i| { + self.columns.iter().map(|col| col[i]).collect() + }) + } +} + +/// A complete instance data snapshot in columnar format. +/// +/// This is the efficient representation for storage and wire transfer. +/// Logically equivalent to the Elem, FuncVal, RelTuple, RelTupleArg +/// portions of a GeologMeta instance. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, Default)] +#[archive(check_bytes)] +pub struct InstanceDataBatch { + /// Whether this batch contains EDB (user-declared) or IDB (chase-derived) data. + /// IDB batches are persisted locally but NOT transmitted over wire. 
+ pub kind: BatchKind, + /// All element additions + pub elements: Vec, + /// All function value definitions + pub function_values: Vec, + /// All relation tuple assertions + pub relation_tuples: Vec, +} + +impl InstanceDataBatch { + /// Create a new empty EDB batch (default for user-declared data) + pub fn new() -> Self { + Self::default() + } + + /// Create a new empty IDB batch (for chase-derived data) + pub fn new_idb() -> Self { + Self { + kind: BatchKind::Idb, + ..Default::default() + } + } + + /// Check if this batch should be transmitted over the wire + pub fn is_wire_transmittable(&self) -> bool { + self.kind == BatchKind::Edb + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_relation_tuple_batch() { + let instance = Uuid::nil(); + let rel = Uuid::nil(); + let field_a = Uuid::nil(); + let field_b = Uuid::nil(); + + let mut batch = RelationTupleBatch::new( + instance, + rel, + vec![field_a, field_b], + ); + + assert!(batch.is_empty()); + + // Add some tuples + let elem1 = Uuid::nil(); + let elem2 = Uuid::nil(); + let elem3 = Uuid::nil(); + + batch.push(&[elem1, elem2]); + batch.push(&[elem2, elem3]); + batch.push(&[elem1, elem3]); + + assert_eq!(batch.len(), 3); + + let tuples: Vec<_> = batch.iter().collect(); + assert_eq!(tuples.len(), 3); + assert_eq!(tuples[0], vec![elem1, elem2]); + assert_eq!(tuples[1], vec![elem2, elem3]); + assert_eq!(tuples[2], vec![elem1, elem3]); + } +} diff --git a/src/store/commit.rs b/src/store/commit.rs new file mode 100644 index 0000000..83cd78b --- /dev/null +++ b/src/store/commit.rs @@ -0,0 +1,209 @@ +//! Commit operations for the Store. +//! +//! Version control through commits and name bindings. 
+ +use crate::id::{NumericId, Slid}; + +use super::append::AppendOps; +use super::{BindingKind, Store}; + +impl Store { + /// Create a new commit + pub fn commit(&mut self, message: Option<&str>) -> Result { + let sort_id = self.sort_ids.commit.ok_or("Commit sort not found")?; + let commit_slid = self.add_element(sort_id, message.unwrap_or("commit")); + + // Set parent if there's a head + if let Some(head) = self.head { + let parent_func = self.func_ids.commit_parent.ok_or("Commit/parent not found")?; + self.define_func(parent_func, commit_slid, head)?; + } + + // Create NameBindings for all uncommitted changes + let nb_sort = self.sort_ids.name_binding.ok_or("NameBinding sort not found")?; + let commit_func = self.func_ids.name_binding_commit.ok_or("NameBinding/commit not found")?; + let theory_func = self.func_ids.name_binding_theory.ok_or("NameBinding/theory not found")?; + let instance_func = self.func_ids.name_binding_instance.ok_or("NameBinding/instance not found")?; + + // Collect uncommitted to avoid borrow issues + let uncommitted: Vec<_> = self.uncommitted.drain().collect(); + for (name, binding) in uncommitted { + let nb_slid = self.add_element(nb_sort, &format!("nb_{}_{}", name, commit_slid.index())); + self.define_func(commit_func, nb_slid, commit_slid)?; + + match binding.kind { + BindingKind::Theory => { + self.define_func(theory_func, nb_slid, binding.target)?; + } + BindingKind::Instance => { + self.define_func(instance_func, nb_slid, binding.target)?; + } + } + } + + // Update head + self.head = Some(commit_slid); + + // Auto-save + self.save()?; + + Ok(commit_slid) + } + + /// Get the current binding for a name (from HEAD commit or uncommitted) + pub fn resolve_name(&self, name: &str) -> Option<(Slid, BindingKind)> { + // Check uncommitted first + if let Some(binding) = self.uncommitted.get(name) { + return Some((binding.target, binding.kind)); + } + + // Search through name bindings from HEAD backwards (if we have commits) + if let (Some(head), 
Some(nb_sort), Some(commit_func), Some(theory_func), Some(instance_func)) = ( + self.head, + self.sort_ids.name_binding, + self.func_ids.name_binding_commit, + self.func_ids.name_binding_theory, + self.func_ids.name_binding_instance, + ) { + let mut current = Some(head); + while let Some(commit) = current { + // Find all NameBindings for this commit + for nb_slid in self.elements_of_sort(nb_sort) { + if self.get_func(commit_func, nb_slid) == Some(commit) { + // Check if this binding is for our name + let nb_name = self.get_element_name(nb_slid); + if nb_name.starts_with(&format!("nb_{}_", name)) { + // Found it! Return the target + if let Some(theory) = self.get_func(theory_func, nb_slid) { + return Some((theory, BindingKind::Theory)); + } + if let Some(instance) = self.get_func(instance_func, nb_slid) { + return Some((instance, BindingKind::Instance)); + } + } + } + } + + // Move to parent commit + if let Some(parent_func) = self.func_ids.commit_parent { + current = self.get_func(parent_func, commit); + } else { + break; + } + } + } + + // Fallback: search directly in meta Structure for uncommitted theories/instances + // This handles the case where data exists in meta.bin but no commit was made yet + if let Some(theory_sort) = self.sort_ids.theory { + for slid in self.elements_of_sort(theory_sort) { + if self.get_element_name(slid) == name { + return Some((slid, BindingKind::Theory)); + } + } + } + if let Some(instance_sort) = self.sort_ids.instance { + for slid in self.elements_of_sort(instance_sort) { + if self.get_element_name(slid) == name { + return Some((slid, BindingKind::Instance)); + } + } + } + + None + } + + /// Get all commits in order (oldest to newest) + pub fn commit_history(&self) -> Vec { + let Some(head) = self.head else { + return vec![]; + }; + + let mut chain = Vec::new(); + let mut current = Some(head); + + while let Some(commit) = current { + chain.push(commit); + current = self + .func_ids + .commit_parent + .and_then(|f| self.get_func(f, 
commit)); + } + + chain.reverse(); + chain + } + + /// List all committed bindings (theories and instances) + /// + /// Returns (name, kind, target_slid) for each binding visible from HEAD. + /// Names may appear multiple times if rebound in different commits. + pub fn list_bindings(&self) -> Vec<(String, BindingKind, Slid)> { + let Some(head) = self.head else { + return vec![]; + }; + + let Some(nb_sort) = self.sort_ids.name_binding else { + return vec![]; + }; + let Some(commit_func) = self.func_ids.name_binding_commit else { + return vec![]; + }; + let Some(theory_func) = self.func_ids.name_binding_theory else { + return vec![]; + }; + let Some(instance_func) = self.func_ids.name_binding_instance else { + return vec![]; + }; + + let mut bindings = Vec::new(); + let mut seen_names = std::collections::HashSet::new(); + + // Walk commits from head backwards + let mut current = Some(head); + while let Some(commit) = current { + // Find all NameBindings for this commit + for nb_slid in self.elements_of_sort(nb_sort) { + if self.get_func(commit_func, nb_slid) == Some(commit) { + // Extract name from "nb_{name}_{commit_id}" + let nb_name = self.get_element_name(nb_slid); + if let Some(name) = extract_binding_name(&nb_name) { + // Only include first (most recent) binding for each name + if seen_names.insert(name.clone()) { + if let Some(theory) = self.get_func(theory_func, nb_slid) { + bindings.push((name, BindingKind::Theory, theory)); + } else if let Some(instance) = self.get_func(instance_func, nb_slid) { + bindings.push((name, BindingKind::Instance, instance)); + } + } + } + } + } + + // Move to parent commit + current = self + .func_ids + .commit_parent + .and_then(|f| self.get_func(f, commit)); + } + + bindings + } +} + +/// Extract the name from a binding element name like "nb_Graph_2" +fn extract_binding_name(nb_name: &str) -> Option { + // Format: "nb_{name}_{commit_id}" + if !nb_name.starts_with("nb_") { + return None; + } + let rest = &nb_name[3..]; // Skip 
"nb_" + // Find the last underscore (before commit_id) + if let Some(last_underscore) = rest.rfind('_') { + // Verify the part after underscore is a number + if rest[last_underscore + 1..].parse::().is_ok() { + return Some(rest[..last_underscore].to_string()); + } + } + None +} diff --git a/src/store/instance.rs b/src/store/instance.rs new file mode 100644 index 0000000..d5204fd --- /dev/null +++ b/src/store/instance.rs @@ -0,0 +1,356 @@ +//! Instance operations for the Store. +//! +//! Creating, extending, and modifying instances in the GeologMeta structure. + +use std::collections::HashMap; + +use crate::core::{RelationStorage, Structure}; +use crate::id::{NumericId, Slid, Uuid}; + +use super::append::AppendOps; +use super::columnar::{InstanceDataBatch, RelationTupleBatch}; +use super::{BindingKind, Store, UncommittedBinding}; + +impl Store { + /// Create a new instance (version 0, no parent) + pub fn create_instance(&mut self, name: &str, theory: Slid) -> Result { + let sort_id = self.sort_ids.instance.ok_or("Instance sort not found")?; + let instance_slid = self.add_element(sort_id, name); + + // Set theory + let func_id = self.func_ids.instance_theory.ok_or("Instance/theory not found")?; + self.define_func(func_id, instance_slid, theory)?; + + // Register uncommitted binding + self.uncommitted.insert( + name.to_string(), + UncommittedBinding { + target: instance_slid, + kind: BindingKind::Instance, + }, + ); + + Ok(instance_slid) + } + + /// Create a new version of an existing instance + pub fn extend_instance(&mut self, parent: Slid, name: &str) -> Result { + let sort_id = self.sort_ids.instance.ok_or("Instance sort not found")?; + + // Get the theory from the parent + let theory_func = self.func_ids.instance_theory.ok_or("Instance/theory not found")?; + let theory = self.get_func(theory_func, parent).ok_or("Parent has no theory")?; + + let instance_slid = self.add_element( + sort_id, + &format!("{}@v{}", name, self.meta.carriers[sort_id].len()), + ); + + // 
Set parent and theory + let parent_func = self.func_ids.instance_parent.ok_or("Instance/parent not found")?; + self.define_func(parent_func, instance_slid, parent)?; + self.define_func(theory_func, instance_slid, theory)?; + + // Update uncommitted binding + self.uncommitted.insert( + name.to_string(), + UncommittedBinding { + target: instance_slid, + kind: BindingKind::Instance, + }, + ); + + Ok(instance_slid) + } + + /// Add an element to an instance + pub fn add_elem(&mut self, instance: Slid, srt: Slid, name: &str) -> Result { + let sort_id = self.sort_ids.elem.ok_or("Elem sort not found")?; + let elem_slid = self.add_element_qualified( + sort_id, + vec![self.get_element_name(instance), name.to_string()], + ); + + let instance_func = self.func_ids.elem_instance.ok_or("Elem/instance not found")?; + let sort_func = self.func_ids.elem_sort.ok_or("Elem/sort not found")?; + + self.define_func(instance_func, elem_slid, instance)?; + self.define_func(sort_func, elem_slid, srt)?; + + Ok(elem_slid) + } + + /// Retract an element from an instance + pub fn retract_elem(&mut self, instance: Slid, elem: Slid) -> Result { + let sort_id = self.sort_ids.elem_retract.ok_or("ElemRetract sort not found")?; + let retract_slid = self.add_element(sort_id, &format!("retract_{}", self.get_element_name(elem))); + + let instance_func = self.func_ids.elem_retract_instance.ok_or("ElemRetract/instance not found")?; + let elem_func = self.func_ids.elem_retract_elem.ok_or("ElemRetract/elem not found")?; + + self.define_func(instance_func, retract_slid, instance)?; + self.define_func(elem_func, retract_slid, elem)?; + + Ok(retract_slid) + } + + /// Define a function value in an instance + pub fn add_func_val( + &mut self, + instance: Slid, + func: Slid, + arg: Slid, + result: Slid, + ) -> Result { + let sort_id = self.sort_ids.func_val.ok_or("FuncVal sort not found")?; + let fv_slid = self.add_element( + sort_id, + &format!("fv_{}_{}", self.get_element_name(func), self.get_element_name(arg)), 
+ ); + + let instance_func = self.func_ids.func_val_instance.ok_or("FuncVal/instance not found")?; + let func_func = self.func_ids.func_val_func.ok_or("FuncVal/func not found")?; + let arg_func = self.func_ids.func_val_arg.ok_or("FuncVal/arg not found")?; + let result_func = self.func_ids.func_val_result.ok_or("FuncVal/result not found")?; + + self.define_func(instance_func, fv_slid, instance)?; + self.define_func(func_func, fv_slid, func)?; + self.define_func(arg_func, fv_slid, arg)?; + self.define_func(result_func, fv_slid, result)?; + + Ok(fv_slid) + } + + // NOTE: No retract_func_val - function values are IMMUTABLE (Monotonic Submodel Property) + + /// Assert a relation tuple in an instance. + /// + /// NOTE: This is a legacy stub. Relation tuples should be persisted via columnar + /// batches (see `store::columnar`). This method is kept for API compatibility + /// but silently succeeds without persisting to storage. + /// + /// TODO: Migrate callers to use columnar batch persistence. + #[allow(unused_variables)] + pub fn add_rel_tuple(&mut self, instance: Slid, rel: Slid, arg: Slid) -> Result { + // Relation tuples are now stored in columnar batches, not as individual + // GeologMeta elements. This method is a no-op that returns a dummy Slid. + // + // The actual persistence should happen via InstanceDataBatch in columnar.rs. + // For now, return the arg as a placeholder to avoid breaking callers. + Ok(arg) + } + + // NOTE: No retract_rel_tuple - relation tuples are IMMUTABLE (Monotonic Submodel Property) + + /// Persist all instance data (elements, function values, relation tuples) to GeologMeta. + /// + /// This takes a Structure and persists its contents to the Store, creating Elem, + /// FuncVal, and RelTuple elements in GeologMeta. + /// + /// Returns a mapping from Structure Slids to GeologMeta Elem Slids. 
+ pub fn persist_instance_data( + &mut self, + instance_slid: Slid, + theory_slid: Slid, + structure: &Structure, + element_names: &HashMap, + ) -> Result { + // Get theory's sorts to map sort indices to Srt Slids + let sort_infos = self.query_theory_sorts(theory_slid); + let func_infos = self.query_theory_funcs(theory_slid); + let rel_infos = self.query_theory_rels(theory_slid); + + // Build sort index -> Srt Slid mapping + let sort_idx_to_srt: HashMap = sort_infos + .iter() + .enumerate() + .map(|(idx, info)| (idx, info.slid)) + .collect(); + + // Build func index -> Func Slid mapping + let func_idx_to_func: HashMap = func_infos + .iter() + .enumerate() + .map(|(idx, info)| (idx, info.slid)) + .collect(); + + // Build rel index -> Rel Slid mapping + let rel_idx_to_rel: HashMap = rel_infos + .iter() + .enumerate() + .map(|(idx, info)| (idx, info.slid)) + .collect(); + + // Mapping from Structure Slid to GeologMeta Elem Slid + let mut elem_slid_map: HashMap = HashMap::new(); + + // 1. Persist all elements + for (sort_idx, carrier) in structure.carriers.iter().enumerate() { + let srt_slid = sort_idx_to_srt + .get(&sort_idx) + .copied() + .ok_or_else(|| format!("Unknown sort index: {}", sort_idx))?; + + for structure_slid_u64 in carrier.iter() { + let structure_slid = Slid::from_usize(structure_slid_u64 as usize); + let elem_name = element_names + .get(&structure_slid) + .map(|s| s.as_str()) + .unwrap_or_else(|| "elem"); + + let elem_slid = self.add_elem(instance_slid, srt_slid, elem_name)?; + elem_slid_map.insert(structure_slid, elem_slid); + } + } + + // 2. 
Persist function values + // For now, only handle base domain functions (not product domains) + for (func_idx, func_col) in structure.functions.iter().enumerate() { + let func_slid = match func_idx_to_func.get(&func_idx) { + Some(s) => *s, + None => continue, // Skip if no corresponding Func in theory + }; + + match func_col { + crate::core::FunctionColumn::Local(values) => { + for (local_idx, opt_result) in values.iter().enumerate() { + if let Some(result_slid) = crate::id::get_slid(*opt_result) { + // Find the structure Slid for this local index + // The local index corresponds to position in the domain sort's carrier + if let Some(domain_sort_idx) = self.get_func_domain_sort(func_slid) + && let Some(carrier) = structure.carriers.get(domain_sort_idx) + && let Some(arg_u64) = carrier.iter().nth(local_idx) { + let arg_slid = Slid::from_usize(arg_u64 as usize); + if let (Some(&arg_elem), Some(&result_elem)) = + (elem_slid_map.get(&arg_slid), elem_slid_map.get(&result_slid)) + { + self.add_func_val(instance_slid, func_slid, arg_elem, result_elem)?; + } + } + } + } + } + crate::core::FunctionColumn::External(_) => { + // External functions reference elements from other instances + // TODO: Handle external references + } + crate::core::FunctionColumn::ProductLocal { .. } => { + // Product domain functions need special handling + // TODO: Handle product domains + } + crate::core::FunctionColumn::ProductCodomain { .. } => { + // Product codomain functions need special handling + // TODO: Handle product codomains (store each field value) + } + } + } + + // 3. 
Persist relation tuples via columnar batches + // Build InstanceDataBatch with all relation tuples + let mut batch = InstanceDataBatch::new(); + + // Get instance UUID for the batch + let instance_uuid = self.get_element_uuid(instance_slid); + + // Build a map from Structure Slid to element UUID + let struct_slid_to_uuid: HashMap = elem_slid_map + .iter() + .map(|(&struct_slid, &elem_slid)| { + (struct_slid, self.get_element_uuid(elem_slid)) + }) + .collect(); + + for (rel_idx, relation) in structure.relations.iter().enumerate() { + let rel_slid = match rel_idx_to_rel.get(&rel_idx) { + Some(s) => *s, + None => continue, + }; + + if relation.is_empty() { + continue; + } + + // Get the relation UUID + let rel_uuid = self.get_element_uuid(rel_slid); + + // Get field UUIDs for this relation's domain + let rel_info = rel_infos.get(rel_idx); + let arity = rel_info.map(|r| r.domain.arity()).unwrap_or(1); + + // For field_ids, we use the field UUIDs from the relation's domain + // For now, use placeholder UUIDs since we need to query Field elements + // TODO: Query Field elements from GeologMeta for proper UUIDs + let field_ids: Vec = (0..arity).map(|_| Uuid::nil()).collect(); + + let mut rel_batch = RelationTupleBatch::new( + instance_uuid, + rel_uuid, + field_ids, + ); + + // Add all tuples + for tuple in relation.iter() { + // Convert Structure Slids to UUIDs + let uuid_tuple: Vec = tuple + .iter() + .filter_map(|struct_slid| struct_slid_to_uuid.get(struct_slid).copied()) + .collect(); + + if uuid_tuple.len() == tuple.len() { + rel_batch.push(&uuid_tuple); + } + } + + if !rel_batch.is_empty() { + batch.relation_tuples.push(rel_batch); + } + } + + // Save the batch if we have any relation tuples + if !batch.relation_tuples.is_empty() { + // Determine version number (count existing batches for this instance) + let existing_batches = self.load_instance_data_batches(instance_uuid) + .unwrap_or_default(); + let version = existing_batches.len() as u64; + + 
self.save_instance_data_batch(instance_uuid, version, &batch)?; + } + + Ok(InstancePersistResult { elem_slid_map }) + } + + /// Helper to get the domain sort index for a function. + fn get_func_domain_sort(&self, func_slid: Slid) -> Option { + let dom_func = self.func_ids.func_dom?; + let dsort_slid = self.get_func(dom_func, func_slid)?; + + // Check if it's a base dsort + let base_ds_sort = self.sort_ids.base_ds?; + let srt_func = self.func_ids.base_ds_srt?; + let dsort_func = self.func_ids.base_ds_dsort?; + + for base_slid in self.elements_of_sort(base_ds_sort) { + if self.get_func(dsort_func, base_slid) == Some(dsort_slid) + && let Some(srt_slid) = self.get_func(srt_func, base_slid) { + // Find this Srt's index in the theory + let srt_theory_func = self.func_ids.srt_theory?; + if let Some(theory_slid) = self.get_func(srt_theory_func, srt_slid) { + let sorts = self.query_theory_sorts(theory_slid); + for (idx, info) in sorts.iter().enumerate() { + if info.slid == srt_slid { + return Some(idx); + } + } + } + } + } + None + } +} + +/// Result of persisting instance data to GeologMeta. +#[derive(Debug)] +pub struct InstancePersistResult { + /// Mapping from Structure Slids to GeologMeta Elem Slids + pub elem_slid_map: HashMap, +} diff --git a/src/store/materialize.rs b/src/store/materialize.rs new file mode 100644 index 0000000..b060571 --- /dev/null +++ b/src/store/materialize.rs @@ -0,0 +1,238 @@ +//! Materialized views for the Store. +//! +//! A MaterializedView is an indexed snapshot of an instance at a specific version, +//! computed by walking the version chain and applying all additions/retractions. + +use std::collections::{HashMap, HashSet}; + +use crate::id::{NumericId, Slid}; + +use super::append::AppendOps; +use super::Store; + +/// A materialized view of an instance at a specific version. +/// +/// This is the "rendered" state of an instance after applying all patches +/// from the root to a particular version. 
It can be incrementally updated +/// when a new child version is created. +#[derive(Clone, Debug)] +pub struct MaterializedView { + /// The instance version this view is materialized at + pub instance: Slid, + + /// Live elements (not tombstoned) + pub elements: HashSet, + + /// Live relation tuples: tuple_slid -> (rel, arg) + pub rel_tuples: HashMap, + + /// Live function values: fv_slid -> (func, arg, result) + pub func_vals: HashMap, + + /// Element tombstones (for delta computation) + /// NOTE: Only elements can be tombstoned; FuncVals and RelTuples are immutable + pub elem_tombstones: HashSet, +} + +impl MaterializedView { + /// Create an empty materialized view + pub fn empty(instance: Slid) -> Self { + Self { + instance, + elements: HashSet::new(), + rel_tuples: HashMap::new(), + func_vals: HashMap::new(), + elem_tombstones: HashSet::new(), + } + } + + /// Get the number of live elements + pub fn element_count(&self) -> usize { + self.elements.len() + } + + /// Check if an element is live + pub fn has_element(&self, elem: Slid) -> bool { + self.elements.contains(&elem) + } + + /// Check if a relation tuple is live + pub fn has_rel_tuple(&self, tuple: Slid) -> bool { + self.rel_tuples.contains_key(&tuple) + } + + /// Get all elements of a particular sort (requires Store for lookup) + pub fn elements_of_sort<'a>( + &'a self, + store: &'a Store, + sort: Slid, + ) -> impl Iterator + 'a { + self.elements.iter().copied().filter(move |&elem| { + store + .func_ids + .elem_sort + .and_then(|f| store.get_func(f, elem)) + .map(|s| s == sort) + .unwrap_or(false) + }) + } + + /// Get all relation tuples for a particular relation + pub fn tuples_of_relation(&self, rel: Slid) -> impl Iterator + '_ { + self.rel_tuples + .iter() + .filter(move |(_, (r, _))| *r == rel) + .map(|(&tuple_slid, (_, arg))| (tuple_slid, *arg)) + } + + /// Get all function values for a particular function + pub fn values_of_function(&self, func: Slid) -> impl Iterator + '_ { + self.func_vals + 
.iter() + .filter(move |(_, (f, _, _))| *f == func) + .map(|(_, (_, arg, result))| (*arg, *result)) + } +} + +impl Store { + /// Materialize an instance from scratch by walking the parent chain. + /// + /// This collects all additions and retractions from root to the specified + /// version, producing a complete view of the instance state. + pub fn materialize(&self, instance: Slid) -> MaterializedView { + let mut view = MaterializedView::empty(instance); + + // Collect version chain (from instance back to root) + let mut chain = Vec::new(); + let mut version = Some(instance); + while let Some(v) = version { + chain.push(v); + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + // Process from oldest to newest (reverse the chain) + for v in chain.into_iter().rev() { + self.apply_version_delta(&mut view, v); + } + + view.instance = instance; + view + } + + /// Apply the delta from a single instance version to a materialized view. + /// + /// This is the core of incremental materialization: given a view at version N, + /// we can efficiently update it to version N+1 by applying only the changes + /// introduced in N+1. + pub fn apply_version_delta(&self, view: &mut MaterializedView, version: Slid) { + // 1. Process element additions + if let Some(elem_sort) = self.sort_ids.elem + && let Some(instance_func) = self.func_ids.elem_instance { + for elem in self.elements_of_sort(elem_sort) { + if self.get_func(instance_func, elem) == Some(version) { + // Don't add if already tombstoned + if !view.elem_tombstones.contains(&elem) { + view.elements.insert(elem); + } + } + } + } + + // 2. 
Process element retractions + if let Some(retract_sort) = self.sort_ids.elem_retract + && let Some(instance_func) = self.func_ids.elem_retract_instance + && let Some(elem_func) = self.func_ids.elem_retract_elem { + for retract in self.elements_of_sort(retract_sort) { + if self.get_func(instance_func, retract) == Some(version) + && let Some(elem) = self.get_func(elem_func, retract) { + view.elements.remove(&elem); + view.elem_tombstones.insert(elem); + } + } + } + + // 3. Process relation tuple additions + // NOTE: Relation tuples are now stored in columnar batches (see `store::columnar`), + // not as individual GeologMeta elements. This section is a no-op until + // columnar batch loading is implemented. + // + // TODO: Load relation tuples from columnar batches into view.rel_tuples + + // 4. Process function value additions (IMMUTABLE - no retractions) + if let Some(fv_sort) = self.sort_ids.func_val + && let (Some(instance_func), Some(func_func), Some(arg_func), Some(result_func)) = ( + self.func_ids.func_val_instance, + self.func_ids.func_val_func, + self.func_ids.func_val_arg, + self.func_ids.func_val_result, + ) { + for fv in self.elements_of_sort(fv_sort) { + if self.get_func(instance_func, fv) == Some(version) + && let (Some(func), Some(arg), Some(result)) = ( + self.get_func(func_func, fv), + self.get_func(arg_func, fv), + self.get_func(result_func, fv), + ) { + view.func_vals.insert(fv, (func, arg, result)); + } + } + } + } + + /// Incrementally update a materialized view to a new version. + /// + /// The new version must be a direct child of the view's current version, + /// or this will return an error. 
+ pub fn update_view( + &self, + view: &mut MaterializedView, + new_version: Slid, + ) -> Result<(), String> { + // Verify that new_version is a direct child of view.instance + let parent = self + .func_ids + .instance_parent + .and_then(|f| self.get_func(f, new_version)); + + if parent != Some(view.instance) { + return Err(format!( + "Cannot incrementally update: {} is not a direct child of {}", + new_version.index(), + view.instance.index() + )); + } + + // Apply the delta + self.apply_version_delta(view, new_version); + view.instance = new_version; + + Ok(()) + } + + /// Create a new instance version extending an existing view, and update the view. + /// + /// This is the preferred way to modify instances: create the extension, + /// add elements/tuples/values to it, then update the view. + pub fn extend_instance_with_view( + &mut self, + view: &mut MaterializedView, + name: &str, + ) -> Result { + let new_version = self.extend_instance(view.instance, name)?; + + // The view can be updated after mutations are done + // For now, just update the instance reference + view.instance = new_version; + + Ok(new_version) + } + + /// Materialize and cache a view for an instance. + /// + /// This stores the view in a view cache for efficient reuse. + /// The cache is invalidated when the instance is extended. + pub fn get_or_create_view(&mut self, instance: Slid) -> MaterializedView { + // For now, just materialize (cache can be added later) + self.materialize(instance) + } +} diff --git a/src/store/mod.rs b/src/store/mod.rs new file mode 100644 index 0000000..1f2c874 --- /dev/null +++ b/src/store/mod.rs @@ -0,0 +1,585 @@ +//! Append-only store for GeologMeta elements. +//! +//! This module provides the foundation for geolog's persistent, versioned data model. +//! All data (theories, instances, elements, function values, relation tuples) is stored +//! as elements in a single GeologMeta Structure that is append-only. +//! +//! # Key design principles +//! +//! 
- **Append-only**: Elements are never deleted, only tombstoned +//! - **Patch-based versioning**: Each theory/instance version is a delta from its parent +//! - **Incremental materialization**: Views are updated efficiently as patches arrive +//! - **Eternal format**: Once GeologMeta schema is v1.0, it never changes +//! +//! # Module structure +//! +//! - [`schema`]: Cached sort and function IDs from GeologMeta +//! - [`append`]: Low-level element append operations +//! - [`theory`]: Theory CRUD (create, extend, add sorts/functions/relations) +//! - [`instance`]: Instance CRUD (create, extend, add elements, retractions) +//! - [`commit`]: Version control (commits, name bindings, history) +//! - [`query`]: Query operations (walking version chains) +//! - [`materialize`]: Materialized views for fast indexed access + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use crate::core::{DerivedSort, ElaboratedTheory, Structure}; +use crate::id::{NumericId, Slid}; +use crate::meta::geolog_meta; +use crate::naming::NamingIndex; +use crate::universe::Universe; + +pub mod append; +pub mod batch; +pub mod bootstrap_queries; +pub mod columnar; +pub mod commit; +pub mod instance; +pub mod materialize; +pub mod query; +pub mod schema; +pub mod theory; + +pub use batch::{ElementBatch, ElementBuilder, ElementCreationContext}; +pub use materialize::MaterializedView; +pub use schema::{FuncIds, SortIds}; + +// ============================================================================ +// STORE +// ============================================================================ + +/// The append-only store: a single GeologMeta Structure plus indexing. +/// +/// This is the "source of truth" for all geolog data. Theories and instances +/// are represented as elements within this structure, along with their +/// components (sorts, functions, relations, elements, values, etc.). 
+pub struct Store { + /// The GeologMeta instance containing all data + pub meta: Structure, + + /// The GeologMeta theory (for signature lookups) + pub meta_theory: Arc, + + /// Universe for UUID <-> Luid mapping + pub universe: Universe, + + /// Human-readable names for UUIDs + pub naming: NamingIndex, + + /// Current HEAD commit (None if no commits yet) + pub head: Option, + + /// Uncommitted changes (name -> target slid) + /// These become NameBindings on commit + pub uncommitted: HashMap, + + /// Cached sort IDs for quick lookup + pub(crate) sort_ids: SortIds, + + /// Cached function IDs for quick lookup + pub(crate) func_ids: FuncIds, + + /// Path for persistence (None = in-memory only) + pub path: Option, + + /// Whether there are unsaved changes + dirty: bool, +} + +/// An uncommitted name binding +#[derive(Debug, Clone)] +pub struct UncommittedBinding { + /// The target (Theory or Instance slid in meta) + pub target: Slid, + /// Whether this binds to a theory or instance + pub kind: BindingKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BindingKind { + Theory, + Instance, +} + +// ============================================================================ +// APPEND TRAIT IMPLEMENTATION +// ============================================================================ + +impl append::AppendOps for Store { + fn add_element(&mut self, sort_id: usize, name: &str) -> Slid { + let (slid, luid) = self.meta.add_element(&mut self.universe, sort_id); + let uuid = self.universe.get(luid).expect("freshly created luid should have uuid"); + self.naming.insert(uuid, vec![name.to_string()]); + self.dirty = true; + slid + } + + fn add_element_qualified(&mut self, sort_id: usize, path: Vec) -> Slid { + let (slid, luid) = self.meta.add_element(&mut self.universe, sort_id); + let uuid = self.universe.get(luid).expect("freshly created luid should have uuid"); + self.naming.insert(uuid, path); + self.dirty = true; + slid + } + + fn define_func(&mut self, 
func_id: usize, domain: Slid, codomain: Slid) -> Result<(), String> { + self.meta.define_function(func_id, domain, codomain)?; + self.dirty = true; + Ok(()) + } + + fn get_func(&self, func_id: usize, domain: Slid) -> Option { + let sort_slid = self.meta.sort_local_id(domain); + self.meta.get_function(func_id, sort_slid) + } + + fn elements_of_sort(&self, sort_id: usize) -> Vec { + if sort_id >= self.meta.carriers.len() { + return vec![]; + } + self.meta.carriers[sort_id] + .iter() + .map(|x| Slid::from_usize(x as usize)) + .collect() + } + + fn get_element_name(&self, slid: Slid) -> String { + let luid = self.meta.get_luid(slid); + if let Some(uuid) = self.universe.get(luid) { + self.naming.display_name(&uuid) + } else { + format!("#{}", slid.index()) + } + } +} + +// ============================================================================ +// STORE IMPL +// ============================================================================ + +impl Store { + /// Create a new empty store + pub fn new() -> Self { + let meta_theory = geolog_meta(); + let num_sorts = meta_theory.theory.signature.sorts.len(); + let mut meta = Structure::new(num_sorts); + + // Initialize function storage for all functions in GeologMeta + let domain_sort_ids: Vec> = meta_theory + .theory + .signature + .functions + .iter() + .map(|f| match &f.domain { + DerivedSort::Base(sort_id) => Some(*sort_id), + DerivedSort::Product(_) => None, + }) + .collect(); + meta.init_functions(&domain_sort_ids); + + // Initialize relation storage + let arities: Vec = meta_theory + .theory + .signature + .relations + .iter() + .map(|r| match &r.domain { + DerivedSort::Base(_) => 1, + DerivedSort::Product(fields) => fields.len(), + }) + .collect(); + meta.init_relations(&arities); + + let sort_ids = SortIds::from_theory(&meta_theory); + let func_ids = FuncIds::from_theory(&meta_theory); + + Self { + meta, + meta_theory, + universe: Universe::new(), + naming: NamingIndex::new(), + head: None, + uncommitted: 
HashMap::new(), + sort_ids, + func_ids, + path: None, + dirty: false, + } + } + + /// Create a store with a persistence path + pub fn with_path(path: impl Into) -> Self { + let path = path.into(); + + // Create directory if needed + let _ = std::fs::create_dir_all(&path); + + // Create store with paths for all components + let mut store = Self::new(); + store.path = Some(path.clone()); + store.universe = Universe::with_path(path.join("universe")); + store.naming = NamingIndex::with_path(path.join("naming")); + store + } + + /// Load a store from disk, or create new if doesn't exist + pub fn load_or_create(path: impl Into) -> Self { + let path = path.into(); + if path.exists() { + Self::load(&path).unwrap_or_else(|_| Self::with_path(path)) + } else { + Self::with_path(path) + } + } + + /// Load a store from disk + pub fn load(path: &Path) -> Result { + // Load meta structure + let meta_path = path.join("meta.bin"); + let meta = crate::serialize::load_structure(&meta_path)?; + + // Load universe + let universe_path = path.join("universe"); + let universe = Universe::load(&universe_path)?; + + // Load naming + let naming_path = path.join("naming"); + let naming = NamingIndex::load(&naming_path)?; + + // Load HEAD commit reference + let head_path = path.join("HEAD"); + let head = if head_path.exists() { + let content = std::fs::read_to_string(&head_path) + .map_err(|e| format!("Failed to read HEAD: {}", e))?; + let index: usize = content + .trim() + .parse() + .map_err(|e| format!("Invalid HEAD format: {}", e))?; + Some(Slid::from_usize(index)) + } else { + None + }; + + // Get meta theory and build IDs (same as new()) + let meta_theory = geolog_meta(); + let sort_ids = SortIds::from_theory(&meta_theory); + let func_ids = FuncIds::from_theory(&meta_theory); + + Ok(Self { + meta, + meta_theory, + universe, + naming, + head, + uncommitted: HashMap::new(), + sort_ids, + func_ids, + path: Some(path.to_path_buf()), + dirty: false, + }) + } + + /// Save the store to disk + 
pub fn save(&mut self) -> Result<(), String> { + if !self.dirty { + return Ok(()); + } + + let Some(path) = &self.path else { + return Ok(()); // In-memory store, nothing to save + }; + + // Ensure parent directory exists + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create directory: {}", e))?; + } + + // Save universe + self.universe.save()?; + + // Save naming + self.naming.save()?; + + // Save meta structure + let meta_path = path.join("meta.bin"); + crate::serialize::save_structure(&self.meta, &meta_path)?; + + // Save head commit reference + if let Some(head) = self.head { + let head_path = path.join("HEAD"); + std::fs::write(&head_path, format!("{}", head.index())) + .map_err(|e| format!("Failed to write HEAD: {}", e))?; + } + + self.dirty = false; + Ok(()) + } + + /// Check if the store has uncommitted changes + pub fn is_dirty(&self) -> bool { + self.dirty || !self.uncommitted.is_empty() + } + + /// Get the number of elements in the meta structure + pub fn len(&self) -> usize { + self.meta.len() + } + + /// Check if the store is empty + pub fn is_empty(&self) -> bool { + self.meta.is_empty() + } + + // ======================================================================== + // COLUMNAR BATCH STORAGE + // ======================================================================== + + /// Get the directory for instance data (columnar batches) + fn instance_data_dir(&self) -> Option { + self.path.as_ref().map(|p| p.join("instance_data")) + } + + /// Save instance data batch for a specific patch version. + /// + /// Each patch can have up to 2 batches per instance: + /// - One EDB batch (user-declared facts) + /// - One IDB batch (chase-derived facts) + /// + /// The batch kind is encoded in the filename to allow both to coexist. 
+ pub fn save_instance_data_batch( + &self, + instance_uuid: crate::id::Uuid, + patch_version: u64, + batch: &columnar::InstanceDataBatch, + ) -> Result<(), String> { + use rkyv::ser::serializers::AllocSerializer; + use rkyv::ser::Serializer; + + let Some(dir) = self.instance_data_dir() else { + return Ok(()); // In-memory store, nothing to save + }; + + // Ensure directory exists + std::fs::create_dir_all(&dir) + .map_err(|e| format!("Failed to create instance_data dir: {}", e))?; + + // Serialize batch with rkyv + let mut serializer = AllocSerializer::<4096>::default(); + serializer.serialize_value(batch) + .map_err(|e| format!("Failed to serialize instance data batch: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + // Write to file named by instance UUID, patch version, and batch kind + // EDB batches: {uuid}_v{version}_edb.batch.bin + // IDB batches: {uuid}_v{version}_idb.batch.bin + let kind_suffix = match batch.kind { + columnar::BatchKind::Edb => "edb", + columnar::BatchKind::Idb => "idb", + }; + let filename = format!("{}_v{}_{}.batch.bin", instance_uuid, patch_version, kind_suffix); + let file_path = dir.join(filename); + std::fs::write(&file_path, &bytes) + .map_err(|e| format!("Failed to write instance data batch: {}", e))?; + + Ok(()) + } + + /// Load all instance data batches for an instance (across all patch versions). + /// + /// Returns batches in version order so they can be applied sequentially. + /// Both EDB and IDB batches are loaded; use `batch.kind` to filter if needed. 
+ pub fn load_instance_data_batches( + &self, + instance_uuid: crate::id::Uuid, + ) -> Result, String> { + use rkyv::Deserialize; + + let Some(dir) = self.instance_data_dir() else { + return Ok(vec![]); // In-memory store, no data + }; + + if !dir.exists() { + return Ok(vec![]); + } + + // (version, is_idb, batch) - sort so EDB comes before IDB at same version + let mut version_batches: Vec<(u64, bool, columnar::InstanceDataBatch)> = Vec::new(); + let prefix = format!("{}_v", instance_uuid); + + // Read all matching batch files + let entries = std::fs::read_dir(&dir) + .map_err(|e| format!("Failed to read instance_data dir: {}", e))?; + + for entry in entries { + let entry = entry.map_err(|e| format!("Failed to read dir entry: {}", e))?; + let path = entry.path(); + + if let Some(name) = path.file_name().and_then(|n| n.to_str()) + && name.starts_with(&prefix) && name.ends_with(".batch.bin") { + // Parse filename: {uuid}_v{version}_{edb|idb}.batch.bin + // or legacy format: {uuid}_v{version}.batch.bin + let suffix = name + .strip_prefix(&prefix) + .and_then(|s| s.strip_suffix(".batch.bin")) + .ok_or_else(|| format!("Invalid batch filename: {}", name))?; + + // Check for new format with _edb or _idb suffix + let (version_str, is_idb) = if let Some(v) = suffix.strip_suffix("_edb") { + (v, false) + } else if let Some(v) = suffix.strip_suffix("_idb") { + (v, true) + } else { + // Legacy format without kind suffix - assume EDB + (suffix, false) + }; + + let version: u64 = version_str.parse() + .map_err(|_| format!("Invalid version in filename: {}", name))?; + + let bytes = std::fs::read(&path) + .map_err(|e| format!("Failed to read batch {}: {}", name, e))?; + + let archived = rkyv::check_archived_root::(&bytes) + .map_err(|e| format!("Failed to validate batch {}: {}", name, e))?; + + let batch: columnar::InstanceDataBatch = archived.deserialize(&mut rkyv::Infallible) + .map_err(|_| format!("Failed to deserialize batch {}", name))?; + + version_batches.push((version, 
is_idb, batch)); + } + } + + // Sort by version, then EDB before IDB at same version + version_batches.sort_by_key(|(v, is_idb, _)| (*v, *is_idb)); + Ok(version_batches.into_iter().map(|(_, _, b)| b).collect()) + } + + /// Load only EDB (wire-transmittable) batches for an instance. + /// + /// This is what would be sent over the network during sync. + pub fn load_edb_batches( + &self, + instance_uuid: crate::id::Uuid, + ) -> Result, String> { + let all = self.load_instance_data_batches(instance_uuid)?; + Ok(all.into_iter().filter(|b| b.is_wire_transmittable()).collect()) + } +} + +impl Default for Store { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_store() { + let store = Store::new(); + assert!(store.head.is_none()); + assert!(store.uncommitted.is_empty()); + } + + #[test] + fn test_create_theory() { + let mut store = Store::new(); + let _theory = store.create_theory("TestTheory").unwrap(); + assert!(store.uncommitted.contains_key("TestTheory")); + } + + #[test] + fn test_create_instance() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let _instance = store.create_instance("TestInstance", theory).unwrap(); + assert!(store.uncommitted.contains_key("TestInstance")); + } + + #[test] + fn test_commit() { + let mut store = Store::new(); + let _theory = store.create_theory("TestTheory").unwrap(); + let commit = store.commit(Some("Initial commit")).unwrap(); + assert_eq!(store.head, Some(commit)); + assert!(store.uncommitted.is_empty()); + } + + #[test] + fn test_materialize_empty_instance() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let instance = store.create_instance("TestInstance", theory).unwrap(); + + let view = 
store.materialize(instance); + assert_eq!(view.instance, instance); + assert!(view.elements.is_empty()); + assert!(view.rel_tuples.is_empty()); + assert!(view.func_vals.is_empty()); + } + + #[test] + fn test_materialize_with_elements() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let instance = store.create_instance("TestInstance", theory).unwrap(); + + // We'd need a sort in the theory to add elements, so this test is limited + let view = store.materialize(instance); + assert_eq!(view.instance, instance); + } + + #[test] + fn test_incremental_view_update() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let v1 = store.create_instance("TestInstance", theory).unwrap(); + + let mut view = store.materialize(v1); + assert_eq!(view.instance, v1); + + // Extend the instance + let v2 = store.extend_instance(v1, "TestInstance_v2").unwrap(); + + // Update view incrementally + let result = store.update_view(&mut view, v2); + assert!(result.is_ok()); + assert_eq!(view.instance, v2); + } + + #[test] + fn test_incremental_update_invalid_parent() { + let mut store = Store::new(); + let theory = store.create_theory("TestTheory").unwrap(); + let v1 = store.create_instance("Instance1", theory).unwrap(); + let v2 = store.create_instance("Instance2", theory).unwrap(); + + let mut view = store.materialize(v1); + + // v2 is not a child of v1, so this should fail + let result = store.update_view(&mut view, v2); + assert!(result.is_err()); + } + + #[test] + fn test_commit_history() { + let mut store = Store::new(); + let _theory = store.create_theory("TestTheory").unwrap(); + let c1 = store.commit(Some("First")).unwrap(); + + store.create_theory("Theory2").unwrap(); + let c2 = store.commit(Some("Second")).unwrap(); + + let history = store.commit_history(); + assert_eq!(history, vec![c1, c2]); + } +} diff --git a/src/store/query.rs b/src/store/query.rs new file mode 100644 index 
0000000..3033279 --- /dev/null +++ b/src/store/query.rs @@ -0,0 +1,127 @@ +//! Query operations for the Store. +//! +//! Walking instance version chains to collect elements, function values, and relation tuples. +//! +//! NOTE: FuncVals and RelTuples are IMMUTABLE (Monotonic Submodel Property). +//! Only elements can be retracted. + +use std::collections::HashSet; + +use crate::id::Slid; + +use super::append::AppendOps; +use super::Store; + +impl Store { + /// Get all elements of an instance (including from parent chain) + pub fn get_instance_elements(&self, instance: Slid) -> Vec { + let mut elements = Vec::new(); + let mut retractions = HashSet::new(); + + // Collect retractions first (from all versions in chain) + let mut version = Some(instance); + while let Some(v) = version { + if let Some(retract_sort) = self.sort_ids.elem_retract + && let Some(instance_func) = self.func_ids.elem_retract_instance + && let Some(elem_func) = self.func_ids.elem_retract_elem { + for retract in self.elements_of_sort(retract_sort) { + if self.get_func(instance_func, retract) == Some(v) + && let Some(elem) = self.get_func(elem_func, retract) { + retractions.insert(elem); + } + } + } + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + // Now collect elements (filtering out retracted ones) + let mut version = Some(instance); + while let Some(v) = version { + if let Some(elem_sort) = self.sort_ids.elem + && let Some(instance_func) = self.func_ids.elem_instance { + for elem in self.elements_of_sort(elem_sort) { + if self.get_func(instance_func, elem) == Some(v) + && !retractions.contains(&elem) { + elements.push(elem); + } + } + } + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + elements + } + + /// Get all relation tuples of an instance (including from parent chain) + /// + /// NOTE: Relation tuples are now stored in columnar batches (see `store::columnar`), + /// not as individual GeologMeta elements. 
This function returns empty until + /// columnar batch loading is implemented. + /// + /// TODO: Implement columnar batch loading for relation tuples. + pub fn get_instance_rel_tuples(&self, _instance: Slid) -> Vec<(Slid, Slid, Slid)> { + // Relation tuples are stored in columnar batches, not GeologMeta elements. + // Return empty until columnar batch loading is implemented. + vec![] + } + + /// Get all function values of an instance (including from parent chain) + /// + /// Returns (fv_slid, func_slid, arg_slid, result_slid) tuples. + /// NOTE: FuncVals are IMMUTABLE - no retractions (Monotonic Submodel Property) + pub fn get_instance_func_vals(&self, instance: Slid) -> Vec<(Slid, Slid, Slid, Slid)> { + let mut vals = Vec::new(); + + // Collect function values from all versions in the chain + let mut version = Some(instance); + while let Some(v) = version { + if let Some(fv_sort) = self.sort_ids.func_val + && let ( + Some(instance_func), + Some(func_func), + Some(arg_func), + Some(result_func), + ) = ( + self.func_ids.func_val_instance, + self.func_ids.func_val_func, + self.func_ids.func_val_arg, + self.func_ids.func_val_result, + ) { + for fv in self.elements_of_sort(fv_sort) { + if self.get_func(instance_func, fv) == Some(v) + && let (Some(func), Some(arg), Some(result)) = ( + self.get_func(func_func, fv), + self.get_func(arg_func, fv), + self.get_func(result_func, fv), + ) { + vals.push((fv, func, arg, result)); + } + } + } + version = self.func_ids.instance_parent.and_then(|f| self.get_func(f, v)); + } + + vals + } + + /// Get the theory for an instance + pub fn get_instance_theory(&self, instance: Slid) -> Option { + self.func_ids + .instance_theory + .and_then(|f| self.get_func(f, instance)) + } + + /// Get the parent of an instance (for versioning) + pub fn get_instance_parent(&self, instance: Slid) -> Option { + self.func_ids + .instance_parent + .and_then(|f| self.get_func(f, instance)) + } + + /// Get an element's sort + pub fn get_elem_sort(&self, elem: 
Slid) -> Option { + self.func_ids + .elem_sort + .and_then(|f| self.get_func(f, elem)) + } +} diff --git a/src/store/schema.rs b/src/store/schema.rs new file mode 100644 index 0000000..cf2819c --- /dev/null +++ b/src/store/schema.rs @@ -0,0 +1,345 @@ +//! Schema ID caching for GeologMeta. +//! +//! Caches sort and function IDs from the GeologMeta signature for fast lookup. + +use crate::core::ElaboratedTheory; + +/// Cached sort IDs from GeologMeta +#[derive(Default)] +pub struct SortIds { + pub theory: Option, + pub instance: Option, + pub commit: Option, + pub name_binding: Option, + pub srt: Option, + pub func: Option, + pub rel: Option, + pub elem: Option, + pub elem_retract: Option, + pub func_val: Option, + pub rel_tuple: Option, + pub rel_tuple_arg: Option, + // NOTE: No func_val_retract or rel_tuple_retract - these are immutable (Monotonic Submodel Property) + pub sequent: Option, + pub param: Option, + pub dsort: Option, + pub base_ds: Option, + pub prod_ds: Option, + pub field: Option, + pub binder: Option, + pub term: Option, + pub formula: Option, + + // Context variables (for sequent universal quantification) + pub ctx_var: Option, + + // Term subtypes + pub var_t: Option, + pub app_t: Option, + pub record_t: Option, + pub rec_entry: Option, + pub proj_t: Option, + + // Formula subtypes + pub true_f: Option, + pub false_f: Option, + pub eq_f: Option, + pub rel_f: Option, + pub conj_f: Option, + pub conj_arm: Option, + pub disj_f: Option, + pub disj_arm: Option, + pub exists_f: Option, + + // Node (for ancestry/scoping - may not be needed for persistence) + pub node: Option, +} + +/// Cached function IDs from GeologMeta +#[derive(Default)] +pub struct FuncIds { + // Theory functions + pub theory_parent: Option, + + // Instance functions + pub instance_parent: Option, + pub instance_theory: Option, + + // Commit functions + pub commit_parent: Option, + + // NameBinding functions + pub name_binding_commit: Option, + pub name_binding_theory: Option, + pub 
name_binding_instance: Option, + + // Elem functions + pub elem_instance: Option, + pub elem_sort: Option, + + // ElemRetract functions + pub elem_retract_instance: Option, + pub elem_retract_elem: Option, + + // FuncVal functions (IMMUTABLE - no retract) + pub func_val_instance: Option, + pub func_val_func: Option, + pub func_val_arg: Option, + pub func_val_result: Option, + + // RelTuple functions (IMMUTABLE - no retract) + pub rel_tuple_instance: Option, + pub rel_tuple_rel: Option, + + // RelTupleArg functions (uniform for all relations, even unary) + pub rel_tuple_arg_tuple: Option, + pub rel_tuple_arg_elem: Option, + pub rel_tuple_arg_position: Option, + + // Srt functions + pub srt_theory: Option, + + // Func functions + pub func_theory: Option, + pub func_dom: Option, + pub func_cod: Option, + + // Rel functions + pub rel_theory: Option, + pub rel_dom: Option, + + // DSort functions + pub base_ds_dsort: Option, + pub base_ds_srt: Option, + pub prod_ds_dsort: Option, + pub field_prod: Option, + pub field_type: Option, + + // Sequent functions + pub sequent_theory: Option, + pub sequent_premise: Option, + pub sequent_conclusion: Option, + + // CtxVar functions (sequent-level universal quantification) + pub ctx_var_sequent: Option, + pub ctx_var_binder: Option, + + // Binder functions + pub binder_type: Option, + + // Term/Formula to Node embeddings + pub term_node: Option, + pub formula_node: Option, + + // VarT functions + pub var_t_term: Option, + pub var_t_binder: Option, + + // AppT functions + pub app_t_term: Option, + pub app_t_func: Option, + pub app_t_arg: Option, + + // RecordT functions + pub record_t_term: Option, + + // RecEntry functions + pub rec_entry_record: Option, + pub rec_entry_val: Option, + pub rec_entry_field: Option, + + // ProjT functions + pub proj_t_term: Option, + pub proj_t_base: Option, + pub proj_t_field: Option, + + // TrueF/FalseF functions + pub true_f_formula: Option, + pub false_f_formula: Option, + + // EqF functions + pub 
eq_f_formula: Option, + pub eq_f_lhs: Option, + pub eq_f_rhs: Option, + + // RelF functions + pub rel_f_formula: Option, + pub rel_f_arg: Option, + pub rel_f_rel: Option, + + // ConjF functions + pub conj_f_formula: Option, + + // ConjArm functions + pub conj_arm_conj: Option, + pub conj_arm_child: Option, + + // DisjF functions + pub disj_f_formula: Option, + + // DisjArm functions + pub disj_arm_disj: Option, + pub disj_arm_child: Option, + + // ExistsF functions + pub exists_f_formula: Option, + pub exists_f_binder: Option, + pub exists_f_body: Option, +} + +impl SortIds { + /// Populate sort IDs from a GeologMeta theory + pub fn from_theory(theory: &ElaboratedTheory) -> Self { + let sig = &theory.theory.signature; + Self { + theory: sig.lookup_sort("Theory"), + instance: sig.lookup_sort("Instance"), + commit: sig.lookup_sort("Commit"), + name_binding: sig.lookup_sort("NameBinding"), + srt: sig.lookup_sort("Srt"), + func: sig.lookup_sort("Func"), + rel: sig.lookup_sort("Rel"), + elem: sig.lookup_sort("Elem"), + elem_retract: sig.lookup_sort("ElemRetract"), + func_val: sig.lookup_sort("FuncVal"), + rel_tuple: sig.lookup_sort("RelTuple"), + rel_tuple_arg: sig.lookup_sort("RelTupleArg"), + sequent: sig.lookup_sort("Sequent"), + param: sig.lookup_sort("Param"), + dsort: sig.lookup_sort("DSort"), + base_ds: sig.lookup_sort("BaseDS"), + prod_ds: sig.lookup_sort("ProdDS"), + field: sig.lookup_sort("Field"), + binder: sig.lookup_sort("Binder"), + term: sig.lookup_sort("Term"), + formula: sig.lookup_sort("Formula"), + ctx_var: sig.lookup_sort("CtxVar"), + var_t: sig.lookup_sort("VarT"), + app_t: sig.lookup_sort("AppT"), + record_t: sig.lookup_sort("RecordT"), + rec_entry: sig.lookup_sort("RecEntry"), + proj_t: sig.lookup_sort("ProjT"), + true_f: sig.lookup_sort("TrueF"), + false_f: sig.lookup_sort("FalseF"), + eq_f: sig.lookup_sort("EqF"), + rel_f: sig.lookup_sort("RelF"), + conj_f: sig.lookup_sort("ConjF"), + conj_arm: sig.lookup_sort("ConjArm"), + disj_f: 
sig.lookup_sort("DisjF"), + disj_arm: sig.lookup_sort("DisjArm"), + exists_f: sig.lookup_sort("ExistsF"), + node: sig.lookup_sort("Node"), + } + } +} + +impl FuncIds { + /// Populate function IDs from a GeologMeta theory + pub fn from_theory(theory: &ElaboratedTheory) -> Self { + let sig = &theory.theory.signature; + Self { + theory_parent: sig.lookup_func("Theory/parent"), + instance_parent: sig.lookup_func("Instance/parent"), + instance_theory: sig.lookup_func("Instance/theory"), + commit_parent: sig.lookup_func("Commit/parent"), + name_binding_commit: sig.lookup_func("NameBinding/commit"), + name_binding_theory: sig.lookup_func("NameBinding/theory"), + name_binding_instance: sig.lookup_func("NameBinding/instance"), + elem_instance: sig.lookup_func("Elem/instance"), + elem_sort: sig.lookup_func("Elem/sort"), + elem_retract_instance: sig.lookup_func("ElemRetract/instance"), + elem_retract_elem: sig.lookup_func("ElemRetract/elem"), + func_val_instance: sig.lookup_func("FuncVal/instance"), + func_val_func: sig.lookup_func("FuncVal/func"), + func_val_arg: sig.lookup_func("FuncVal/arg"), + func_val_result: sig.lookup_func("FuncVal/result"), + rel_tuple_instance: sig.lookup_func("RelTuple/instance"), + rel_tuple_rel: sig.lookup_func("RelTuple/rel"), + rel_tuple_arg_tuple: sig.lookup_func("RelTupleArg/tuple"), + rel_tuple_arg_elem: sig.lookup_func("RelTupleArg/elem"), + rel_tuple_arg_position: sig.lookup_func("RelTupleArg/position"), + srt_theory: sig.lookup_func("Srt/theory"), + func_theory: sig.lookup_func("Func/theory"), + func_dom: sig.lookup_func("Func/dom"), + func_cod: sig.lookup_func("Func/cod"), + rel_theory: sig.lookup_func("Rel/theory"), + rel_dom: sig.lookup_func("Rel/dom"), + base_ds_dsort: sig.lookup_func("BaseDS/dsort"), + base_ds_srt: sig.lookup_func("BaseDS/srt"), + prod_ds_dsort: sig.lookup_func("ProdDS/dsort"), + field_prod: sig.lookup_func("Field/prod"), + field_type: sig.lookup_func("Field/type"), + + // Sequent functions + sequent_theory: 
sig.lookup_func("Sequent/theory"), + sequent_premise: sig.lookup_func("Sequent/premise"), + sequent_conclusion: sig.lookup_func("Sequent/conclusion"), + + // CtxVar functions + ctx_var_sequent: sig.lookup_func("CtxVar/sequent"), + ctx_var_binder: sig.lookup_func("CtxVar/binder"), + + // Binder functions + binder_type: sig.lookup_func("Binder/type"), + + // Term/Formula to Node embeddings + term_node: sig.lookup_func("Term/node"), + formula_node: sig.lookup_func("Formula/node"), + + // VarT functions + var_t_term: sig.lookup_func("VarT/term"), + var_t_binder: sig.lookup_func("VarT/binder"), + + // AppT functions + app_t_term: sig.lookup_func("AppT/term"), + app_t_func: sig.lookup_func("AppT/func"), + app_t_arg: sig.lookup_func("AppT/arg"), + + // RecordT functions + record_t_term: sig.lookup_func("RecordT/term"), + + // RecEntry functions + rec_entry_record: sig.lookup_func("RecEntry/record"), + rec_entry_val: sig.lookup_func("RecEntry/val"), + rec_entry_field: sig.lookup_func("RecEntry/field"), + + // ProjT functions + proj_t_term: sig.lookup_func("ProjT/term"), + proj_t_base: sig.lookup_func("ProjT/base"), + proj_t_field: sig.lookup_func("ProjT/field"), + + // TrueF/FalseF functions + true_f_formula: sig.lookup_func("TrueF/formula"), + false_f_formula: sig.lookup_func("FalseF/formula"), + + // EqF functions + eq_f_formula: sig.lookup_func("EqF/formula"), + eq_f_lhs: sig.lookup_func("EqF/lhs"), + eq_f_rhs: sig.lookup_func("EqF/rhs"), + + // RelF functions + rel_f_formula: sig.lookup_func("RelF/formula"), + rel_f_arg: sig.lookup_func("RelF/arg"), + rel_f_rel: sig.lookup_func("RelF/rel"), + + // ConjF functions + conj_f_formula: sig.lookup_func("ConjF/formula"), + + // ConjArm functions + conj_arm_conj: sig.lookup_func("ConjArm/conj"), + conj_arm_child: sig.lookup_func("ConjArm/child"), + + // DisjF functions + disj_f_formula: sig.lookup_func("DisjF/formula"), + + // DisjArm functions + disj_arm_disj: sig.lookup_func("DisjArm/disj"), + disj_arm_child: 
sig.lookup_func("DisjArm/child"), + + // ExistsF functions + exists_f_formula: sig.lookup_func("ExistsF/formula"), + exists_f_binder: sig.lookup_func("ExistsF/binder"), + exists_f_body: sig.lookup_func("ExistsF/body"), + } + } +} diff --git a/src/store/theory.rs b/src/store/theory.rs new file mode 100644 index 0000000..3f66577 --- /dev/null +++ b/src/store/theory.rs @@ -0,0 +1,750 @@ +//! Theory operations for the Store. +//! +//! Creating, extending, and modifying theories in the GeologMeta structure. + +use std::collections::HashMap; + +use crate::core::{DerivedSort, Formula, Sequent, Signature, Term}; +use crate::id::Slid; + +use super::append::AppendOps; +use super::{BindingKind, Store, UncommittedBinding}; + +impl Store { + /// Create a new theory (version 0, no parent) + pub fn create_theory(&mut self, name: &str) -> Result { + let sort_id = self.sort_ids.theory.ok_or("Theory sort not found")?; + let theory_slid = self.add_element(sort_id, name); + + // Register uncommitted binding + self.uncommitted.insert( + name.to_string(), + UncommittedBinding { + target: theory_slid, + kind: BindingKind::Theory, + }, + ); + + Ok(theory_slid) + } + + /// Create a new version of an existing theory + pub fn extend_theory(&mut self, parent: Slid, name: &str) -> Result { + let sort_id = self.sort_ids.theory.ok_or("Theory sort not found")?; + let theory_slid = self.add_element( + sort_id, + &format!("{}@v{}", name, self.meta.carriers[sort_id].len()), + ); + + // Set parent + let func_id = self.func_ids.theory_parent.ok_or("Theory/parent not found")?; + self.define_func(func_id, theory_slid, parent)?; + + // Update uncommitted binding + self.uncommitted.insert( + name.to_string(), + UncommittedBinding { + target: theory_slid, + kind: BindingKind::Theory, + }, + ); + + Ok(theory_slid) + } + + /// Add a sort to a theory + pub fn add_sort(&mut self, theory: Slid, name: &str) -> Result { + let sort_id = self.sort_ids.srt.ok_or("Srt sort not found")?; + let srt_slid = 
self.add_element_qualified( + sort_id, + vec![self.get_element_name(theory), name.to_string()], + ); + + let func_id = self.func_ids.srt_theory.ok_or("Srt/theory not found")?; + self.define_func(func_id, srt_slid, theory)?; + + Ok(srt_slid) + } + + /// Add a function to a theory + pub fn add_function( + &mut self, + theory: Slid, + name: &str, + domain: Slid, + codomain: Slid, + ) -> Result { + let sort_id = self.sort_ids.func.ok_or("Func sort not found")?; + let func_slid = self.add_element_qualified( + sort_id, + vec![self.get_element_name(theory), name.to_string()], + ); + + let theory_func = self.func_ids.func_theory.ok_or("Func/theory not found")?; + let dom_func = self.func_ids.func_dom.ok_or("Func/dom not found")?; + let cod_func = self.func_ids.func_cod.ok_or("Func/cod not found")?; + + self.define_func(theory_func, func_slid, theory)?; + self.define_func(dom_func, func_slid, domain)?; + self.define_func(cod_func, func_slid, codomain)?; + + Ok(func_slid) + } + + /// Add a relation to a theory + pub fn add_relation(&mut self, theory: Slid, name: &str, domain: Slid) -> Result { + let sort_id = self.sort_ids.rel.ok_or("Rel sort not found")?; + let rel_slid = self.add_element_qualified( + sort_id, + vec![self.get_element_name(theory), name.to_string()], + ); + + let theory_func = self.func_ids.rel_theory.ok_or("Rel/theory not found")?; + let dom_func = self.func_ids.rel_dom.ok_or("Rel/dom not found")?; + + self.define_func(theory_func, rel_slid, theory)?; + self.define_func(dom_func, rel_slid, domain)?; + + Ok(rel_slid) + } + + /// Create a base DSort from a Srt + pub fn make_base_dsort(&mut self, srt: Slid) -> Result { + let base_ds_sort = self.sort_ids.base_ds.ok_or("BaseDS sort not found")?; + let dsort_sort = self.sort_ids.dsort.ok_or("DSort sort not found")?; + + let base_ds_slid = self.add_element(base_ds_sort, &format!("base_{}", self.get_element_name(srt))); + let dsort_slid = self.add_element(dsort_sort, &format!("dsort_{}", 
self.get_element_name(srt))); + + let dsort_func = self.func_ids.base_ds_dsort.ok_or("BaseDS/dsort not found")?; + let srt_func = self.func_ids.base_ds_srt.ok_or("BaseDS/srt not found")?; + + self.define_func(dsort_func, base_ds_slid, dsort_slid)?; + self.define_func(srt_func, base_ds_slid, srt)?; + + Ok(dsort_slid) + } + + /// Create a product DSort with fields + pub fn make_product_dsort( + &mut self, + theory: Slid, + fields: &[(String, Slid)], // (field_name, field_dsort) + ) -> Result { + let (dsort, _) = self.make_product_dsort_with_fields(theory, fields)?; + Ok(dsort) + } + + /// Create a product DSort with fields, returning both the DSort and field Slids + fn make_product_dsort_with_fields( + &mut self, + theory: Slid, + fields: &[(String, Slid)], // (field_name, field_dsort) + ) -> Result<(Slid, HashMap), String> { + let prod_ds_sort = self.sort_ids.prod_ds.ok_or("ProdDS sort not found")?; + let dsort_sort = self.sort_ids.dsort.ok_or("DSort sort not found")?; + let field_sort = self.sort_ids.field.ok_or("Field sort not found")?; + + // Create the DSort element + let field_names: Vec<_> = fields.iter().map(|(n, _)| n.as_str()).collect(); + let dsort_name = format!("dsort_[{}]", field_names.join(",")); + let dsort_slid = self.add_element(dsort_sort, &dsort_name); + + // Create the ProdDS element + let prod_ds_slid = self.add_element(prod_ds_sort, &format!("prod_{}", dsort_name)); + + let dsort_func = self.func_ids.prod_ds_dsort.ok_or("ProdDS/dsort not found")?; + self.define_func(dsort_func, prod_ds_slid, dsort_slid)?; + + // Create Field elements + let prod_func = self.func_ids.field_prod.ok_or("Field/prod not found")?; + let type_func = self.func_ids.field_type.ok_or("Field/type not found")?; + + let mut field_slids = HashMap::new(); + for (field_name, field_dsort) in fields { + let field_slid = self.add_element_qualified( + field_sort, + vec![self.get_element_name(theory), field_name.clone()], + ); + self.define_func(prod_func, field_slid, prod_ds_slid)?; 
+ self.define_func(type_func, field_slid, *field_dsort)?; + + field_slids.insert(field_name.clone(), field_slid); + } + + Ok((dsort_slid, field_slids)) + } + + /// Persist a full signature to the Store. + /// + /// Creates all sorts, functions, and relations in GeologMeta. + /// Returns a mapping from sort indices to Srt Slids. + pub fn persist_signature( + &mut self, + theory: Slid, + signature: &Signature, + ) -> Result { + let mut sort_slids: HashMap = HashMap::new(); + let mut dsort_slids: HashMap = HashMap::new(); // Base DSort for each sort + let mut func_slids: HashMap = HashMap::new(); + let mut rel_slids: HashMap = HashMap::new(); + let mut field_slids: HashMap = HashMap::new(); + + // 1. Create all Srt elements and their base DSorts + for (sort_id, sort_name) in signature.sorts.iter().enumerate() { + let srt_slid = self.add_sort(theory, sort_name)?; + sort_slids.insert(sort_id, srt_slid); + + // Create base DSort for this sort + let dsort_slid = self.make_base_dsort(srt_slid)?; + dsort_slids.insert(sort_id, dsort_slid); + } + + // 2. Create all Func elements + for (func_id, func_sym) in signature.functions.iter().enumerate() { + let (domain_dsort, dom_fields) = + self.persist_derived_sort_with_fields(theory, &func_sym.domain, &dsort_slids)?; + let (codomain_dsort, cod_fields) = + self.persist_derived_sort_with_fields(theory, &func_sym.codomain, &dsort_slids)?; + + // Collect field slids + field_slids.extend(dom_fields); + field_slids.extend(cod_fields); + + let func_slid = self.add_function(theory, &func_sym.name, domain_dsort, codomain_dsort)?; + func_slids.insert(func_id, func_slid); + } + + // 3. 
Create all Rel elements + for (rel_id, rel_sym) in signature.relations.iter().enumerate() { + let (domain_dsort, dom_fields) = + self.persist_derived_sort_with_fields(theory, &rel_sym.domain, &dsort_slids)?; + field_slids.extend(dom_fields); + + let rel_slid = self.add_relation(theory, &rel_sym.name, domain_dsort)?; + rel_slids.insert(rel_id, rel_slid); + } + + Ok(SignaturePersistResult { + sort_slids, + dsort_slids, + func_slids, + rel_slids, + field_slids, + }) + } + + /// Convert a DerivedSort to a DSort Slid, creating necessary elements. + fn persist_derived_sort( + &mut self, + theory: Slid, + ds: &DerivedSort, + dsort_slids: &HashMap, + ) -> Result { + let (dsort, _) = self.persist_derived_sort_with_fields(theory, ds, dsort_slids)?; + Ok(dsort) + } + + /// Convert a DerivedSort to a DSort Slid, also returning field Slids. + fn persist_derived_sort_with_fields( + &mut self, + theory: Slid, + ds: &DerivedSort, + dsort_slids: &HashMap, + ) -> Result<(Slid, HashMap), String> { + match ds { + DerivedSort::Base(sort_id) => { + let dsort = dsort_slids + .get(sort_id) + .copied() + .ok_or_else(|| format!("Unknown sort id: {}", sort_id))?; + Ok((dsort, HashMap::new())) + } + DerivedSort::Product(fields) => { + if fields.is_empty() { + // Unit type - create empty product + let dsort = self.make_product_dsort(theory, &[])?; + Ok((dsort, HashMap::new())) + } else { + // Recursively persist field types + let mut field_dsorts = Vec::new(); + let mut all_field_slids = HashMap::new(); + + for (field_name, field_type) in fields { + let (field_dsort, nested_fields) = + self.persist_derived_sort_with_fields(theory, field_type, dsort_slids)?; + field_dsorts.push((field_name.clone(), field_dsort)); + all_field_slids.extend(nested_fields); + } + + let (dsort, new_field_slids) = + self.make_product_dsort_with_fields(theory, &field_dsorts)?; + all_field_slids.extend(new_field_slids); + + Ok((dsort, all_field_slids)) + } + } + } + } + + // 
================================================================ + // AXIOM PERSISTENCE + // ================================================================ + + /// Create a Binder element with the given type. + fn persist_binder( + &mut self, + name: &str, + dsort: Slid, + ) -> Result { + let binder_sort = self.sort_ids.binder.ok_or("Binder sort not found")?; + let binder_slid = self.add_element(binder_sort, &format!("binder_{}", name)); + + let type_func = self.func_ids.binder_type.ok_or("Binder/type not found")?; + self.define_func(type_func, binder_slid, dsort)?; + + Ok(binder_slid) + } + + /// Persist a Term, returning its Term Slid. + /// + /// # Arguments + /// - `theory`: The theory this term belongs to + /// - `term`: The term to persist + /// - `sig_result`: Mapping from signature indices to Slids + /// - `binders`: Mapping from variable names to their Binder Slids + pub fn persist_term( + &mut self, + theory: Slid, + term: &Term, + sig_result: &SignaturePersistResult, + binders: &HashMap, + ) -> Result { + let term_sort = self.sort_ids.term.ok_or("Term sort not found")?; + let node_sort = self.sort_ids.node.ok_or("Node sort not found")?; + + match term { + Term::Var(name, _sort) => { + // Create VarT element + let var_t_sort = self.sort_ids.var_t.ok_or("VarT sort not found")?; + let term_slid = self.add_element(term_sort, &format!("term_var_{}", name)); + let var_t_slid = self.add_element(var_t_sort, &format!("var_t_{}", name)); + + // Link VarT to Term + let term_func = self.func_ids.var_t_term.ok_or("VarT/term not found")?; + self.define_func(term_func, var_t_slid, term_slid)?; + + // Link VarT to Binder + let binder_slid = binders + .get(name) + .copied() + .ok_or_else(|| format!("Unknown variable: {}", name))?; + let binder_func = self.func_ids.var_t_binder.ok_or("VarT/binder not found")?; + self.define_func(binder_func, var_t_slid, binder_slid)?; + + // Create Node for scoping + let node_slid = self.add_element(node_sort, 
&format!("node_term_var_{}", name)); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + + Term::App(func_id, arg) => { + // Recursively persist argument + let arg_slid = self.persist_term(theory, arg, sig_result, binders)?; + + // Create AppT element + let app_t_sort = self.sort_ids.app_t.ok_or("AppT sort not found")?; + let term_slid = self.add_element(term_sort, "term_app"); + let app_t_slid = self.add_element(app_t_sort, "app_t"); + + // Link AppT to Term + let term_func = self.func_ids.app_t_term.ok_or("AppT/term not found")?; + self.define_func(term_func, app_t_slid, term_slid)?; + + // Link AppT to Func + let func_slid = sig_result + .func_slids + .get(func_id) + .copied() + .ok_or_else(|| format!("Unknown function id: {}", func_id))?; + let func_func = self.func_ids.app_t_func.ok_or("AppT/func not found")?; + self.define_func(func_func, app_t_slid, func_slid)?; + + // Link AppT to argument Term + let arg_func = self.func_ids.app_t_arg.ok_or("AppT/arg not found")?; + self.define_func(arg_func, app_t_slid, arg_slid)?; + + // Create Node for scoping + let node_slid = self.add_element(node_sort, "node_term_app"); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + + Term::Record(fields) => { + // Create RecordT element + let record_t_sort = self.sort_ids.record_t.ok_or("RecordT sort not found")?; + let rec_entry_sort = self.sort_ids.rec_entry.ok_or("RecEntry sort not found")?; + + let term_slid = self.add_element(term_sort, "term_record"); + let record_t_slid = self.add_element(record_t_sort, "record_t"); + + // Link RecordT to Term + let term_func = self.func_ids.record_t_term.ok_or("RecordT/term not found")?; + self.define_func(term_func, record_t_slid, term_slid)?; + + // Create RecEntry for each field + for (field_name, field_term) in fields { 
+ let val_slid = self.persist_term(theory, field_term, sig_result, binders)?; + + let rec_entry_slid = + self.add_element(rec_entry_sort, &format!("rec_entry_{}", field_name)); + + // Link to record + let record_func = self.func_ids.rec_entry_record.ok_or("RecEntry/record not found")?; + self.define_func(record_func, rec_entry_slid, record_t_slid)?; + + // Link to value + let val_func = self.func_ids.rec_entry_val.ok_or("RecEntry/val not found")?; + self.define_func(val_func, rec_entry_slid, val_slid)?; + + // Link to field (need to look up Field Slid by name) + if let Some(&field_slid) = sig_result.field_slids.get(field_name) { + let field_func = self.func_ids.rec_entry_field.ok_or("RecEntry/field not found")?; + self.define_func(field_func, rec_entry_slid, field_slid)?; + } + // Note: field_slids may not contain all fields if they weren't persisted + // (e.g., for inline record types). This is a known limitation. + } + + // Create Node for scoping + let node_slid = self.add_element(node_sort, "node_term_record"); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + + Term::Project(base, field_name) => { + // Recursively persist base term + let base_slid = self.persist_term(theory, base, sig_result, binders)?; + + // Create ProjT element + let proj_t_sort = self.sort_ids.proj_t.ok_or("ProjT sort not found")?; + let term_slid = self.add_element(term_sort, &format!("term_proj_{}", field_name)); + let proj_t_slid = self.add_element(proj_t_sort, &format!("proj_t_{}", field_name)); + + // Link ProjT to Term + let term_func = self.func_ids.proj_t_term.ok_or("ProjT/term not found")?; + self.define_func(term_func, proj_t_slid, term_slid)?; + + // Link ProjT to base Term + let base_func = self.func_ids.proj_t_base.ok_or("ProjT/base not found")?; + self.define_func(base_func, proj_t_slid, base_slid)?; + + // Link ProjT to Field (if we can find it) + if let 
Some(&field_slid) = sig_result.field_slids.get(field_name) { + let field_func = self.func_ids.proj_t_field.ok_or("ProjT/field not found")?; + self.define_func(field_func, proj_t_slid, field_slid)?; + } + + // Create Node for scoping + let node_slid = self.add_element(node_sort, &format!("node_term_proj_{}", field_name)); + let term_node_func = self.func_ids.term_node.ok_or("Term/node not found")?; + self.define_func(term_node_func, term_slid, node_slid)?; + + Ok(term_slid) + } + } + } + + /// Persist a Formula, returning its Formula Slid. + /// + /// # Arguments + /// - `theory`: The theory this formula belongs to + /// - `formula`: The formula to persist + /// - `sig_result`: Mapping from signature indices to Slids + /// - `binders`: Mapping from variable names to their Binder Slids (mutable for Exists) + pub fn persist_formula( + &mut self, + theory: Slid, + formula: &Formula, + sig_result: &SignaturePersistResult, + binders: &mut HashMap, + ) -> Result { + let formula_sort = self.sort_ids.formula.ok_or("Formula sort not found")?; + let node_sort = self.sort_ids.node.ok_or("Node sort not found")?; + + match formula { + Formula::True => { + let true_f_sort = self.sort_ids.true_f.ok_or("TrueF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_true"); + let true_f_slid = self.add_element(true_f_sort, "true_f"); + + let formula_func = self.func_ids.true_f_formula.ok_or("TrueF/formula not found")?; + self.define_func(formula_func, true_f_slid, formula_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_true"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::False => { + let false_f_sort = self.sort_ids.false_f.ok_or("FalseF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_false"); + let false_f_slid = self.add_element(false_f_sort, "false_f"); + + let 
formula_func = self.func_ids.false_f_formula.ok_or("FalseF/formula not found")?; + self.define_func(formula_func, false_f_slid, formula_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_false"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Eq(lhs, rhs) => { + let lhs_slid = self.persist_term(theory, lhs, sig_result, binders)?; + let rhs_slid = self.persist_term(theory, rhs, sig_result, binders)?; + + let eq_f_sort = self.sort_ids.eq_f.ok_or("EqF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_eq"); + let eq_f_slid = self.add_element(eq_f_sort, "eq_f"); + + let formula_func = self.func_ids.eq_f_formula.ok_or("EqF/formula not found")?; + self.define_func(formula_func, eq_f_slid, formula_slid)?; + + let lhs_func = self.func_ids.eq_f_lhs.ok_or("EqF/lhs not found")?; + self.define_func(lhs_func, eq_f_slid, lhs_slid)?; + + let rhs_func = self.func_ids.eq_f_rhs.ok_or("EqF/rhs not found")?; + self.define_func(rhs_func, eq_f_slid, rhs_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_eq"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Rel(rel_id, arg) => { + let arg_slid = self.persist_term(theory, arg, sig_result, binders)?; + + let rel_f_sort = self.sort_ids.rel_f.ok_or("RelF sort not found")?; + let formula_slid = self.add_element(formula_sort, "formula_rel"); + let rel_f_slid = self.add_element(rel_f_sort, "rel_f"); + + let formula_func = self.func_ids.rel_f_formula.ok_or("RelF/formula not found")?; + self.define_func(formula_func, rel_f_slid, formula_slid)?; + + let arg_func = self.func_ids.rel_f_arg.ok_or("RelF/arg not found")?; + self.define_func(arg_func, rel_f_slid, arg_slid)?; + + let rel_slid = sig_result + 
.rel_slids + .get(rel_id) + .copied() + .ok_or_else(|| format!("Unknown relation id: {}", rel_id))?; + let rel_func = self.func_ids.rel_f_rel.ok_or("RelF/rel not found")?; + self.define_func(rel_func, rel_f_slid, rel_slid)?; + + let node_slid = self.add_element(node_sort, "node_formula_rel"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Conj(conjuncts) => { + let conj_f_sort = self.sort_ids.conj_f.ok_or("ConjF sort not found")?; + let conj_arm_sort = self.sort_ids.conj_arm.ok_or("ConjArm sort not found")?; + + let formula_slid = self.add_element(formula_sort, "formula_conj"); + let conj_f_slid = self.add_element(conj_f_sort, "conj_f"); + + let formula_func = self.func_ids.conj_f_formula.ok_or("ConjF/formula not found")?; + self.define_func(formula_func, conj_f_slid, formula_slid)?; + + // Persist each conjunct as a ConjArm + for (i, child_formula) in conjuncts.iter().enumerate() { + let child_slid = self.persist_formula(theory, child_formula, sig_result, binders)?; + + let arm_slid = self.add_element(conj_arm_sort, &format!("conj_arm_{}", i)); + + let conj_func = self.func_ids.conj_arm_conj.ok_or("ConjArm/conj not found")?; + self.define_func(conj_func, arm_slid, conj_f_slid)?; + + let child_func = self.func_ids.conj_arm_child.ok_or("ConjArm/child not found")?; + self.define_func(child_func, arm_slid, child_slid)?; + } + + let node_slid = self.add_element(node_sort, "node_formula_conj"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Disj(disjuncts) => { + let disj_f_sort = self.sort_ids.disj_f.ok_or("DisjF sort not found")?; + let disj_arm_sort = self.sort_ids.disj_arm.ok_or("DisjArm sort not found")?; + + let formula_slid = self.add_element(formula_sort, "formula_disj"); + let 
disj_f_slid = self.add_element(disj_f_sort, "disj_f"); + + let formula_func = self.func_ids.disj_f_formula.ok_or("DisjF/formula not found")?; + self.define_func(formula_func, disj_f_slid, formula_slid)?; + + // Persist each disjunct as a DisjArm + for (i, child_formula) in disjuncts.iter().enumerate() { + let child_slid = self.persist_formula(theory, child_formula, sig_result, binders)?; + + let arm_slid = self.add_element(disj_arm_sort, &format!("disj_arm_{}", i)); + + let disj_func = self.func_ids.disj_arm_disj.ok_or("DisjArm/disj not found")?; + self.define_func(disj_func, arm_slid, disj_f_slid)?; + + let child_func = self.func_ids.disj_arm_child.ok_or("DisjArm/child not found")?; + self.define_func(child_func, arm_slid, child_slid)?; + } + + let node_slid = self.add_element(node_sort, "node_formula_disj"); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + + Formula::Exists(var_name, var_sort, body) => { + let exists_f_sort = self.sort_ids.exists_f.ok_or("ExistsF sort not found")?; + + let formula_slid = self.add_element(formula_sort, &format!("formula_exists_{}", var_name)); + let exists_f_slid = self.add_element(exists_f_sort, &format!("exists_f_{}", var_name)); + + let formula_func = self.func_ids.exists_f_formula.ok_or("ExistsF/formula not found")?; + self.define_func(formula_func, exists_f_slid, formula_slid)?; + + // Create binder for this existential + let dsort = self.persist_derived_sort(theory, var_sort, &sig_result.dsort_slids)?; + let binder_slid = self.persist_binder(var_name, dsort)?; + + let binder_func = self.func_ids.exists_f_binder.ok_or("ExistsF/binder not found")?; + self.define_func(binder_func, exists_f_slid, binder_slid)?; + + // Extend binders for the body + let old_binder = binders.insert(var_name.clone(), binder_slid); + + // Persist body with extended binders + let body_slid = self.persist_formula(theory, 
body, sig_result, binders)?; + + // Restore old binder (if any) for proper scoping + if let Some(old) = old_binder { + binders.insert(var_name.clone(), old); + } else { + binders.remove(var_name); + } + + let body_func = self.func_ids.exists_f_body.ok_or("ExistsF/body not found")?; + self.define_func(body_func, exists_f_slid, body_slid)?; + + let node_slid = self.add_element(node_sort, &format!("node_formula_exists_{}", var_name)); + let formula_node_func = self.func_ids.formula_node.ok_or("Formula/node not found")?; + self.define_func(formula_node_func, formula_slid, node_slid)?; + + Ok(formula_slid) + } + } + } + + /// Persist an axiom (Sequent) to GeologMeta. + /// + /// Creates the Sequent element, context variables, premise, and conclusion. + pub fn persist_axiom( + &mut self, + theory: Slid, + axiom: &Sequent, + axiom_name: &str, + sig_result: &SignaturePersistResult, + ) -> Result { + let sequent_sort = self.sort_ids.sequent.ok_or("Sequent sort not found")?; + let ctx_var_sort = self.sort_ids.ctx_var.ok_or("CtxVar sort not found")?; + + // Create Sequent element + let sequent_slid = self.add_element_qualified( + sequent_sort, + vec![self.get_element_name(theory), axiom_name.to_string()], + ); + + // Link to theory + let theory_func = self.func_ids.sequent_theory.ok_or("Sequent/theory not found")?; + self.define_func(theory_func, sequent_slid, theory)?; + + // Create binders for context variables + let mut binders = HashMap::new(); + for (var_name, var_sort) in &axiom.context.vars { + let dsort = self.persist_derived_sort(theory, var_sort, &sig_result.dsort_slids)?; + let binder_slid = self.persist_binder(var_name, dsort)?; + binders.insert(var_name.clone(), binder_slid); + + // Create CtxVar linking sequent to binder + let ctx_var_slid = self.add_element(ctx_var_sort, &format!("ctx_var_{}", var_name)); + + let sequent_func = self.func_ids.ctx_var_sequent.ok_or("CtxVar/sequent not found")?; + self.define_func(sequent_func, ctx_var_slid, sequent_slid)?; + + 
let binder_func = self.func_ids.ctx_var_binder.ok_or("CtxVar/binder not found")?; + self.define_func(binder_func, ctx_var_slid, binder_slid)?; + } + + // Persist premise formula + let premise_slid = self.persist_formula(theory, &axiom.premise, sig_result, &mut binders)?; + let premise_func = self.func_ids.sequent_premise.ok_or("Sequent/premise not found")?; + self.define_func(premise_func, sequent_slid, premise_slid)?; + + // Persist conclusion formula + let conclusion_slid = self.persist_formula(theory, &axiom.conclusion, sig_result, &mut binders)?; + let conclusion_func = self.func_ids.sequent_conclusion.ok_or("Sequent/conclusion not found")?; + self.define_func(conclusion_func, sequent_slid, conclusion_slid)?; + + Ok(sequent_slid) + } + + /// Persist all axioms from a Theory to GeologMeta. + pub fn persist_axioms( + &mut self, + theory: Slid, + axioms: &[Sequent], + axiom_names: &[String], + sig_result: &SignaturePersistResult, + ) -> Result, String> { + let mut axiom_slids = Vec::new(); + for (axiom, name) in axioms.iter().zip(axiom_names.iter()) { + let slid = self.persist_axiom(theory, axiom, name, sig_result)?; + axiom_slids.push(slid); + } + Ok(axiom_slids) + } +} + +/// Result of persisting a signature to GeologMeta. +/// +/// Maps from local indices (as used in Signature) to Slids in GeologMeta. +#[derive(Debug)] +pub struct SignaturePersistResult { + /// Sort index -> Srt Slid + pub sort_slids: HashMap, + /// Sort index -> base DSort Slid for that sort + pub dsort_slids: HashMap, + /// Function index -> Func Slid + pub func_slids: HashMap, + /// Relation index -> Rel Slid + pub rel_slids: HashMap, + /// Field name -> Field Slid (for record types in domains) + pub field_slids: HashMap, +} diff --git a/src/tensor/builder.rs b/src/tensor/builder.rs new file mode 100644 index 0000000..27ef44a --- /dev/null +++ b/src/tensor/builder.rs @@ -0,0 +1,392 @@ +//! Builder helpers for tensor expressions. +//! +//! 
High-level operations like conjunction, existential quantification, and disjunction. + +use std::collections::{BTreeSet, HashMap}; + +use super::expr::TensorExpr; + +/// Conjunction of two tensor expressions with variable alignment. +/// +/// Given tensors T₁ and T₂ with named variables, compute their conjunction +/// by building Product + Contract to identify shared variables. +pub fn conjunction( + t1: TensorExpr, + vars1: &[String], + t2: TensorExpr, + vars2: &[String], +) -> (TensorExpr, Vec) { + // Compute combined variable list and mapping + let mut combined_vars: Vec = vars1.to_vec(); + let mut var_to_target: HashMap<&str, usize> = HashMap::new(); + + for (i, v) in vars1.iter().enumerate() { + var_to_target.insert(v, i); + } + + let mut index_map: Vec = (0..vars1.len()).collect(); + + for v in vars2 { + if let Some(&target) = var_to_target.get(v.as_str()) { + index_map.push(target); + } else { + let new_target = combined_vars.len(); + var_to_target.insert(v, new_target); + combined_vars.push(v.clone()); + index_map.push(new_target); + } + } + + let output: BTreeSet = (0..combined_vars.len()).collect(); + + let expr = TensorExpr::Contract { + inner: Box::new(TensorExpr::Product(vec![t1, t2])), + index_map, + output, + }; + + (expr, combined_vars) +} + +/// Existential quantification over a variable. +/// +/// Removes the variable by OR-ing over all its values (contraction). 
+pub fn exists(tensor: TensorExpr, vars: &[String], var: &str) -> (TensorExpr, Vec) { + let var_idx = vars.iter().position(|v| v == var); + + match var_idx { + None => (tensor, vars.to_vec()), + Some(idx) => { + let fresh_target = vars.len(); + let index_map: Vec = (0..vars.len()) + .map(|i| if i == idx { fresh_target } else { i }) + .collect(); + + let output: BTreeSet = (0..vars.len()).filter(|&i| i != idx).collect(); + + let result_vars: Vec = vars + .iter() + .enumerate() + .filter(|&(i, _)| i != idx) + .map(|(_, v)| v.clone()) + .collect(); + + let expr = TensorExpr::Contract { + inner: Box::new(tensor), + index_map, + output, + }; + + (expr, result_vars) + } + } +} + +/// Multi-way conjunction with variable alignment. +pub fn conjunction_all(tensors: Vec<(TensorExpr, Vec)>) -> (TensorExpr, Vec) { + if tensors.is_empty() { + return (TensorExpr::scalar(true), vec![]); + } + + let mut result = tensors.into_iter(); + let (mut acc_expr, mut acc_vars) = result.next().unwrap(); + + for (expr, vars) in result { + let (new_expr, new_vars) = conjunction(acc_expr, &acc_vars, expr, &vars); + acc_expr = new_expr; + acc_vars = new_vars; + } + + (acc_expr, acc_vars) +} + +/// Disjunction of two tensor expressions with variable alignment. +/// +/// Both tensors must have the same variables (possibly in different order). +/// The result is the pointwise OR. +pub fn disjunction( + t1: TensorExpr, + vars1: &[String], + t2: TensorExpr, + vars2: &[String], +) -> (TensorExpr, Vec) { + // Check that variables are the same set + let set1: std::collections::HashSet<_> = vars1.iter().collect(); + let set2: std::collections::HashSet<_> = vars2.iter().collect(); + + if set1 != set2 { + // Variables don't match - this should have been handled at compile_formula level + // by extending tensors with full-domain products for missing variables. + // If we get here, something went wrong. 
+ let only_in_1: Vec<_> = set1.difference(&set2).collect(); + let only_in_2: Vec<_> = set2.difference(&set1).collect(); + panic!( + "disjunction received mismatched variables (should have been aligned).\n\ + Left has: {:?}, Right has: {:?}\n\ + Only in left: {:?}, Only in right: {:?}", + vars1, vars2, only_in_1, only_in_2 + ); + } + + // If vars2 is in different order than vars1, reorder t2 via Contract + if vars1 == vars2 { + // Same order, just union + (TensorExpr::Sum(vec![t1, t2]), vars1.to_vec()) + } else { + // Need to reorder t2 to match vars1 ordering + // Build index_map from vars2 positions to vars1 positions + let index_map: Vec = vars2 + .iter() + .map(|v| vars1.iter().position(|v1| v1 == v).unwrap()) + .collect(); + let output: BTreeSet = (0..vars1.len()).collect(); + + let t2_reordered = TensorExpr::Contract { + inner: Box::new(t2), + index_map, + output, + }; + + (TensorExpr::Sum(vec![t1, t2_reordered]), vars1.to_vec()) + } +} + +/// Multi-way disjunction with variable alignment. +/// +/// All tensors must have the same variables. 
+pub fn disjunction_all(tensors: Vec<(TensorExpr, Vec)>) -> (TensorExpr, Vec) { + if tensors.is_empty() { + return (TensorExpr::scalar(false), vec![]); + } + + let mut result = tensors.into_iter(); + let (mut acc_expr, mut acc_vars) = result.next().unwrap(); + + for (expr, vars) in result { + let (new_expr, new_vars) = disjunction(acc_expr, &acc_vars, expr, &vars); + acc_expr = new_expr; + acc_vars = new_vars; + } + + (acc_expr, acc_vars) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tensor::sparse::SparseTensor; + + fn leaf(t: SparseTensor) -> TensorExpr { + TensorExpr::leaf(t) + } + + #[test] + fn test_conjunction() { + let mut r = SparseTensor::empty(vec![2, 2]); + r.insert(vec![0, 1]); + + let mut s = SparseTensor::empty(vec![2, 2]); + s.insert(vec![1, 0]); + s.insert(vec![1, 1]); + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string(), "z".to_string()]; + + let (expr, vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "y", "z"]); + assert_eq!(result.dims, vec![2, 2, 2]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0, 1, 0])); + assert!(result.contains(&[0, 1, 1])); + } + + #[test] + fn test_exists() { + let mut t = SparseTensor::empty(vec![2, 2]); + t.insert(vec![0, 0]); + t.insert(vec![0, 1]); + t.insert(vec![1, 1]); + + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = exists(leaf(t), &vars, "y"); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x"]); + assert_eq!(result.dims, vec![2]); + assert_eq!(result.len(), 2); + } + + #[test] + fn test_relational_join() { + // R(x,y) ⋈ S(y,z) then ∃y + let mut r = SparseTensor::empty(vec![3, 3]); + r.insert(vec![0, 1]); + r.insert(vec![1, 2]); + + let mut s = SparseTensor::empty(vec![3, 3]); + s.insert(vec![0, 1]); + s.insert(vec![1, 2]); + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string(), 
"z".to_string()]; + + let (conj, vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + assert_eq!(vars, vec!["x", "y", "z"]); + + let (result_expr, result_vars) = exists(conj, &vars, "y"); + let result = result_expr.materialize(); + + assert_eq!(result_vars, vec!["x", "z"]); + assert!(result.contains(&[0, 2])); // path 0→1→2 + } + + #[test] + fn test_fused_join_uses_hash() { + // Large-ish tensors to verify hash join path works + let mut r = SparseTensor::empty(vec![100, 100]); + let mut s = SparseTensor::empty(vec![100, 100]); + + // Sparse data + for i in 0..50 { + r.insert(vec![i, i + 1]); + s.insert(vec![i + 1, i + 2]); + } + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string(), "z".to_string()]; + + let (conj, vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + let (result_expr, _) = exists(conj, &vars, "y"); + let result = result_expr.materialize(); + + // Should have 50 paths: 0→2, 1→3, ..., 49→51 + assert_eq!(result.len(), 50); + assert!(result.contains(&[0, 2])); + assert!(result.contains(&[49, 51])); + } + + #[test] + fn test_disjunction_same_vars() { + // R(x,y) ∨ S(x,y) with same variable order + let mut r = SparseTensor::empty(vec![2, 2]); + r.insert(vec![0, 0]); + + let mut s = SparseTensor::empty(vec![2, 2]); + s.insert(vec![1, 1]); + + let vars = vec!["x".to_string(), "y".to_string()]; + + let (expr, result_vars) = disjunction(leaf(r), &vars, leaf(s), &vars); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x", "y"]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0, 0])); + assert!(result.contains(&[1, 1])); + } + + #[test] + fn test_disjunction_reordered_vars() { + // R(x,y) ∨ S(y,x) - different variable order requires reordering + let mut r = SparseTensor::empty(vec![2, 3]); + r.insert(vec![0, 1]); // x=0, y=1 + + let mut s = SparseTensor::empty(vec![3, 2]); + s.insert(vec![2, 1]); // y=2, x=1 + + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = 
vec!["y".to_string(), "x".to_string()]; + + let (expr, result_vars) = disjunction(leaf(r), &vars_r, leaf(s), &vars_s); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x", "y"]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0, 1])); // from R + assert!(result.contains(&[1, 2])); // from S reordered + } + + #[test] + fn test_disjunction_all() { + // R(x) ∨ S(x) ∨ T(x) + let mut r = SparseTensor::empty(vec![5]); + r.insert(vec![0]); + + let mut s = SparseTensor::empty(vec![5]); + s.insert(vec![1]); + + let mut t = SparseTensor::empty(vec![5]); + t.insert(vec![2]); + + let vars = vec!["x".to_string()]; + + let (expr, result_vars) = disjunction_all(vec![ + (leaf(r), vars.clone()), + (leaf(s), vars.clone()), + (leaf(t), vars.clone()), + ]); + let result = expr.materialize(); + + assert_eq!(result_vars, vec!["x"]); + assert_eq!(result.len(), 3); + assert!(result.contains(&[0])); + assert!(result.contains(&[1])); + assert!(result.contains(&[2])); + } + + #[test] + fn test_disjunction_all_empty() { + // Empty disjunction = false + let (expr, vars) = disjunction_all(vec![]); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert!(result.is_empty()); + } + + #[test] + fn test_geometric_formula_pattern() { + // Test pattern from geometric logic: ∃y. (R(x,y) ∧ S(y)) ∨ (T(x)) + // This exercises Sum inside a more complex expression + + // R(x,y): edges 0→1, 1→2 + let mut r = SparseTensor::empty(vec![3, 3]); + r.insert(vec![0, 1]); + r.insert(vec![1, 2]); + + // S(y): valid y values {1, 2} + let mut s = SparseTensor::empty(vec![3]); + s.insert(vec![1]); + s.insert(vec![2]); + + // T(x): alternative x values {2} + let mut t = SparseTensor::empty(vec![3]); + t.insert(vec![2]); + + // Build: R(x,y) ∧ S(y) + let vars_r = vec!["x".to_string(), "y".to_string()]; + let vars_s = vec!["y".to_string()]; + let (conj, conj_vars) = conjunction(leaf(r), &vars_r, leaf(s), &vars_s); + // conj_vars = ["x", "y"] + + // ∃y. 
(R(x,y) ∧ S(y)) + let (exists_expr, exists_vars) = exists(conj, &conj_vars, "y"); + // exists_vars = ["x"] + + // (∃y. R(x,y) ∧ S(y)) ∨ T(x) + let vars_t = vec!["x".to_string()]; + let (result_expr, result_vars) = disjunction(exists_expr, &exists_vars, leaf(t), &vars_t); + + let result = result_expr.materialize(); + + assert_eq!(result_vars, vec!["x"]); + // From R ∧ S: x=0 (path 0→1, 1∈S) and x=1 (path 1→2, 2∈S) + // From T: x=2 + assert_eq!(result.len(), 3); + assert!(result.contains(&[0])); + assert!(result.contains(&[1])); + assert!(result.contains(&[2])); + } +} diff --git a/src/tensor/check.rs b/src/tensor/check.rs new file mode 100644 index 0000000..cbdd5c8 --- /dev/null +++ b/src/tensor/check.rs @@ -0,0 +1,580 @@ +//! Sequent checking using tensor expressions. + +use crate::core::{Sequent, Signature, Structure}; + +use super::compile::{compile_formula, derived_sort_cardinality, CompileContext, CompileError}; +use super::sparse::DomainIterator; + +/// A violation of a sequent: a variable assignment where the premise holds but conclusion doesn't. 
#[derive(Clone, Debug)]
pub struct Violation {
    /// The tuple indices representing the variable assignment
    pub assignment: Vec<usize>,
    /// Variable names (for debugging/reporting)
    pub variable_names: Vec<String>,
}

impl Violation {
    /// Pair a violating assignment with the names of the variables it binds.
    pub fn new(assignment: Vec<usize>, variable_names: Vec<String>) -> Self {
        Violation {
            assignment,
            variable_names,
        }
    }
}

/// Result of checking a sequent
#[derive(Clone, Debug)]
pub enum CheckResult {
    /// The sequent is satisfied (all assignments that satisfy the premise also satisfy the conclusion)
    Satisfied,
    /// The sequent is violated (some assignments satisfy the premise but not the conclusion)
    Violated(Vec<Violation>),
}

impl CheckResult {
    /// True when no violating assignment was found.
    pub fn is_satisfied(&self) -> bool {
        match self {
            CheckResult::Satisfied => true,
            CheckResult::Violated(_) => false,
        }
    }

    /// The violating assignments; empty when the sequent is satisfied.
    pub fn violations(&self) -> &[Violation] {
        if let CheckResult::Violated(vs) = self {
            vs
        } else {
            &[]
        }
    }
}
+pub fn check_sequent(sequent: &Sequent, structure: &Structure, sig: &Signature) -> Result { + let ctx = CompileContext::from_context(&sequent.context); + + // Compile premise and conclusion + let (premise_expr, premise_vars) = compile_formula(&sequent.premise, &ctx, structure, sig)?; + let (conclusion_expr, conclusion_vars) = + compile_formula(&sequent.conclusion, &ctx, structure, sig)?; + + // Materialize both + let premise_tensor = premise_expr.materialize(); + let conclusion_tensor = conclusion_expr.materialize(); + + // Handle edge cases + if premise_tensor.is_empty() { + // Vacuously true: no assignments satisfy the premise + return Ok(CheckResult::Satisfied); + } + + // Handle case where conclusion is scalar true (no variables) + if conclusion_vars.is_empty() && conclusion_tensor.contains(&[]) { + // Conclusion is just "true" - always satisfied + return Ok(CheckResult::Satisfied); + } + + // Handle case where premise has no variables (scalar) but conclusion has variables + // This means premise is "true" and we need to check conclusion holds universally + if premise_vars.is_empty() && !conclusion_vars.is_empty() { + // Premise is "true", need to check if conclusion is universally true + // This means: for all values of conclusion_vars, conclusion holds + // We need to enumerate the domain from the context + + // Get the domain sizes from conclusion variable sorts + // We need to look up sorts in the context + let domain_sizes: Vec = sequent + .context + .vars + .iter() + .filter(|(name, _)| conclusion_vars.contains(name)) + .map(|(_, sort)| derived_sort_cardinality(structure, sort)) + .collect(); + + // Check that conclusion covers all tuples in the domain + let expected_count: usize = domain_sizes.iter().product(); + + if conclusion_tensor.len() == expected_count { + // All tuples covered + return Ok(CheckResult::Satisfied); + } + + // Find violations: tuples in domain not in conclusion + let mut violations = Vec::new(); + for tuple in 
DomainIterator::new(&domain_sizes) { + if !conclusion_tensor.contains(&tuple) { + violations.push(Violation::new(tuple, conclusion_vars.clone())); + } + } + + return if violations.is_empty() { + Ok(CheckResult::Satisfied) + } else { + Ok(CheckResult::Violated(violations)) + }; + } + + // Build mapping from premise vars to conclusion vars + // Premise might have MORE variables than conclusion (e.g., ∃y quantified out in conclusion) + // We need to project premise tuples to conclusion variables + let _projection: Vec> = premise_vars + .iter() + .map(|pv| conclusion_vars.iter().position(|cv| cv == pv)) + .collect(); + + // All conclusion vars should be present in premise vars + // (premise provides the context for checking) + for cv in &conclusion_vars { + if !premise_vars.contains(cv) { + // This shouldn't happen in well-formed sequents + panic!( + "Conclusion variable '{}' not found in premise variables {:?}", + cv, premise_vars + ); + } + } + + // Check: for every tuple in premise, the projected tuple should be in conclusion + let mut violations = Vec::new(); + + for tuple in premise_tensor.iter() { + // Project premise tuple to conclusion vars + let conclusion_tuple: Vec = conclusion_vars + .iter() + .map(|cv| { + let premise_idx = premise_vars.iter().position(|pv| pv == cv).unwrap(); + tuple[premise_idx] + }) + .collect(); + + if !conclusion_tensor.contains(&conclusion_tuple) { + violations.push(Violation::new(tuple.clone(), premise_vars.clone())); + } + } + + if violations.is_empty() { + Ok(CheckResult::Satisfied) + } else { + Ok(CheckResult::Violated(violations)) + } +} + +/// Check if a sequent is satisfied, returning just a boolean. +/// Returns false if compilation fails. +pub fn check_sequent_bool(sequent: &Sequent, structure: &Structure, sig: &Signature) -> bool { + check_sequent(sequent, structure, sig) + .map(|r| r.is_satisfied()) + .unwrap_or(false) +} + +/// Check multiple sequents (axioms of a theory) against a structure. 
+/// Returns a list of (sequent_index, violations) for each violated sequent. +/// +/// If tensor compilation fails (e.g., for unsupported formula patterns like +/// record terms in equality), silently skips that axiom. Forward chaining +/// can handle these axioms differently via `eval_term_to_slid`. +pub fn check_theory_axioms( + axioms: &[Sequent], + structure: &Structure, + sig: &Signature, +) -> Vec<(usize, Vec)> { + axioms + .iter() + .enumerate() + .filter_map(|(i, seq)| { + match check_sequent(seq, structure, sig) { + Ok(CheckResult::Satisfied) => None, + Ok(CheckResult::Violated(vs)) => Some((i, vs)), + Err(_) => { + // Tensor compilation failed (e.g., unsupported term in equality) + // Treat as satisfied for now - forward chaining will handle these + // axioms via a different code path (eval_term_to_slid). + None + } + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{Context, DerivedSort, Formula, Signature, Structure, Term}; + use crate::id::{NumericId, Slid}; + use crate::universe::Universe; + + /// Helper to create Slid from integer + fn slid(n: usize) -> Slid { + Slid::from_usize(n) + } + + /// Helper to create a test structure with a single sort and some elements + fn make_test_structure_with_relation() -> (Structure, Signature) { + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add a binary relation: edge(from: Node, to: Node) + sig.add_relation( + "edge".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); // 1 sort + + // Add 3 nodes (Slids 0, 1, 2) + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Initialize relations + structure.init_relations(&[2]); // One binary relation + + // Add edges: 0→1, 1→2 + structure.assert_relation(0, vec![slid(0), slid(1)]); + 
structure.assert_relation(0, vec![slid(1), slid(2)]); + + (structure, sig) + } + + #[test] + fn test_check_sequent_reflexivity() { + // Axiom: ∀x:Node. true ⊢ edge(x,x) -- reflexivity + // This should FAIL because our graph doesn't have self-loops + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }; + + let premise = Formula::True; + let conclusion = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let sequent = Sequent { + context: ctx, + premise, + conclusion, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + // Should be violated for all 3 nodes (no self-loops) + assert!(!result.is_satisfied()); + assert_eq!(result.violations().len(), 3); + } + + #[test] + fn test_check_sequent_edge_implies_edge() { + // Axiom: ∀x,y:Node. edge(x,y) ⊢ edge(x,y) -- tautology + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let sequent = Sequent { + context: ctx, + premise: edge_xy.clone(), + conclusion: edge_xy, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + assert!(result.is_satisfied()); + } + + #[test] + fn test_check_sequent_transitivity() { + // Axiom: ∀x,y,z:Node. 
edge(x,y) ∧ edge(y,z) ⊢ edge(x,z) -- transitivity + // This should FAIL because we have 0→1→2 but not 0→2 + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_xz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let premise = Formula::Conj(vec![edge_xy, edge_yz]); + + let sequent = Sequent { + context: ctx, + premise, + conclusion: edge_xz, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + // Should be violated: (0,1,2) satisfies premise but 0→2 is not an edge + assert!(!result.is_satisfied()); + assert_eq!(result.violations().len(), 1); + assert_eq!(result.violations()[0].assignment, vec![0, 1, 2]); + } + + #[test] + fn test_check_sequent_vacuously_true() { + // Axiom: ∀x,y:Node. 
false ⊢ edge(x,y) -- vacuously true + let (structure, sig) = make_test_structure_with_relation(); + + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }; + + let sequent = Sequent { + context: ctx, + premise: Formula::False, + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ), + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + assert!(result.is_satisfied()); + } + + #[test] + fn test_check_sequent_with_closure() { + // Add transitive closure edges to make transitivity hold + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + sig.add_relation( + "edge".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + structure.init_relations(&[2]); + + // Add edges: 0→1, 1→2, AND 0→2 (transitive closure) + structure.assert_relation(0, vec![slid(0), slid(1)]); + structure.assert_relation(0, vec![slid(1), slid(2)]); + structure.assert_relation(0, vec![slid(0), slid(2)]); // Closure! 
+ + let ctx = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_xz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let premise = Formula::Conj(vec![edge_xy, edge_yz]); + + let sequent = Sequent { + context: ctx, + premise, + conclusion: edge_xz, + }; + + let result = check_sequent(&sequent, &structure, &sig).unwrap(); + + // Now should be satisfied because we have 0→2 + assert!(result.is_satisfied()); + } + + #[test] + fn test_check_theory_axioms() { + let (structure, sig) = make_test_structure_with_relation(); + + // Two axioms: one true, one false + let ctx1 = Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }; + + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + // Axiom 1: edge(x,y) ⊢ edge(x,y) -- tautology (satisfied) + let axiom1 = Sequent { + context: ctx1.clone(), + premise: edge_xy.clone(), + conclusion: edge_xy.clone(), + }; + + // Axiom 2: true ⊢ edge(x,x) -- reflexivity (violated) + let ctx2 = Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + 
}; + let edge_xx = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let axiom2 = Sequent { + context: ctx2, + premise: Formula::True, + conclusion: edge_xx, + }; + + let violations = check_theory_axioms(&[axiom1, axiom2], &structure, &sig); + + // Only axiom 2 (index 1) should be violated + assert_eq!(violations.len(), 1); + assert_eq!(violations[0].0, 1); // Second axiom + assert_eq!(violations[0].1.len(), 3); // All 3 nodes violate reflexivity + } +} diff --git a/src/tensor/compile.rs b/src/tensor/compile.rs new file mode 100644 index 0000000..44ed75c --- /dev/null +++ b/src/tensor/compile.rs @@ -0,0 +1,1229 @@ +//! Formula compilation to tensor expressions. + +use std::collections::{BTreeSet, HashMap}; + +use crate::core::{Context, DerivedSort, Formula, RelId, Signature, Structure, Term}; +use crate::id::{NumericId, Slid}; + +use super::builder::{conjunction, conjunction_all, disjunction_all, exists}; +use super::expr::TensorExpr; +use super::sparse::SparseTensor; + +/// Error type for formula/term compilation +#[derive(Debug, Clone)] +pub enum CompileError { + /// Product sort in variable term (not yet supported) + ProductSortInVariable, + /// Function with product domain (not yet supported) + ProductDomainFunction(String), + /// Function with product codomain (not yet supported) + ProductCodomainFunction(String), + /// Record term in equality (not yet supported) + RecordInEquality, + /// Projection term in equality (not yet supported) + ProjectionInEquality, + /// Variable not found in context + UnboundVariable(String), +} + +impl std::fmt::Display for CompileError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CompileError::ProductSortInVariable => { + write!(f, "product sort in variable term not yet supported") + } + 
CompileError::ProductDomainFunction(name) => { + write!(f, "function '{}' has product domain (not yet supported)", name) + } + CompileError::ProductCodomainFunction(name) => { + write!(f, "function '{}' has product codomain (not yet supported)", name) + } + CompileError::RecordInEquality => { + write!(f, "record terms in equality not yet supported") + } + CompileError::ProjectionInEquality => { + write!(f, "projection terms in equality not yet supported") + } + CompileError::UnboundVariable(name) => { + write!(f, "variable '{}' not found in context", name) + } + } + } +} + +impl std::error::Error for CompileError {} + +/// Context for formula compilation, tracking variable names and their dimensions. +#[derive(Clone, Debug)] +pub struct CompileContext { + /// Variable names in order (these become tensor dimensions) + pub vars: Vec, + /// Variable sorts (for looking up cardinalities) + pub sorts: Vec, +} + +impl CompileContext { + pub fn new() -> Self { + Self { + vars: vec![], + sorts: vec![], + } + } + + pub fn from_context(ctx: &Context) -> Self { + Self { + vars: ctx.vars.iter().map(|(n, _)| n.clone()).collect(), + sorts: ctx.vars.iter().map(|(_, s)| s.clone()).collect(), + } + } + + pub fn lookup(&self, name: &str) -> Option { + self.vars.iter().position(|n| n == name) + } + + pub fn add(&mut self, name: String, sort: DerivedSort) { + self.vars.push(name); + self.sorts.push(sort); + } +} + +impl Default for CompileContext { + fn default() -> Self { + Self::new() + } +} + +/// Get the cardinality of a base sort in the structure. +pub fn sort_cardinality(structure: &Structure, sort_id: usize) -> usize { + structure.carriers[sort_id].len() as usize +} + +/// Get the cardinality of a derived sort. 
+pub fn derived_sort_cardinality(structure: &Structure, sort: &DerivedSort) -> usize { + match sort { + DerivedSort::Base(sort_id) => sort_cardinality(structure, *sort_id), + DerivedSort::Product(fields) => { + // Product cardinality is the product of field cardinalities + fields + .iter() + .map(|(_, s)| derived_sort_cardinality(structure, s)) + .product() + } + } +} + +/// Build a Slid-to-index map for a sort's carrier. +/// Returns a map from Slid to its position within the carrier. +pub fn build_carrier_index(structure: &Structure, sort_id: usize) -> HashMap { + structure.carriers[sort_id] + .iter() + .enumerate() + .map(|(idx, slid_u64)| (Slid::from_usize(slid_u64 as usize), idx)) + .collect() +} + +/// Convert a function's graph (extent) to a SparseTensor. +/// +/// For function f : A → B, builds a 2D tensor where (i, j) is present +/// iff f(a_i) = b_j (where a_i is the i-th element of A, b_j is j-th of B). +pub fn function_to_tensor( + structure: &Structure, + func_id: usize, + domain_sort_id: usize, + codomain_sort_id: usize, +) -> SparseTensor { + use crate::id::{NumericId, Slid}; + use std::collections::BTreeSet; + + let domain_carrier = &structure.carriers[domain_sort_id]; + let codomain_carrier = &structure.carriers[codomain_sort_id]; + + let domain_size = domain_carrier.len() as usize; + let codomain_size = codomain_carrier.len() as usize; + + // Build reverse index for codomain (Slid -> position) + let codomain_index: HashMap = codomain_carrier + .iter() + .enumerate() + .map(|(idx, slid_u64)| (Slid::from_usize(slid_u64 as usize), idx)) + .collect(); + + // Iterate over function's extent + let mut extent = BTreeSet::new(); + for (domain_idx, domain_slid_u64) in domain_carrier.iter().enumerate() { + let domain_slid = Slid::from_usize(domain_slid_u64 as usize); + let sort_slid = structure.sort_local_id(domain_slid); + + if let Some(codomain_slid) = structure.get_function(func_id, sort_slid) + && let Some(&codomain_idx) = 
codomain_index.get(&codomain_slid) { + extent.insert(vec![domain_idx, codomain_idx]); + } + } + + SparseTensor { + dims: vec![domain_size, codomain_size], + extent, + } +} + +/// Convert a VecRelation to a SparseTensor. +/// +/// The relation has tuples of Slids; we convert to indices using carrier maps. +/// `column_sorts` specifies the sort of each column for looking up carriers. +pub fn relation_to_tensor( + structure: &Structure, + rel_id: RelId, + column_sorts: &[usize], // SortId for each column +) -> SparseTensor { + let relation = &structure.relations[rel_id]; + + // Build carrier index maps for each column + let carrier_indices: Vec> = column_sorts + .iter() + .map(|&sort_id| build_carrier_index(structure, sort_id)) + .collect(); + + // Build dimensions from carrier sizes + let dims: Vec = column_sorts + .iter() + .map(|&sort_id| structure.carriers[sort_id].len() as usize) + .collect(); + + // Convert tuples + let mut extent = std::collections::BTreeSet::new(); + for tuple in relation.iter() { + let indices: Option> = tuple + .iter() + .zip(&carrier_indices) + .map(|(&slid, index_map)| index_map.get(&slid).copied()) + .collect(); + + if let Some(idx_tuple) = indices { + extent.insert(idx_tuple); + } + // Skip tuples with elements not in carriers (shouldn't happen in valid data) + } + + SparseTensor { dims, extent } +} + +/// Extract variable names from a term pattern. +/// Returns pairs of (field_position, variable_name). 
+fn extract_term_vars(term: &Term) -> Vec<(usize, String, DerivedSort)> { + match term { + Term::Var(name, sort) => vec![(0, name.clone(), sort.clone())], + Term::Record(fields) => fields + .iter() + .enumerate() + .flat_map(|(i, (_, t))| { + extract_term_vars(t) + .into_iter() + .map(move |(_, name, sort)| (i, name, sort)) + }) + .collect(), + // For function applications and projections, we'd need more work + Term::App(_, _) | Term::Project(_, _) => { + // These are more complex - for now, treat as opaque + vec![] + } + } +} + +/// Check if a term contains any function applications +fn term_has_func_app(term: &Term) -> bool { + match term { + Term::Var(_, _) => false, + Term::App(_, _) => true, + Term::Project(base, _) => term_has_func_app(base), + Term::Record(fields) => fields.iter().any(|(_, t)| term_has_func_app(t)), + } +} + +/// Compile a simple relation formula (no function applications in term) +fn compile_rel_simple( + rel_id: RelId, + term: &Term, + structure: &Structure, + sig: &Signature, +) -> (TensorExpr, Vec) { + let vars_info = extract_term_vars(term); + let column_sorts = relation_column_sorts(sig, rel_id); + + // Build the tensor from the relation + let tensor = relation_to_tensor(structure, rel_id, &column_sorts); + + // Build variable list (ordered by column position) + let mut var_info_sorted = vars_info.clone(); + var_info_sorted.sort_by_key(|(pos, _, _)| *pos); + + // Check for repeated variables (same variable in multiple columns) + // e.g., edge(x, x) should produce a diagonal tensor + let mut seen_vars: HashMap = HashMap::new(); + let mut unique_vars: Vec = Vec::new(); + let mut index_map: Vec = Vec::new(); + + for (_, name, _) in &var_info_sorted { + if let Some(&existing_idx) = seen_vars.get(name) { + // Repeated variable: map to same target + index_map.push(existing_idx); + } else { + // New variable + let new_idx = unique_vars.len(); + seen_vars.insert(name.clone(), new_idx); + unique_vars.push(name.clone()); + 
index_map.push(new_idx); + } + } + + // If all variables are unique, no contraction needed + if unique_vars.len() == var_info_sorted.len() { + (TensorExpr::leaf(tensor), unique_vars) + } else { + // Need to contract to handle repeated variables (diagonal) + let output: BTreeSet = (0..unique_vars.len()).collect(); + let expr = TensorExpr::Contract { + inner: Box::new(TensorExpr::leaf(tensor)), + index_map, + output, + }; + (expr, unique_vars) + } +} + +/// Compile a relation formula with function applications in the term +/// For `[from: e src, to: e tgt] reachable`: +/// 1. Compile each field term (e src, e tgt) using compile_term +/// 2. Join the resulting tensors +/// 3. Join with the relation tensor +/// 4. Project out the intermediate value variables +fn compile_rel_with_func_apps( + rel_id: RelId, + term: &Term, + structure: &Structure, + sig: &Signature, +) -> Result<(TensorExpr, Vec), CompileError> { + let column_sorts = relation_column_sorts(sig, rel_id); + let rel_tensor = relation_to_tensor(structure, rel_id, &column_sorts); + + // Get the relation's field info (unused for now but documents the structure) + let _rel = &sig.relations[rel_id]; + + let mut fresh_counter = 0; + + // Compile each field term and collect their value variables + let field_terms: Vec<&Term> = match term { + Term::Record(fields) => fields.iter().map(|(_, t)| t).collect(), + _ => vec![term], // Single term for unary relation + }; + + // Compile all field terms + let mut all_compiled: Vec<(TensorExpr, Vec, String)> = Vec::new(); + for field_term in &field_terms { + let (expr, vars, value_var) = compile_term(field_term, structure, sig, &mut fresh_counter)?; + all_compiled.push((expr, vars, value_var)); + } + + // Join all field terms together + let mut joined_expr = all_compiled[0].0.clone(); + let mut joined_vars = all_compiled[0].1.clone(); + + for (expr, vars, _) in all_compiled.iter().skip(1) { + let (new_expr, new_vars) = conjunction(joined_expr, &joined_vars, expr.clone(), 
vars); + joined_expr = new_expr; + joined_vars = new_vars; + } + + // Build the relation tensor with value variables as dimensions + // The relation tensor has dimensions corresponding to the column sorts + // We need to rename the relation's dimensions to match the field value variables + let value_vars: Vec<&String> = all_compiled.iter().map(|(_, _, v)| v).collect(); + + // Build relation tensor variable names (one per column) + let rel_vars: Vec = value_vars.iter().map(|&v| v.clone()).collect(); + + // Join with relation tensor + let (result_expr, result_vars) = + conjunction(joined_expr, &joined_vars, TensorExpr::leaf(rel_tensor), &rel_vars); + + // Project out the value variables (they're internal) + let mut final_expr = result_expr; + let mut final_vars = result_vars; + for value_var in &value_vars { + let (new_expr, new_vars) = exists(final_expr, &final_vars, value_var); + final_expr = new_expr; + final_vars = new_vars; + } + + Ok((final_expr, final_vars)) +} + +/// Get the base sort IDs from a relation's domain. +fn relation_column_sorts(sig: &Signature, rel_id: RelId) -> Vec { + let rel_sym = &sig.relations[rel_id]; + match &rel_sym.domain { + DerivedSort::Base(sort_id) => vec![*sort_id], + DerivedSort::Product(fields) => fields + .iter() + .filter_map(|(_, sort)| { + if let DerivedSort::Base(sort_id) = sort { + Some(*sort_id) + } else { + None // Nested products not supported yet + } + }) + .collect(), + } +} + +/// Compile a term to a tensor expression. +/// +/// Returns (expr, vars, value_var) where: +/// - expr is a tensor over vars (including value_var) +/// - vars are all free variables in alphabetical order +/// - value_var is the internal name for the term's value dimension +/// +/// The tensor represents: for each assignment to free variables, +/// what is the value of the term? 
+fn compile_term( + term: &Term, + structure: &Structure, + sig: &Signature, + fresh_counter: &mut usize, +) -> Result<(TensorExpr, Vec, String), CompileError> { + match term { + Term::Var(name, sort) => { + // Variable x evaluates to itself + // Tensor is identity: (x, value) where value = x + // This is the diagonal tensor + let DerivedSort::Base(sort_id) = sort else { + return Err(CompileError::ProductSortInVariable); + }; + let size = structure.carriers[*sort_id].len() as usize; + + // Create diagonal tensor: extent = {(i, i) | i < size} + let extent: BTreeSet> = (0..size).map(|i| vec![i, i]).collect(); + let tensor = SparseTensor { + dims: vec![size, size], + extent, + }; + + // Value variable is the same as the input variable + // Actually we need a fresh name to track the "output" dimension + let value_var = format!("_val{}", *fresh_counter); + *fresh_counter += 1; + + // The tensor has dimensions [name, value_var] + // We need them in alphabetical order + let vars = if name < &value_var { + vec![name.clone(), value_var.clone()] + } else { + vec![value_var.clone(), name.clone()] + }; + + let expr = if name < &value_var { + TensorExpr::leaf(tensor) + } else { + // Need to transpose + TensorExpr::Contract { + inner: Box::new(TensorExpr::leaf(tensor)), + index_map: vec![1, 0], + output: (0..2).collect(), + } + }; + + Ok((expr, vars, value_var)) + } + + Term::App(func_id, arg) => { + // f(arg): first compile arg, then apply function + let (arg_expr, arg_vars, arg_value_var) = + compile_term(arg.as_ref(), structure, sig, fresh_counter)?; + + // Get function info + let func_sym = &sig.functions[*func_id]; + let DerivedSort::Base(domain_sort_id) = &func_sym.domain else { + return Err(CompileError::ProductDomainFunction(func_sym.name.clone())); + }; + let DerivedSort::Base(codomain_sort_id) = &func_sym.codomain else { + return Err(CompileError::ProductCodomainFunction(func_sym.name.clone())); + }; + + // Build function tensor: (domain, codomain) pairs + let 
func_tensor = function_to_tensor(structure, *func_id, *domain_sort_id, *codomain_sort_id); + + // Fresh variable for output + let result_var = format!("_val{}", *fresh_counter); + *fresh_counter += 1; + + // Function tensor has vars [arg_value_var, result_var] (we need to match arg's value) + let func_vars = if arg_value_var < result_var { + vec![arg_value_var.clone(), result_var.clone()] + } else { + vec![result_var.clone(), arg_value_var.clone()] + }; + + let func_expr = if arg_value_var < result_var { + TensorExpr::leaf(func_tensor) + } else { + TensorExpr::Contract { + inner: Box::new(TensorExpr::leaf(func_tensor)), + index_map: vec![1, 0], + output: (0..2).collect(), + } + }; + + // Join arg_expr and func_expr on arg_value_var + let (joined_expr, joined_vars) = conjunction(arg_expr, &arg_vars, func_expr, &func_vars); + + // Existentially quantify out arg_value_var (the intermediate value) + let (result_expr, result_vars) = exists(joined_expr, &joined_vars, &arg_value_var); + + Ok((result_expr, result_vars, result_var)) + } + + Term::Record(_) => { + Err(CompileError::RecordInEquality) + } + + Term::Project(_, _) => { + Err(CompileError::ProjectionInEquality) + } + } +} + +/// Compile a formula to a tensor expression. +/// +/// Returns the expression and the list of free variables in order. 
+pub fn compile_formula( + formula: &Formula, + _ctx: &CompileContext, + structure: &Structure, + sig: &Signature, +) -> Result<(TensorExpr, Vec), CompileError> { + match formula { + Formula::True => Ok((TensorExpr::scalar(true), vec![])), + + Formula::False => Ok((TensorExpr::scalar(false), vec![])), + + Formula::Rel(rel_id, term) => { + // Check if term contains function applications + if term_has_func_app(term) { + // Use compile_term for each field, then join with relation + compile_rel_with_func_apps(*rel_id, term, structure, sig) + } else { + // Simple case: direct variable binding + Ok(compile_rel_simple(*rel_id, term, structure, sig)) + } + } + + Formula::Conj(formulas) => { + if formulas.is_empty() { + return Ok((TensorExpr::scalar(true), vec![])); + } + + let compiled: Result)>, CompileError> = formulas + .iter() + .map(|f| compile_formula(f, _ctx, structure, sig)) + .collect(); + + Ok(conjunction_all(compiled?)) + } + + Formula::Disj(formulas) => { + if formulas.is_empty() { + return Ok((TensorExpr::scalar(false), vec![])); + } + + let mut compiled: Vec<(TensorExpr, Vec)> = formulas + .iter() + .map(|f| compile_formula(f, _ctx, structure, sig)) + .collect::, _>>()?; + + // Collect all variables across all disjuncts + let all_vars: std::collections::HashSet<&String> = compiled + .iter() + .flat_map(|(_, vars)| vars.iter()) + .collect(); + + // If all disjuncts have the same variables, we're good + let need_extension = compiled.iter().any(|(_, vars)| { + let var_set: std::collections::HashSet<_> = vars.iter().collect(); + var_set != all_vars + }); + + if need_extension { + // Build a canonical variable ordering + let all_vars_vec: Vec = { + let mut v: Vec<_> = all_vars.iter().cloned().cloned().collect(); + v.sort(); // Canonical ordering + v + }; + + // Extend each disjunct with missing variables + for (expr, vars) in &mut compiled { + let var_set: std::collections::HashSet<_> = vars.iter().collect(); + let missing: Vec<_> = all_vars_vec + .iter() + 
.filter(|v| !var_set.contains(*v)) + .collect(); + + if !missing.is_empty() { + // Create full-domain tensors for missing variables and take product + let mut full_domain_tensors = Vec::new(); + let mut new_vars = vars.clone(); + + for var in missing { + // Look up the variable's sort in the context + if let Some(idx) = _ctx.vars.iter().position(|v| v == var) { + let sort = &_ctx.sorts[idx]; + let card = derived_sort_cardinality(structure, sort); + + // Create a 1D tensor with all values [0..card) + let mut extent = BTreeSet::new(); + for i in 0..card { + extent.insert(vec![i]); + } + let full_tensor = SparseTensor { + dims: vec![card], + extent, + }; + full_domain_tensors.push(TensorExpr::leaf(full_tensor)); + new_vars.push(var.clone()); + } else { + // Variable not in context - return error + return Err(CompileError::UnboundVariable(var.clone())); + } + } + + // Take product: original × full_domain_1 × full_domain_2 × ... + if !full_domain_tensors.is_empty() { + let mut product_parts = vec![std::mem::replace( + expr, + TensorExpr::scalar(false), + )]; + product_parts.extend(full_domain_tensors); + *expr = TensorExpr::Product(product_parts); + *vars = new_vars; + } + } + } + } + + Ok(disjunction_all(compiled)) + } + + Formula::Exists(var_name, sort, inner) => { + // Compile inner formula + let (inner_expr, inner_vars) = compile_formula(inner, _ctx, structure, sig)?; + + // Check if the quantified variable appears in the inner formula + if !inner_vars.contains(var_name) { + // The variable doesn't appear free in the inner formula. + // For example: ∃x. True or ∃x. 
(y = y) + // + // In this case, the existential is: + // - FALSE if the domain is empty (no witness exists) + // - Equal to the inner formula otherwise (witness exists vacuously) + let domain_card = derived_sort_cardinality(structure, sort); + if domain_card == 0 { + // Empty domain: existential is false + return Ok((TensorExpr::scalar(false), inner_vars)); + } + // Non-empty domain: the existential is equivalent to the inner formula + return Ok((inner_expr, inner_vars)); + } + + // Apply existential (sum over the variable) + Ok(exists(inner_expr, &inner_vars, var_name)) + } + + Formula::Eq(t1, t2) => { + // Handle equality using recursive term compilation + // This supports arbitrary term expressions including nested function applications + // + // Strategy: compile both terms to tensors, join on value dimensions, + // then project out the internal value variables + + // Special case: x = x is trivially true + if let (Term::Var(name1, _), Term::Var(name2, _)) = (t1, t2) + && name1 == name2 { + return Ok((TensorExpr::scalar(true), vec![])); + } + + let mut fresh_counter = 0; + + // Compile both terms + let (expr1, vars1, val1) = compile_term(t1, structure, sig, &mut fresh_counter)?; + let (expr2, vars2, val2) = compile_term(t2, structure, sig, &mut fresh_counter)?; + + // t1 = t2 means their values are equal + // We need to: + // 1. Join expr1 and expr2 on their value dimensions (val1 = val2) + // 2. Project out the value dimensions + + // First, rename val2 to val1 in vars2 so they join on the same variable + let vars2_renamed: Vec = vars2 + .iter() + .map(|v| if v == &val2 { val1.clone() } else { v.clone() }) + .collect(); + + // Rename val2 to val1 in expr2 by reordering dimensions + // The vars are sorted alphabetically, so we need to figure out where val2 was + // and where val1 should go + let val2_pos = vars2.iter().position(|v| v == &val2).unwrap(); + + // Where should val1 go in the sorted vars2_renamed? 
+ let mut sorted_vars2: Vec = vars2_renamed.clone(); + sorted_vars2.sort(); + let val1_pos_in_sorted = sorted_vars2.iter().position(|v| v == &val1).unwrap(); + + // Build index map for reordering + let expr2_reordered = if val2_pos != val1_pos_in_sorted { + // Need to reorder dimensions + let mut index_map: Vec = (0..vars2.len()).collect(); + // The dimension at val2_pos needs to go to val1_pos_in_sorted + index_map.remove(val2_pos); + index_map.insert(val1_pos_in_sorted, val2_pos); + + // Actually, we need the inverse mapping for Contract + let mut inverse_map = vec![0; vars2.len()]; + for (new_pos, &old_pos) in index_map.iter().enumerate() { + inverse_map[old_pos] = new_pos; + } + + TensorExpr::Contract { + inner: Box::new(expr2), + index_map: inverse_map, + output: (0..vars2.len()).collect(), + } + } else { + expr2 + }; + + // Now join on val1 + let (joined_expr, joined_vars) = + conjunction(expr1, &vars1, expr2_reordered, &sorted_vars2); + + // Project out the internal value variable val1 + let (result_expr, result_vars) = exists(joined_expr, &joined_vars, &val1); + + Ok((result_expr, result_vars)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::DerivedSort; + use crate::id::Slid; + use crate::universe::Universe; + + /// Helper to create Slid from integer + fn slid(n: usize) -> Slid { + Slid::from_usize(n) + } + + /// Helper to create a test structure with a single sort and some elements + fn make_test_structure_with_relation() -> (Structure, Signature) { + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add a binary relation: edge(from: Node, to: Node) + sig.add_relation( + "edge".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); // 1 sort + + // Add 3 nodes (Slids 0, 1, 2) + for _ in 0..3 { + 
structure.add_element(&mut universe, node_id); + } + + // Initialize relations + structure.init_relations(&[2]); // One binary relation + + // Add edges: 0→1, 1→2 + structure.assert_relation(0, vec![slid(0), slid(1)]); + structure.assert_relation(0, vec![slid(1), slid(2)]); + + (structure, sig) + } + + #[test] + fn test_compile_formula_true() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + let (expr, vars) = compile_formula(&Formula::True, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert_eq!(result.len(), 1); // scalar true + assert!(result.contains(&[])); + } + + #[test] + fn test_compile_formula_false() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + let (expr, vars) = compile_formula(&Formula::False, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert!(result.is_empty()); + } + + #[test] + fn test_compile_formula_relation() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: edge(x, y) + let term = Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]); + let formula = Formula::Rel(0, term); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "y"]); + assert_eq!(result.dims, vec![3, 3]); // 3 nodes + assert_eq!(result.len(), 2); // 2 edges + assert!(result.contains(&[0, 1])); // 0→1 + assert!(result.contains(&[1, 2])); // 1→2 + } + + #[test] + fn test_compile_formula_conjunction() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: edge(x, y) ∧ edge(y, z) + let edge_xy = Formula::Rel( + 0, + 
Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let formula = Formula::Conj(vec![edge_xy, edge_yz]); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "y", "z"]); + assert_eq!(result.len(), 1); // Only one 2-hop path: 0→1→2 + assert!(result.contains(&[0, 1, 2])); + } + + #[test] + fn test_compile_formula_exists() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: ∃y. edge(x, y) ∧ edge(y, z) + // This is 2-hop reachability + let edge_xy = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + let edge_yz = Formula::Rel( + 0, + Term::Record(vec![ + ( + "from".to_string(), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + ( + "to".to_string(), + Term::Var("z".to_string(), DerivedSort::Base(0)), + ), + ]), + ); + + let inner = Formula::Conj(vec![edge_xy, edge_yz]); + let formula = Formula::Exists("y".to_string(), DerivedSort::Base(0), Box::new(inner)); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars, vec!["x", "z"]); + assert_eq!(result.len(), 1); // One 2-hop path: 0→2 (via 1) + assert!(result.contains(&[0, 2])); + } + + #[test] + fn test_compile_formula_equality() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: x = 
y (diagonal) + let formula = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert_eq!(vars.len(), 2); + assert_eq!(result.dims, vec![3, 3]); + assert_eq!(result.len(), 3); // Diagonal: (0,0), (1,1), (2,2) + assert!(result.contains(&[0, 0])); + assert!(result.contains(&[1, 1])); + assert!(result.contains(&[2, 2])); + } + + #[test] + fn test_compile_formula_reflexive_identity() { + let (structure, sig) = make_test_structure_with_relation(); + let ctx = CompileContext::new(); + + // Build: x = x (trivially true) + let formula = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("x".to_string(), DerivedSort::Base(0)), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + assert!(vars.is_empty()); + assert_eq!(result.len(), 1); // scalar true + assert!(result.contains(&[])); + } + + #[test] + fn test_compile_formula_func_app_equality() { + // Test: f(x) = y where f is a function + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add function f : Node -> Node + sig.add_function("f".to_string(), DerivedSort::Base(node_id), DerivedSort::Base(node_id)); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 nodes + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Define f: 0 -> 1, 1 -> 2, 2 -> 0 + structure.init_functions(&[Some(0)]); // f has domain sort 0 + structure.define_function(0, Slid::from_usize(0), Slid::from_usize(1)).unwrap(); + structure.define_function(0, Slid::from_usize(1), Slid::from_usize(2)).unwrap(); + structure.define_function(0, Slid::from_usize(2), Slid::from_usize(0)).unwrap(); + + let ctx = CompileContext::new(); + + // Build: f(x) = y + let formula = 
Formula::Eq( + Term::App(0, Box::new(Term::Var("x".to_string(), DerivedSort::Base(0)))), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Variables should be x and y (alphabetical order) + assert_eq!(vars.len(), 2); + assert!(vars.contains(&"x".to_string())); + assert!(vars.contains(&"y".to_string())); + + // Result should have exactly 3 tuples: (0,1), (1,2), (2,0) + // representing f(0)=1, f(1)=2, f(2)=0 + // But order depends on alphabetical sort of variable names + assert_eq!(result.len(), 3); + } + + #[test] + fn test_compile_formula_two_func_apps_equality() { + // Test: f(x) = g(y) where f, g are functions + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add functions f, g : Node -> Node + sig.add_function("f".to_string(), DerivedSort::Base(node_id), DerivedSort::Base(node_id)); + sig.add_function("g".to_string(), DerivedSort::Base(node_id), DerivedSort::Base(node_id)); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 nodes + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Define f: 0 -> 1, 1 -> 1, 2 -> 2 + // Define g: 0 -> 0, 1 -> 1, 2 -> 2 + structure.init_functions(&[Some(0), Some(0)]); // Both have domain sort 0 + structure.define_function(0, Slid::from_usize(0), Slid::from_usize(1)).unwrap(); + structure.define_function(0, Slid::from_usize(1), Slid::from_usize(1)).unwrap(); + structure.define_function(0, Slid::from_usize(2), Slid::from_usize(2)).unwrap(); + structure.define_function(1, Slid::from_usize(0), Slid::from_usize(0)).unwrap(); + structure.define_function(1, Slid::from_usize(1), Slid::from_usize(1)).unwrap(); + structure.define_function(1, Slid::from_usize(2), Slid::from_usize(2)).unwrap(); + + let ctx = CompileContext::new(); + + // Build: f(x) = g(y) + // f(x) = g(y) when ∃z. 
f(x) = z ∧ g(y) = z + // f(0)=1, f(1)=1, f(2)=2 + // g(0)=0, g(1)=1, g(2)=2 + // So f(x)=g(y) holds for: (0,1), (1,1), (2,2) since f(0)=g(1)=1, f(1)=g(1)=1, f(2)=g(2)=2 + let formula = Formula::Eq( + Term::App(0, Box::new(Term::Var("x".to_string(), DerivedSort::Base(0)))), + Term::App(1, Box::new(Term::Var("y".to_string(), DerivedSort::Base(0)))), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Variables should be x and y + assert_eq!(vars.len(), 2); + assert!(vars.contains(&"x".to_string())); + assert!(vars.contains(&"y".to_string())); + + // f(x) = g(y) holds for: (x=0,y=1), (x=1,y=1), (x=2,y=2) + assert_eq!(result.len(), 3); + } + + #[test] + fn test_compile_formula_exists_empty_domain() { + // When the domain is empty, ∃x. φ should be false even if φ is true + // This is the case for ∃x. x = x on an empty structure + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Empty structure (no elements) + let structure = Structure::new(1); + + let ctx = CompileContext::new(); + + // Build: ∃x. x = x + // Inner formula x = x compiles to scalar true (no variables) + // But since domain is empty, the existential should be false + let inner = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + ); + let formula = Formula::Exists("x".to_string(), DerivedSort::Base(node_id), Box::new(inner)); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Should be FALSE (empty) because there's no witness in empty domain + assert!(vars.is_empty()); + assert!(result.is_empty(), "∃x. x = x should be false on empty domain"); + } + + #[test] + fn test_compile_formula_exists_nonempty_domain() { + // When the domain is non-empty, ∃x. 
x = x should be true + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + structure.add_element(&mut universe, node_id); // Add one element + + let ctx = CompileContext::new(); + + // Build: ∃x. x = x + let inner = Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + Term::Var("x".to_string(), DerivedSort::Base(node_id)), + ); + let formula = Formula::Exists("x".to_string(), DerivedSort::Base(node_id), Box::new(inner)); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Should be TRUE because there's a witness + assert!(vars.is_empty()); + assert!(result.contains(&[]), "∃x. x = x should be true on non-empty domain"); + } + + #[test] + fn test_compile_formula_disjunction_different_vars() { + // Test disjunction where each disjunct has different variables + // R(x) \/ S(y) - this used to panic, now should work + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + + // Add two unary relations + sig.add_relation("R".to_string(), DerivedSort::Base(node_id)); + sig.add_relation("S".to_string(), DerivedSort::Base(node_id)); + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 nodes + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + + // Initialize relations + structure.init_relations(&[1, 1]); // Two unary relations + + // R = {0}, S = {1} + structure.assert_relation(0, vec![Slid::from_usize(0)]); + structure.assert_relation(1, vec![Slid::from_usize(1)]); + + // Need context with both x and y + let ctx = CompileContext { + vars: vec!["x".to_string(), "y".to_string()], + sorts: vec![DerivedSort::Base(node_id), DerivedSort::Base(node_id)], + }; + + // Build: R(x) \/ S(y) + let r_x = Formula::Rel( + 0, + Term::Var("x".to_string(), DerivedSort::Base(0)), + ); + let s_y = 
Formula::Rel( + 1, + Term::Var("y".to_string(), DerivedSort::Base(0)), + ); + + let formula = Formula::Disj(vec![r_x, s_y]); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // Result should have both x and y + assert_eq!(vars.len(), 2); + assert!(vars.contains(&"x".to_string())); + assert!(vars.contains(&"y".to_string())); + + // The result is the union of: + // - R(x) extended with all y: {(0,0), (0,1), (0,2)} + // - S(y) extended with all x: {(0,1), (1,1), (2,1)} + // Note: the tuple order depends on variable order + assert!(!result.is_empty()); + } + + #[test] + fn test_compile_formula_relation_with_func_apps() { + // Test: [from: e src, to: e tgt] edge (function applications in relation term) + // This verifies that compile_rel_with_func_apps works correctly + let mut sig = Signature::new(); + let node_id = sig.add_sort("Node".to_string()); + let edge_id = sig.add_sort("Edge".to_string()); + + // Add functions src, tgt : Edge -> Node + sig.add_function("src".to_string(), DerivedSort::Base(edge_id), DerivedSort::Base(node_id)); + sig.add_function("tgt".to_string(), DerivedSort::Base(edge_id), DerivedSort::Base(node_id)); + + // Add binary relation: reachable(from: Node, to: Node) + sig.add_relation( + "reachable".to_string(), + DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(node_id)), + ("to".to_string(), DerivedSort::Base(node_id)), + ]), + ); + + let mut universe = Universe::new(); + let mut structure = Structure::new(2); // 2 sorts + + // Add 3 nodes (sort 0) + for _ in 0..3 { + structure.add_element(&mut universe, node_id); + } + // Add 2 edges (sort 1) + for _ in 0..2 { + structure.add_element(&mut universe, edge_id); + } + + // Define edges: e0: 0->1, e1: 1->2 + structure.init_functions(&[Some(edge_id), Some(edge_id)]); // src, tgt have domain Edge + // e0: src=0, tgt=1 + structure.define_function(0, Slid::from_usize(3), Slid::from_usize(0)).unwrap(); // e0.src = 
node0 + structure.define_function(1, Slid::from_usize(3), Slid::from_usize(1)).unwrap(); // e0.tgt = node1 + // e1: src=1, tgt=2 + structure.define_function(0, Slid::from_usize(4), Slid::from_usize(1)).unwrap(); // e1.src = node1 + structure.define_function(1, Slid::from_usize(4), Slid::from_usize(2)).unwrap(); // e1.tgt = node2 + + // Reachable relation: initially {(0,1), (0,2), (1,2)} + structure.init_relations(&[2]); // One binary relation + structure.assert_relation(0, vec![Slid::from_usize(0), Slid::from_usize(1)]); // 0->1 + structure.assert_relation(0, vec![Slid::from_usize(0), Slid::from_usize(2)]); // 0->2 + structure.assert_relation(0, vec![Slid::from_usize(1), Slid::from_usize(2)]); // 1->2 + + let ctx = CompileContext::new(); + + // Build: [from: e src, to: e tgt] reachable + // This should match edges e where reachable(src(e), tgt(e)) holds + let formula = Formula::Rel( + 0, // reachable + Term::Record(vec![ + ( + "from".to_string(), + Term::App(0, Box::new(Term::Var("e".to_string(), DerivedSort::Base(edge_id)))), // e src + ), + ( + "to".to_string(), + Term::App(1, Box::new(Term::Var("e".to_string(), DerivedSort::Base(edge_id)))), // e tgt + ), + ]), + ); + + let (expr, vars) = compile_formula(&formula, &ctx, &structure, &sig).unwrap(); + let result = expr.materialize(); + + // The formula should match edges where reachable(src(e), tgt(e)) holds + // e0: src=0, tgt=1 -> reachable(0,1) holds ✓ + // e1: src=1, tgt=2 -> reachable(1,2) holds ✓ + // So both edges should match + assert_eq!(vars, vec!["e"]); + assert_eq!(result.len(), 2); // Both edges match + } +} diff --git a/src/tensor/expr.rs b/src/tensor/expr.rs new file mode 100644 index 0000000..c22f500 --- /dev/null +++ b/src/tensor/expr.rs @@ -0,0 +1,454 @@ +//! Lazy tensor expressions. + +use std::collections::{BTreeSet, HashMap}; +use std::rc::Rc; + +use super::sparse::{cartesian_product_of_extents, CartesianProductIter, SparseTensor}; + +/// A lazy tensor expression. 
+/// +/// Operations build up an expression tree rather than immediately computing. +/// Evaluation fuses operations to avoid materializing large intermediates. +#[derive(Clone, Debug)] +pub enum TensorExpr { + /// Materialized sparse tensor (leaf) + Leaf(Rc), + + /// Lazy tensor product (cross join) + /// Result dimensions = concatenation of input dimensions + Product(Vec), + + /// Lazy disjunction (union of extents) + /// All children must have the same dimensions. + /// Result is true wherever ANY child is true (pointwise OR). + Sum(Vec), + + /// Lazy contraction + /// Maps input indices to output indices; indices mapping to same target + /// are identified; targets not in output are summed (OR'd) over. + Contract { + inner: Box, + /// For each input index, which target index (in 0..M) + index_map: Vec, + /// Which target indices appear in output + output: BTreeSet, + }, +} + +impl TensorExpr { + /// Create a leaf from a sparse tensor + pub fn leaf(t: SparseTensor) -> Self { + TensorExpr::Leaf(Rc::new(t)) + } + + /// Create a scalar (0-dimensional) tensor expression + pub fn scalar(value: bool) -> Self { + TensorExpr::leaf(SparseTensor::scalar(value)) + } + + /// Get dimensions without materializing + pub fn dims(&self) -> Vec { + match self { + TensorExpr::Leaf(t) => t.dims.clone(), + TensorExpr::Product(exprs) => exprs.iter().flat_map(|e| e.dims()).collect(), + TensorExpr::Sum(exprs) => { + // All children should have same dims; return first or empty + exprs.first().map(|e| e.dims()).unwrap_or_default() + } + TensorExpr::Contract { + inner, + index_map, + output, + } => { + let inner_dims = inner.dims(); + // Build target -> dim mapping + let mut target_dims: HashMap = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_dims.entry(target).or_insert(inner_dims[i]); + } + // Output dims in order + let max_target = index_map.iter().copied().max().unwrap_or(0); + (0..=max_target) + .filter(|t| output.contains(t)) + .map(|t| 
*target_dims.get(&t).unwrap_or(&1)) + .collect() + } + } + } + + /// Arity (number of dimensions) + pub fn arity(&self) -> usize { + self.dims().len() + } + + /// Materialize the tensor expression into a sparse tensor. + /// + /// This is where fusion happens: Contract(Product(...)) is evaluated + /// without materializing the intermediate product. + pub fn materialize(&self) -> SparseTensor { + match self { + TensorExpr::Leaf(t) => (**t).clone(), + + TensorExpr::Product(exprs) => { + if exprs.is_empty() { + return SparseTensor::scalar(true); + } + // Materialize children and compute Cartesian product + let materialized: Vec = + exprs.iter().map(|e| e.materialize()).collect(); + let dims: Vec = materialized + .iter() + .flat_map(|t| t.dims.iter().copied()) + .collect(); + let extent = cartesian_product_of_extents(&materialized); + SparseTensor { dims, extent } + } + + TensorExpr::Sum(exprs) => { + if exprs.is_empty() { + // Empty disjunction = false = empty tensor with unknown dims + return SparseTensor::scalar(false); + } + // Union of extents (pointwise OR) + let first = exprs[0].materialize(); + let dims = first.dims.clone(); + let mut extent = first.extent; + + for expr in &exprs[1..] 
{ + let child = expr.materialize(); + debug_assert_eq!(child.dims, dims, "Sum children must have same dimensions"); + extent.extend(child.extent); + } + + SparseTensor { dims, extent } + } + + TensorExpr::Contract { + inner, + index_map, + output, + } => { + // Check for fusion opportunity: Contract(Product(...)) + if let TensorExpr::Product(children) = inner.as_ref() { + return self.fused_join(children, index_map, output); + } + + // Fusion: Contract(Sum(...)) distributes + // Contract(Sum(a, b)) = Sum(Contract(a), Contract(b)) + if let TensorExpr::Sum(children) = inner.as_ref() { + let contracted_children: Vec = children + .iter() + .map(|child| TensorExpr::Contract { + inner: Box::new(child.clone()), + index_map: index_map.clone(), + output: output.clone(), + }) + .collect(); + return TensorExpr::Sum(contracted_children).materialize(); + } + + // Otherwise, materialize inner and contract + let inner_tensor = inner.materialize(); + contract_sparse(&inner_tensor, index_map, output) + } + } + } + + /// Fused evaluation of Contract(Product([...])). + /// Avoids materializing the full Cartesian product. 
+ fn fused_join( + &self, + children: &[TensorExpr], + index_map: &[usize], + output: &BTreeSet, + ) -> SparseTensor { + if children.is_empty() { + let inner_result = SparseTensor::scalar(true); + return contract_sparse(&inner_result, index_map, output); + } + + // Materialize children + let materialized: Vec = children.iter().map(|e| e.materialize()).collect(); + + // Compute dimension offsets for each child + let mut offsets = vec![0usize]; + for t in &materialized { + offsets.push(offsets.last().unwrap() + t.arity()); + } + + // Figure out which target indices come from which children + // and which input indices map to each target + let max_target = index_map.iter().copied().max().unwrap_or(0); + let mut target_to_inputs: HashMap> = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_to_inputs.entry(target).or_default().push(i); + } + + // Build output dimensions + let inner_dims: Vec = materialized + .iter() + .flat_map(|t| t.dims.iter().copied()) + .collect(); + let mut target_dims: HashMap = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_dims.entry(target).or_insert(inner_dims[i]); + } + let output_targets: Vec = (0..=max_target).filter(|t| output.contains(t)).collect(); + let output_dims: Vec = output_targets + .iter() + .map(|t| *target_dims.get(t).unwrap_or(&1)) + .collect(); + + // Use hash join for 2-way, nested loops otherwise + // (Future: Leapfrog Triejoin for multi-way) + let mut result_extent: BTreeSet> = BTreeSet::new(); + + if materialized.len() == 2 { + // Hash join + let (t1, t2) = (&materialized[0], &materialized[1]); + let offset2 = offsets[1]; + + // Find join keys: target indices that have inputs from both t1 and t2 + let t1_range = 0..t1.arity(); + let t2_range = offset2..(offset2 + t2.arity()); + + let mut join_targets: Vec = Vec::new(); + let mut t1_key_indices: Vec = Vec::new(); + let mut t2_key_indices: Vec = Vec::new(); + + for (&target, inputs) in &target_to_inputs { + let 
from_t1: Vec<_> = inputs.iter().filter(|&&i| t1_range.contains(&i)).collect(); + let from_t2: Vec<_> = inputs.iter().filter(|&&i| t2_range.contains(&i)).collect(); + if !from_t1.is_empty() && !from_t2.is_empty() { + join_targets.push(target); + t1_key_indices.push(*from_t1[0]); // First input from t1 + t2_key_indices.push(*from_t2[0] - offset2); // First input from t2 (local index) + } + } + + // Build hash table on t1 + let mut hash_table: HashMap, Vec<&Vec>> = HashMap::new(); + for tuple in t1.iter() { + let key: Vec = t1_key_indices.iter().map(|&i| tuple[i]).collect(); + hash_table.entry(key).or_default().push(tuple); + } + + // Probe with t2 + for tuple2 in t2.iter() { + let key: Vec = t2_key_indices.iter().map(|&i| tuple2[i]).collect(); + if let Some(matches) = hash_table.get(&key) { + for tuple1 in matches { + // Combine and check full consistency + let combined: Vec = + tuple1.iter().chain(tuple2.iter()).copied().collect(); + if let Some(out_tuple) = try_project(&combined, index_map, &output_targets) + { + result_extent.insert(out_tuple); + } + } + } + } + } else { + // Nested loops for other cases + for combo in CartesianProductIter::new(&materialized) { + if let Some(out_tuple) = try_project(&combo, index_map, &output_targets) { + result_extent.insert(out_tuple); + } + } + } + + SparseTensor { + dims: output_dims, + extent: result_extent, + } + } + + /// Iterate over result tuples without full materialization. 
+ /// (For now, just materializes; future: streaming evaluation) + pub fn iter(&self) -> impl Iterator> { + self.materialize().extent.into_iter() + } + + /// Check if result is empty (may short-circuit) + pub fn is_empty(&self) -> bool { + // Future: smarter emptiness checking + self.materialize().is_empty() + } + + /// Check if result contains a specific tuple + pub fn contains(&self, tuple: &[usize]) -> bool { + // Future: smarter containment checking + self.materialize().contains(tuple) + } +} + +// ============================================================================ +// INTERNAL HELPERS +// ============================================================================ + +/// Contract a materialized sparse tensor. +fn contract_sparse( + tensor: &SparseTensor, + index_map: &[usize], + output: &BTreeSet, +) -> SparseTensor { + let max_target = index_map.iter().copied().max().unwrap_or(0); + let mut target_dims: HashMap = HashMap::new(); + for (i, &target) in index_map.iter().enumerate() { + target_dims.entry(target).or_insert(tensor.dims[i]); + } + + let output_targets: Vec = (0..=max_target).filter(|t| output.contains(t)).collect(); + let output_dims: Vec = output_targets + .iter() + .map(|t| *target_dims.get(t).unwrap_or(&1)) + .collect(); + + let mut extent: BTreeSet> = BTreeSet::new(); + + for input_tuple in tensor.iter() { + if let Some(out_tuple) = try_project(input_tuple, index_map, &output_targets) { + extent.insert(out_tuple); + } + } + + SparseTensor { + dims: output_dims, + extent, + } +} + +/// Try to project a combined tuple to output indices. +/// Returns None if identified indices don't match. 
+fn try_project( + combined: &[usize], + index_map: &[usize], + output_targets: &[usize], +) -> Option> { + let mut target_values: HashMap = HashMap::new(); + + for (i, &val) in combined.iter().enumerate() { + let target = index_map[i]; + if let Some(&existing) = target_values.get(&target) { + if existing != val { + return None; // Inconsistent + } + } else { + target_values.insert(target, val); + } + } + + Some( + output_targets + .iter() + .map(|t| *target_values.get(t).unwrap_or(&0)) + .collect(), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn leaf(t: SparseTensor) -> TensorExpr { + TensorExpr::leaf(t) + } + + #[test] + fn test_product_simple() { + let mut r = SparseTensor::empty(vec![3]); + r.insert(vec![0]); + r.insert(vec![2]); + + let mut s = SparseTensor::empty(vec![2]); + s.insert(vec![1]); + + let expr = TensorExpr::Product(vec![leaf(r), leaf(s)]); + let result = expr.materialize(); + + assert_eq!(result.dims, vec![3, 2]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0, 1])); + assert!(result.contains(&[2, 1])); + } + + #[test] + fn test_contract_reduction() { + let mut t = SparseTensor::empty(vec![2, 3]); + t.insert(vec![0, 0]); + t.insert(vec![0, 2]); + t.insert(vec![1, 1]); + + let output: BTreeSet = [0].into_iter().collect(); + let expr = TensorExpr::Contract { + inner: Box::new(leaf(t)), + index_map: vec![0, 1], + output, + }; + let result = expr.materialize(); + + assert_eq!(result.dims, vec![2]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0])); + assert!(result.contains(&[1])); + } + + #[test] + fn test_sum_basic() { + // R ∨ S where R = {(0,0), (1,1)} and S = {(1,1), (2,2)} + let mut r = SparseTensor::empty(vec![3, 3]); + r.insert(vec![0, 0]); + r.insert(vec![1, 1]); + + let mut s = SparseTensor::empty(vec![3, 3]); + s.insert(vec![1, 1]); + s.insert(vec![2, 2]); + + let expr = TensorExpr::Sum(vec![leaf(r), leaf(s)]); + let result = expr.materialize(); + + assert_eq!(result.dims, vec![3, 3]); + 
assert_eq!(result.len(), 3); // Union removes duplicates + assert!(result.contains(&[0, 0])); + assert!(result.contains(&[1, 1])); + assert!(result.contains(&[2, 2])); + } + + #[test] + fn test_sum_empty() { + // Empty disjunction = false + let expr = TensorExpr::Sum(vec![]); + let result = expr.materialize(); + + assert!(result.is_empty()); + } + + #[test] + fn test_contract_sum_distributes() { + // Contract(Sum(R, S)) = Sum(Contract(R), Contract(S)) + // Using ∃y. (R(x,y) ∨ S(x,y)) + let mut r = SparseTensor::empty(vec![2, 2]); + r.insert(vec![0, 0]); + r.insert(vec![0, 1]); + + let mut s = SparseTensor::empty(vec![2, 2]); + s.insert(vec![1, 0]); + + let sum = TensorExpr::Sum(vec![leaf(r), leaf(s)]); + + // ∃y: map y to fresh target, output only x + let output: BTreeSet = [0].into_iter().collect(); + let expr = TensorExpr::Contract { + inner: Box::new(sum), + index_map: vec![0, 2], // x→0, y→2 (fresh) + output, + }; + + let result = expr.materialize(); + + assert_eq!(result.dims, vec![2]); + assert_eq!(result.len(), 2); + assert!(result.contains(&[0])); // from R + assert!(result.contains(&[1])); // from S + } +} diff --git a/src/tensor/mod.rs b/src/tensor/mod.rs new file mode 100644 index 0000000..2fd1a6b --- /dev/null +++ b/src/tensor/mod.rs @@ -0,0 +1,31 @@ +//! Lazy tensor expressions for axiom checking +//! +//! A tensor indexed by finite sets A₀, A₁, ..., Aₙ₋₁ is a function +//! [∏ᵢ Aᵢ] → Bool. We represent this sparsely as the set of tuples +//! mapping to true. +//! +//! Key insight: tensor product followed by contraction should NEVER +//! materialize the intermediate product. Instead, we build expression +//! trees and fuse operations during evaluation. +//! +//! Two primitives suffice for einsum-style operations: +//! - **tensor_product**: ⊗ₖ Sₖ — indexed by all indices, value = ∧ of contributions +//! - **contract**: along `a:[N]→[M]`, output `O⊆[M]` — identifies indices, sums over non-output +//! +//! 
Over the Boolean semiring: product = AND, sum = OR. + +mod builder; +mod check; +mod compile; +mod expr; +mod sparse; + +// Re-export main types +pub use builder::{conjunction, conjunction_all, disjunction, disjunction_all, exists}; +pub use check::{check_sequent, check_sequent_bool, check_theory_axioms, CheckResult, Violation}; +pub use compile::{ + build_carrier_index, compile_formula, derived_sort_cardinality, relation_to_tensor, + sort_cardinality, CompileContext, CompileError, +}; +pub use expr::TensorExpr; +pub use sparse::SparseTensor; diff --git a/src/tensor/sparse.rs b/src/tensor/sparse.rs new file mode 100644 index 0000000..e89d840 --- /dev/null +++ b/src/tensor/sparse.rs @@ -0,0 +1,223 @@ +//! Sparse Boolean tensor (materialized). + +use std::collections::BTreeSet; + +/// A sparse Boolean tensor (materialized). +/// +/// Indexed by a product of finite sets with given cardinalities. +/// Stores the set of index tuples that map to `true`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SparseTensor { + /// Cardinality of each index dimension + pub dims: Vec, + /// Set of tuples (as `Vec`) where the tensor is true + /// Each tuple has length == dims.len() + pub extent: BTreeSet>, +} + +impl SparseTensor { + /// Create an empty tensor (all false) with given dimensions + pub fn empty(dims: Vec) -> Self { + Self { + dims, + extent: BTreeSet::new(), + } + } + + /// Create a scalar tensor (0-dimensional) with given value + pub fn scalar(value: bool) -> Self { + let mut extent = BTreeSet::new(); + if value { + extent.insert(vec![]); + } + Self { + dims: vec![], + extent, + } + } + + /// Number of dimensions (arity) + pub fn arity(&self) -> usize { + self.dims.len() + } + + /// Number of true entries + pub fn len(&self) -> usize { + self.extent.len() + } + + /// Check if empty (all false) + pub fn is_empty(&self) -> bool { + self.extent.is_empty() + } + + /// Check if a specific tuple is true + pub fn contains(&self, tuple: &[usize]) -> bool { + 
self.extent.contains(tuple) + } + + /// Insert a tuple (set to true) + pub fn insert(&mut self, tuple: Vec) -> bool { + debug_assert_eq!(tuple.len(), self.dims.len()); + debug_assert!(tuple.iter().zip(&self.dims).all(|(v, d)| *v < *d)); + self.extent.insert(tuple) + } + + /// Remove a tuple (set to false) + pub fn remove(&mut self, tuple: &[usize]) -> bool { + self.extent.remove(tuple) + } + + /// Iterate over all true tuples + pub fn iter(&self) -> impl Iterator> { + self.extent.iter() + } +} + +// ============================================================================ +// ITERATORS +// ============================================================================ + +/// Iterator over all tuples in a domain (Cartesian product of ranges) +pub(crate) struct DomainIterator { + dims: Vec, + current: Vec, + done: bool, +} + +impl DomainIterator { + pub fn new(dims: &[usize]) -> Self { + let done = dims.contains(&0); + Self { + dims: dims.to_vec(), + current: vec![0; dims.len()], + done, + } + } +} + +impl Iterator for DomainIterator { + type Item = Vec; + + fn next(&mut self) -> Option { + if self.done { + return None; + } + + if self.dims.is_empty() { + self.done = true; + return Some(vec![]); + } + + let result = self.current.clone(); + + // Advance (odometer style) + for i in (0..self.dims.len()).rev() { + self.current[i] += 1; + if self.current[i] < self.dims[i] { + break; + } + self.current[i] = 0; + if i == 0 { + self.done = true; + } + } + + Some(result) + } +} + +/// Iterator over Cartesian product of sparse tensor extents +pub(crate) struct CartesianProductIter<'a> { + tensors: &'a [SparseTensor], + iterators: Vec>>, + current: Vec>>, + done: bool, +} + +impl<'a> CartesianProductIter<'a> { + pub fn new(tensors: &'a [SparseTensor]) -> Self { + if tensors.is_empty() { + return Self { + tensors, + iterators: vec![], + current: vec![], + done: false, + }; + } + + let done = tensors.iter().any(|t| t.is_empty()); + let mut iterators: Vec<_> = 
tensors.iter().map(|t| t.extent.iter()).collect(); + let current: Vec<_> = iterators.iter_mut().map(|it| it.next()).collect(); + + Self { + tensors, + iterators, + current, + done, + } + } +} + +impl<'a> Iterator for CartesianProductIter<'a> { + type Item = Vec; + + fn next(&mut self) -> Option { + if self.done { + return None; + } + + if self.tensors.is_empty() { + self.done = true; + return Some(vec![]); + } + + // Build result + let result: Vec = self + .current + .iter() + .filter_map(|opt| opt.as_ref()) + .flat_map(|tuple| tuple.iter().copied()) + .collect(); + + // Advance (odometer style) + for i in (0..self.tensors.len()).rev() { + if let Some(next) = self.iterators[i].next() { + self.current[i] = Some(next); + break; + } else { + self.iterators[i] = self.tensors[i].extent.iter(); + self.current[i] = self.iterators[i].next(); + if i == 0 { + self.done = true; + } + } + } + + Some(result) + } +} + +/// Cartesian product of extents of multiple sparse tensors +pub(crate) fn cartesian_product_of_extents(tensors: &[SparseTensor]) -> BTreeSet> { + CartesianProductIter::new(tensors).collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sparse_tensor_basic() { + let mut t = SparseTensor::empty(vec![3, 2]); + assert_eq!(t.arity(), 2); + assert!(t.is_empty()); + + t.insert(vec![0, 1]); + t.insert(vec![2, 0]); + assert_eq!(t.len(), 2); + assert!(t.contains(&[0, 1])); + assert!(t.contains(&[2, 0])); + assert!(!t.contains(&[0, 0])); + } +} diff --git a/src/universe.rs b/src/universe.rs new file mode 100644 index 0000000..076f8e9 --- /dev/null +++ b/src/universe.rs @@ -0,0 +1,207 @@ +//! Global UUID universe with Luid (Locally Universal ID) mapping +//! +//! This provides a single, persistent index of all UUIDs known to this +//! installation. UUIDs are mapped to compact integer Luids for efficient +//! in-memory operations. +//! +//! Following chit's multi-level ID design: +//! 
- Uuid: 128-bit globally unique identifier (for persistence, cross-system) +//! - Luid: Local index into this installation's universe (for computation) + +use crate::id::{Luid, NumericId, Uuid}; +use indexmap::IndexSet; +use memmap2::Mmap; +use rkyv::ser::Serializer; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::{Archive, Deserialize, Serialize, check_archived_root}; +use std::fs::{self, File}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +/// The global universe of all UUIDs known to this installation. +/// +/// Provides bidirectional mapping between UUIDs and Luids: +/// - `intern(uuid)` → Luid (get or create) +/// - `get(luid)` → Uuid +/// - `lookup(uuid)` → `Option` +/// +/// The universe is persisted to disk and can be memory-mapped for +/// efficient access without loading everything into memory. +#[derive(Debug)] +pub struct Universe { + /// The index mapping Luid → Uuid (and via IndexSet, Uuid → Luid) + index: IndexSet, + /// Path to the universe file (if persistent) + path: Option, + /// Whether there are unsaved changes + dirty: bool, +} + +/// Serializable form of the universe for persistence +#[derive(Archive, Deserialize, Serialize)] +#[archive(check_bytes)] +struct UniverseData { + uuids: Vec, +} + +impl Universe { + /// Create a new empty universe (in-memory only) + pub fn new() -> Self { + Self { + index: IndexSet::new(), + path: None, + dirty: false, + } + } + + /// Create a new universe with a persistence path + pub fn with_path(path: impl Into) -> Self { + Self { + index: IndexSet::new(), + path: Some(path.into()), + dirty: false, + } + } + + /// Load a universe from disk, or create empty if file doesn't exist + pub fn load(path: impl Into) -> Result { + let path = path.into(); + + if !path.exists() { + return Ok(Self::with_path(path)); + } + + let file = File::open(&path).map_err(|e| format!("Failed to open universe file: {}", e))?; + + // Memory-map the file for zero-copy access + let mmap = unsafe { Mmap::map(&file) } + 
.map_err(|e| format!("Failed to mmap universe file: {}", e))?; + + if mmap.is_empty() { + return Ok(Self::with_path(path)); + } + + // Validate and access the archived data + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Failed to validate universe archive: {}", e))?; + + // Deserialize to build the IndexSet + let data: UniverseData = archived + .deserialize(&mut rkyv::Infallible) + .map_err(|_| "Failed to deserialize universe")?; + + let index: IndexSet = data.uuids.into_iter().collect(); + + Ok(Self { + index, + path: Some(path), + dirty: false, + }) + } + + /// Save the universe to disk + pub fn save(&mut self) -> Result<(), String> { + let path = self + .path + .as_ref() + .ok_or("Universe has no persistence path")?; + + // Create parent directories if needed + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create universe directory: {}", e))?; + } + + // Serialize the universe + let data = UniverseData { + uuids: self.index.iter().copied().collect(), + }; + + let mut serializer = AllocSerializer::<1024>::default(); + serializer + .serialize_value(&data) + .map_err(|e| format!("Failed to serialize universe: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + // Write atomically by writing to temp file then renaming + let temp_path = path.with_extension("universe.tmp"); + { + let mut file = File::create(&temp_path) + .map_err(|e| format!("Failed to create temp universe file: {}", e))?; + file.write_all(&bytes) + .map_err(|e| format!("Failed to write universe file: {}", e))?; + file.sync_all() + .map_err(|e| format!("Failed to sync universe file: {}", e))?; + } + + fs::rename(&temp_path, path) + .map_err(|e| format!("Failed to rename universe file: {}", e))?; + + self.dirty = false; + Ok(()) + } + + /// Intern a UUID, returning its Luid (creating if new) + pub fn intern(&mut self, uuid: Uuid) -> Luid { + let (idx, inserted) = self.index.insert_full(uuid); + if inserted 
{ + self.dirty = true; + } + Luid::from_usize(idx) + } + + /// Get the UUID for a Luid + pub fn get(&self, luid: Luid) -> Option { + self.index.get_index(luid.index()).copied() + } + + /// Look up the Luid for a UUID (if known) + pub fn lookup(&self, uuid: &Uuid) -> Option { + self.index.get_index_of(uuid).map(Luid::from_usize) + } + + /// Get the number of UUIDs in the universe + pub fn len(&self) -> usize { + self.index.len() + } + + /// Check if the universe is empty + pub fn is_empty(&self) -> bool { + self.index.is_empty() + } + + /// Check if there are unsaved changes + pub fn is_dirty(&self) -> bool { + self.dirty + } + + /// Iterate over all (Luid, Uuid) pairs + pub fn iter(&self) -> impl Iterator + '_ { + self.index + .iter() + .enumerate() + .map(|(idx, &uuid)| (Luid::from_usize(idx), uuid)) + } + + /// Get the persistence path (if any) + pub fn path(&self) -> Option<&Path> { + self.path.as_deref() + } +} + +impl Default for Universe { + fn default() -> Self { + Self::new() + } +} + +impl Drop for Universe { + fn drop(&mut self) { + // Auto-save on drop if dirty and has a path + if self.dirty && self.path.is_some() { + let _ = self.save(); // Ignore errors on drop + } + } +} + +// Unit tests moved to tests/proptest_universe.rs diff --git a/src/version.rs b/src/version.rs new file mode 100644 index 0000000..da4bb57 --- /dev/null +++ b/src/version.rs @@ -0,0 +1,272 @@ +//! Version control for geolog structures +//! +//! This module provides a simple linear version control system for structures. +//! Patches are serialized to disk and can be loaded to reconstruct any version. 
+ +use crate::core::Structure; +use crate::id::Uuid; +use crate::naming::NamingIndex; +use crate::patch::{Patch, apply_patch, diff, to_initial_patch}; +use crate::universe::Universe; + +use rkyv::ser::Serializer; +use rkyv::ser::serializers::AllocSerializer; +use rkyv::{Deserialize, check_archived_root}; +use std::collections::BTreeMap; +use std::fs::{self, File}; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + +/// A version-controlled state for managing structure history. +/// +/// This provides a simple linear history (no branches/merges yet). +/// Patches are stored on disk and loaded on demand. +/// +/// Contains a Universe for mapping UUIDs to Luids. The Universe is +/// persisted alongside the patches. +#[derive(Debug)] +pub struct VersionedState { + /// All patches, indexed by target_commit UUID + pub patches: BTreeMap, + /// Map from target_commit to source_commit (for walking history) + pub commit_parents: BTreeMap>, + /// Current HEAD commit (None = empty) + pub head: Option, + /// Directory where patches are stored + pub patches_dir: PathBuf, + /// The universe for UUID↔Luid mapping + pub universe: Universe, + /// The naming index for element names + pub naming: NamingIndex, +} + +impl VersionedState { + /// Create a new versioned state with the given patches directory + pub fn new(patches_dir: impl Into) -> Self { + let patches_dir = patches_dir.into(); + let universe_path = patches_dir.join("universe.bin"); + let naming_path = patches_dir.join("names.bin"); + Self { + patches: BTreeMap::new(), + commit_parents: BTreeMap::new(), + head: None, + patches_dir, + universe: Universe::with_path(universe_path), + naming: NamingIndex::with_path(naming_path), + } + } + + /// Load all patches from the patches directory + pub fn load_patches(&mut self) -> Result<(), String> { + fs::create_dir_all(&self.patches_dir) + .map_err(|e| format!("Failed to create patches directory: {}", e))?; + + // Load the universe + let universe_path = 
self.patches_dir.join("universe.bin"); + self.universe = Universe::load(&universe_path)?; + + // Load the naming index + let naming_path = self.patches_dir.join("names.bin"); + self.naming = NamingIndex::load(&naming_path)?; + + let entries = fs::read_dir(&self.patches_dir) + .map_err(|e| format!("Failed to read patches directory: {}", e))?; + + for entry in entries { + let entry = entry.map_err(|e| format!("Failed to read directory entry: {}", e))?; + let path = entry.path(); + + if path.extension().is_some_and(|ext| ext == "patch") { + self.load_patch(&path)?; + } + } + + // Find the head (the commit that is not a source of any other commit) + self.find_head(); + + Ok(()) + } + + /// Load a single patch file + fn load_patch(&mut self, path: &Path) -> Result<(), String> { + let mut file = File::open(path).map_err(|e| format!("Failed to open patch file: {}", e))?; + + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes) + .map_err(|e| format!("Failed to read patch file: {}", e))?; + + // Use check_archived_root for validation + let archived = check_archived_root::(&bytes) + .map_err(|e| format!("Failed to validate patch archive: {}", e))?; + + // Deserialize to owned Patch + let patch: Patch = archived + .deserialize(&mut rkyv::Infallible) + .map_err(|_| "Failed to deserialize patch")?; + + let target = patch.target_commit; + let source = patch.source_commit; + + self.commit_parents.insert(target, source); + self.patches.insert(target, patch); + + Ok(()) + } + + /// Find the head commit (most recent commit not superseded by another) + fn find_head(&mut self) { + // Collect all source commits (commits that have children) + let sources: std::collections::HashSet = + self.commit_parents.values().filter_map(|s| *s).collect(); + + // Head is a commit that is not a source of any other commit + for &commit in self.commit_parents.keys() { + if !sources.contains(&commit) { + self.head = Some(commit); + return; + } + } + } + + /// Save a patch to disk (also saves the 
universe and naming if dirty) + pub fn save_patch(&mut self, patch: &Patch) -> Result<(), String> { + fs::create_dir_all(&self.patches_dir) + .map_err(|e| format!("Failed to create patches directory: {}", e))?; + + let filename = format!("{}.patch", patch.target_commit); + let path = self.patches_dir.join(filename); + + // Serialize with rkyv + let mut serializer = AllocSerializer::<256>::default(); + serializer + .serialize_value(patch) + .map_err(|e| format!("Failed to serialize patch: {}", e))?; + let bytes = serializer.into_serializer().into_inner(); + + let mut file = + File::create(&path).map_err(|e| format!("Failed to create patch file: {}", e))?; + + file.write_all(&bytes) + .map_err(|e| format!("Failed to write patch file: {}", e))?; + + // Save the universe if dirty + if self.universe.is_dirty() { + self.universe.save()?; + } + + // Save the naming index if dirty + if self.naming.is_dirty() { + self.naming.save()?; + } + + Ok(()) + } + + /// Checkout a specific commit, returning the reconstructed structure + /// + /// Also updates the naming index with names from applied patches. 
+ pub fn checkout(&mut self, commit: Uuid) -> Result { + // Build the chain of patches from root to target + let mut chain = Vec::new(); + let mut current = Some(commit); + + while let Some(c) = current { + let patch = self + .patches + .get(&c) + .ok_or_else(|| format!("Commit {} not found", c))?; + chain.push(patch.clone()); + current = patch.source_commit; + } + + // Reverse to apply from root to target + chain.reverse(); + + // Apply patches in order + let mut structure = if let Some(first_patch) = chain.first() { + Structure::new(first_patch.num_sorts) + } else { + return Err("No patches to apply".to_string()); + }; + + // Create a temporary naming index for checkout (don't modify the main one) + let mut checkout_naming = NamingIndex::new(); + + for patch in &chain { + structure = apply_patch(&structure, patch, &mut self.universe, &mut checkout_naming)?; + } + + Ok(structure) + } + + /// Commit a structure, creating a new patch from the current HEAD + /// + /// Returns the new commit's UUID. + /// The naming parameter provides names for elements in the structure. 
+ pub fn commit(&mut self, structure: &Structure, naming: &NamingIndex) -> Result { + let patch = if let Some(head) = self.head { + // Diff from current HEAD + let base = self.checkout(head)?; + // Use empty naming for base (names are reconstructed from patches) + let base_naming = NamingIndex::new(); + let mut patch = diff(&base, structure, &self.universe, &base_naming, naming); + patch.source_commit = Some(head); + patch + } else { + // Initial commit + to_initial_patch(structure, &self.universe, naming) + }; + + // Skip empty patches + if patch.is_empty() { + return Err("No changes to commit".to_string()); + } + + let commit_uuid = patch.target_commit; + + // Apply names from patch to our naming index + for (uuid, name) in &patch.names.additions { + self.naming.insert(*uuid, name.clone()); + } + + // Save to disk + self.save_patch(&patch)?; + + // Update in-memory state + self.commit_parents.insert(commit_uuid, patch.source_commit); + self.patches.insert(commit_uuid, patch); + self.head = Some(commit_uuid); + + Ok(commit_uuid) + } + + /// Get the current HEAD structure, or None if no commits + pub fn get_head_structure(&mut self) -> Result, String> { + match self.head { + Some(head) => Ok(Some(self.checkout(head)?)), + None => Ok(None), + } + } + + /// List all commits in order from oldest to newest + pub fn list_commits(&self) -> Vec { + // Build list by following parents + let mut commits = Vec::new(); + let mut current = self.head; + + while let Some(c) = current { + commits.push(c); + current = self.commit_parents.get(&c).and_then(|p| *p); + } + + commits.reverse(); + commits + } + + /// Get the number of commits + pub fn num_commits(&self) -> usize { + self.patches.len() + } +} + +// Unit tests moved to tests/unit_version.rs diff --git a/src/zerocopy.rs b/src/zerocopy.rs new file mode 100644 index 0000000..0f66b64 --- /dev/null +++ b/src/zerocopy.rs @@ -0,0 +1,422 @@ +//! Zero-copy access to serialized structures via memory mapping. +//! +//! 
This module provides `MappedStructure` which memory-maps a serialized structure +//! file and provides direct access to the archived data without deserialization. +//! +//! # Benefits +//! - **No deserialization cost**: Data is accessed directly from the mmap +//! - **Minimal memory overhead**: Only the mmap exists, no heap copies +//! - **Fast startup**: Opening a structure is O(1), not O(n) elements +//! +//! # Trade-offs +//! - Read-only access (archived types are immutable) +//! - Slightly different API (ArchivedVec vs Vec, etc.) +//! - Requires file to remain valid for lifetime of MappedStructure + +use std::fs::File; +use std::path::Path; +use std::sync::Arc; + +use memmap2::Mmap; +use rkyv::check_archived_root; +use rkyv::Archived; + +use crate::core::{SortId, TupleId}; +use crate::id::{Luid, Slid, NumericId}; +use crate::serialize::{ + StructureData, RelationData, FunctionColumnData, ArchivedFunctionColumnData, +}; + +/// A memory-mapped structure providing zero-copy access to archived data. +/// +/// The structure data is accessed directly from the memory map without +/// deserialization. This is ideal for read-heavy workloads on large structures. +pub struct MappedStructure { + /// The memory map - must outlive all references to archived data + _mmap: Arc, + /// Pointer to the archived structure data (valid for lifetime of mmap) + archived: &'static Archived, +} + +// Safety: The archived data is read-only and the mmap is reference-counted +unsafe impl Send for MappedStructure {} +unsafe impl Sync for MappedStructure {} + +impl MappedStructure { + /// Open a structure file with zero-copy access. + /// + /// The file is memory-mapped and validated. Returns an error if the file + /// cannot be opened or contains invalid data. 
+ pub fn open(path: &Path) -> Result { + let file = File::open(path) + .map_err(|e| format!("Failed to open {}: {}", path.display(), e))?; + + let mmap = unsafe { Mmap::map(&file) } + .map_err(|e| format!("Failed to mmap {}: {}", path.display(), e))?; + + // Validate and get reference to archived data + let archived = check_archived_root::(&mmap) + .map_err(|e| format!("Invalid archive in {}: {:?}", path.display(), e))?; + + // Extend lifetime to 'static - safe because mmap is Arc'd and outlives the reference + let archived: &'static Archived = unsafe { + std::mem::transmute(archived) + }; + + Ok(Self { + _mmap: Arc::new(mmap), + archived, + }) + } + + /// Number of sorts in the structure + #[inline] + pub fn num_sorts(&self) -> usize { + self.archived.num_sorts as usize + } + + /// Number of elements in the structure + #[inline] + pub fn len(&self) -> usize { + self.archived.luids.len() + } + + /// Check if empty + #[inline] + pub fn is_empty(&self) -> bool { + self.archived.luids.is_empty() + } + + /// Number of functions + #[inline] + pub fn num_functions(&self) -> usize { + self.archived.functions.len() + } + + /// Number of relations + #[inline] + pub fn num_relations(&self) -> usize { + self.archived.relations.len() + } + + /// Get the Luid for an element by Slid + #[inline] + pub fn get_luid(&self, slid: Slid) -> Option { + self.archived.luids.get(slid.index()).map(|l| Luid::from_usize(l.rep as usize)) + } + + /// Get the sort for an element by Slid + #[inline] + pub fn get_sort(&self, slid: Slid) -> Option { + self.archived.sorts.get(slid.index()).map(|&s| s as SortId) + } + + /// Iterate over all (slid, luid, sort) triples + pub fn elements(&self) -> impl Iterator + '_ { + self.archived.luids.iter().enumerate().map(|(i, luid)| { + let slid = Slid::from_usize(i); + let luid = Luid::from_usize(luid.rep as usize); + let sort = self.archived.sorts[i] as SortId; + (slid, luid, sort) + }) + } + + /// Get a zero-copy view of a relation + pub fn relation(&self, 
rel_id: usize) -> Option> { + self.archived.relations.get(rel_id).map(|r| MappedRelation { archived: r }) + } + + /// Iterate over all relations + pub fn relations(&self) -> impl Iterator> + '_ { + self.archived.relations.iter().map(|r| MappedRelation { archived: r }) + } + + /// Get a zero-copy view of a function column + pub fn function(&self, func_id: usize) -> Option> { + self.archived.functions.get(func_id).map(|f| MappedFunction { archived: f }) + } + + /// Get elements of a particular sort (zero-copy iteration) + pub fn elements_of_sort(&self, sort_id: SortId) -> impl Iterator + '_ { + self.archived.sorts.iter().enumerate() + .filter(move |&(_, s)| *s as SortId == sort_id) + .map(|(i, _)| Slid::from_usize(i)) + } +} + +/// A zero-copy view of an archived relation. +pub struct MappedRelation<'a> { + archived: &'a Archived, +} + +impl<'a> MappedRelation<'a> { + /// Relation arity + #[inline] + pub fn arity(&self) -> usize { + self.archived.arity as usize + } + + /// Number of tuples in the relation (including non-live ones) + #[inline] + pub fn tuple_count(&self) -> usize { + self.archived.tuples.len() + } + + /// Number of live tuples (in the extent) + #[inline] + pub fn live_count(&self) -> usize { + self.archived.extent.len() + } + + /// Get a tuple by ID (zero-copy - returns slice into mmap) + pub fn get_tuple(&self, id: TupleId) -> Option + '_> { + self.archived.tuples.get(id).map(|tuple| { + tuple.iter().map(|s| Slid::from_usize(s.rep as usize)) + }) + } + + /// Iterate over live tuple IDs + pub fn live_tuple_ids(&self) -> impl Iterator + '_ { + self.archived.extent.iter().map(|&id| id as TupleId) + } + + /// Iterate over live tuples (zero-copy) + pub fn live_tuples(&self) -> impl Iterator + '_> + '_ { + self.live_tuple_ids().filter_map(|id| self.get_tuple(id)) + } +} + +/// A zero-copy view of an archived function column. 
+pub struct MappedFunction<'a> { + archived: &'a Archived, +} + +impl<'a> MappedFunction<'a> { + /// Check if this is a local function + pub fn is_local(&self) -> bool { + matches!(self.archived, ArchivedFunctionColumnData::Local(_)) + } + + /// Get function value for a domain element (local functions only) + pub fn get_local(&self, domain_sort_local_id: usize) -> Option { + match self.archived { + ArchivedFunctionColumnData::Local(col) => { + col.get(domain_sort_local_id).and_then(|opt| { + // ArchivedOption - check if Some + match opt { + rkyv::option::ArchivedOption::Some(idx) => { + Some(Slid::from_usize(*idx as usize)) + } + rkyv::option::ArchivedOption::None => None, + } + }) + } + _ => None, + } + } + + /// Iterate over defined local function values: (domain_sort_local_id, codomain_slid) + pub fn iter_local(&self) -> impl Iterator + '_ { + match self.archived { + ArchivedFunctionColumnData::Local(col) => { + itertools::Either::Left(col.iter().enumerate().filter_map(|(i, opt)| { + match opt { + rkyv::option::ArchivedOption::Some(idx) => { + Some((i, Slid::from_usize(*idx as usize))) + } + rkyv::option::ArchivedOption::None => None, + } + })) + } + _ => itertools::Either::Right(std::iter::empty()), + } + } + + /// Iterate over product domain function values: (tuple, result_slid) + pub fn iter_product(&self) -> impl Iterator, Slid)> + '_ { + match self.archived { + ArchivedFunctionColumnData::ProductLocal { entries, .. 
} => { + itertools::Either::Left(entries.iter().map(|(tuple, result)| { + let tuple: Vec = tuple.iter().map(|&x| x as usize).collect(); + let result = Slid::from_usize(*result as usize); + (tuple, result) + })) + } + _ => itertools::Either::Right(std::iter::empty()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::Structure; + use crate::universe::Universe; + use crate::serialize::save_structure; + use tempfile::tempdir; + + #[test] + fn test_mapped_structure_basic() { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + + // Create and save a structure + let mut universe = Universe::new(); + let mut structure = Structure::new(2); // 2 sorts + structure.init_relations(&[1, 2]); // unary and binary relations + + // Add some elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 1); + + // Assert some relation tuples + structure.assert_relation(0, vec![a]); + structure.assert_relation(0, vec![b]); + structure.assert_relation(1, vec![a, c]); + + save_structure(&structure, &path).unwrap(); + + // Open with zero-copy + let mapped = MappedStructure::open(&path).unwrap(); + + assert_eq!(mapped.num_sorts(), 2); + assert_eq!(mapped.len(), 3); + assert_eq!(mapped.num_relations(), 2); + + // Check relation 0 (unary) + let rel0 = mapped.relation(0).unwrap(); + assert_eq!(rel0.arity(), 1); + assert_eq!(rel0.live_count(), 2); + + // Check relation 1 (binary) + let rel1 = mapped.relation(1).unwrap(); + assert_eq!(rel1.arity(), 2); + assert_eq!(rel1.live_count(), 1); + + // Iterate over live tuples + let tuples: Vec> = rel0.live_tuples() + .map(|t| t.collect()) + .collect(); + assert_eq!(tuples.len(), 2); + } + + #[test] + fn test_zero_copy_elements() { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + + let mut universe = Universe::new(); + let mut structure = 
Structure::new(3); + + // Add elements to different sorts + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 1); + structure.add_element(&mut universe, 2); + structure.add_element(&mut universe, 2); + structure.add_element(&mut universe, 2); + + save_structure(&structure, &path).unwrap(); + + let mapped = MappedStructure::open(&path).unwrap(); + + // Count elements per sort + assert_eq!(mapped.elements_of_sort(0).count(), 2); + assert_eq!(mapped.elements_of_sort(1).count(), 1); + assert_eq!(mapped.elements_of_sort(2).count(), 3); + } + + /// Benchmark test comparing zero-copy vs deserialize access patterns. + /// Run with: `cargo test --release benchmark_zerocopy -- --ignored --nocapture` + #[test] + #[ignore] + fn benchmark_zerocopy_vs_deserialize() { + use crate::serialize::load_structure; + use std::time::Instant; + + let dir = tempdir().unwrap(); + let path = dir.path().join("large.structure"); + + // Create a moderately large structure + let num_elements = 100_000; + let num_sorts = 10; + let num_relations = 5; + + eprintln!("Creating structure with {} elements, {} sorts, {} relations...", + num_elements, num_sorts, num_relations); + + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + // Initialize relations with varying arities + let arities: Vec = (0..num_relations).map(|i| (i % 3) + 1).collect(); + structure.init_relations(&arities); + + // Add elements distributed across sorts + let elements: Vec = (0..num_elements) + .map(|i| { + let sort = i % num_sorts; + let (slid, _) = structure.add_element(&mut universe, sort); + slid + }) + .collect(); + + // Add some relation tuples + for (rel_id, &arity) in arities.iter().enumerate().take(num_relations) { + for i in (0..1000).step_by(arity) { + let tuple: Vec = (0..arity) + .map(|j| elements[(i + j) % num_elements]) + .collect(); + structure.assert_relation(rel_id, tuple); + } + } + + 
save_structure(&structure, &path).unwrap(); + + let file_size = std::fs::metadata(&path).unwrap().len(); + eprintln!("Structure file size: {} bytes ({:.2} KB)", file_size, file_size as f64 / 1024.0); + + // Benchmark: deserialize approach (current) + const ITERATIONS: usize = 100; + + eprintln!("\n--- Deserialize approach ({} iterations) ---", ITERATIONS); + let start = Instant::now(); + for _ in 0..ITERATIONS { + let loaded = load_structure(&path).unwrap(); + // Access pattern: count elements per sort using carrier_size + let _counts: Vec = (0..num_sorts) + .map(|sort| loaded.carrier_size(sort)) + .collect(); + // Also access all elements to exercise deserialization + let _total: usize = loaded.luids.len(); + } + let deserialize_time = start.elapsed(); + eprintln!("Total: {:?}, Per iteration: {:?}", + deserialize_time, deserialize_time / ITERATIONS as u32); + + // Benchmark: zero-copy approach (new) + eprintln!("\n--- Zero-copy approach ({} iterations) ---", ITERATIONS); + let start = Instant::now(); + for _ in 0..ITERATIONS { + let mapped = MappedStructure::open(&path).unwrap(); + // Same access pattern: count elements of each sort + let _counts: Vec = (0..num_sorts) + .map(|sort| mapped.elements_of_sort(sort).count()) + .collect(); + // Also access len + let _total: usize = mapped.len(); + } + let zerocopy_time = start.elapsed(); + eprintln!("Total: {:?}, Per iteration: {:?}", + zerocopy_time, zerocopy_time / ITERATIONS as u32); + + // Compare + let speedup = deserialize_time.as_nanos() as f64 / zerocopy_time.as_nanos() as f64; + eprintln!("\n--- Results ---"); + eprintln!("Zero-copy is {:.2}x faster than deserialize", speedup); + + // The zero-copy approach should be faster for large structures + // (we don't assert this since performance varies by system) + } +} diff --git a/tests/examples_integration.rs b/tests/examples_integration.rs new file mode 100644 index 0000000..d7e40f6 --- /dev/null +++ b/tests/examples_integration.rs @@ -0,0 +1,960 @@ +//! 
Integration tests for example .geolog files +//! +//! These tests ensure that the example files in `examples/geolog/` remain +//! valid as the language evolves. They serve as living documentation. + +use geolog::repl::ReplState; +use std::fs; +use std::path::Path; + +/// Helper to load and execute a .geolog file, returning the REPL state +fn load_geolog_file(path: &Path) -> Result { + let content = fs::read_to_string(path) + .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?; + + let mut state = ReplState::new(); + + // Use execute_geolog which handles everything correctly + state + .execute_geolog(&content) + .map_err(|e| format!("Error in {}: {}", path.display(), e))?; + + Ok(state) +} + +// ============================================================================ +// Graph examples +// ============================================================================ + +#[test] +fn test_graph_example_parses() { + let path = Path::new("examples/geolog/graph.geolog"); + let state = load_geolog_file(path).expect("graph.geolog should parse and elaborate"); + + // Check theory + let graph = state.theories.get("Graph").expect("Graph theory should exist"); + assert_eq!(graph.theory.signature.sorts.len(), 2, "Graph should have 2 sorts (V, E)"); + assert_eq!(graph.theory.signature.functions.len(), 2, "Graph should have 2 functions (src, tgt)"); + + // Check instances + assert!(state.instances.contains_key("Triangle"), "Triangle instance should exist"); + assert!(state.instances.contains_key("Loop"), "Loop instance should exist"); + assert!(state.instances.contains_key("Arrow"), "Arrow instance should exist"); + assert!(state.instances.contains_key("Diamond"), "Diamond instance should exist"); +} + +#[test] +fn test_graph_triangle_structure() { + let path = Path::new("examples/geolog/graph.geolog"); + let state = load_geolog_file(path).unwrap(); + + let triangle = state.instances.get("Triangle").unwrap(); + + // Triangle has 3 vertices + 3 edges = 6 elements 
+ assert_eq!(triangle.structure.len(), 6, "Triangle should have 6 elements"); + + // Check carrier sizes: V has 3, E has 3 + assert_eq!(triangle.structure.carrier_size(0), 3, "Triangle should have 3 vertices"); + assert_eq!(triangle.structure.carrier_size(1), 3, "Triangle should have 3 edges"); +} + +#[test] +fn test_graph_diamond_structure() { + let path = Path::new("examples/geolog/graph.geolog"); + let state = load_geolog_file(path).unwrap(); + + let diamond = state.instances.get("Diamond").unwrap(); + + // Diamond has 4 vertices + 4 edges = 8 elements + assert_eq!(diamond.structure.len(), 8, "Diamond should have 8 elements"); + assert_eq!(diamond.structure.carrier_size(0), 4, "Diamond should have 4 vertices"); + assert_eq!(diamond.structure.carrier_size(1), 4, "Diamond should have 4 edges"); +} + +// ============================================================================ +// Petri net examples +// ============================================================================ + +#[test] +fn test_petri_net_example_parses() { + let path = Path::new("examples/geolog/petri_net.geolog"); + let state = load_geolog_file(path).expect("petri_net.geolog should parse and elaborate"); + + // Check theory + let petri = state.theories.get("PetriNet").expect("PetriNet theory should exist"); + assert_eq!(petri.theory.signature.sorts.len(), 4, "PetriNet should have 4 sorts (P, T, In, Out)"); + assert_eq!(petri.theory.signature.functions.len(), 4, "PetriNet should have 4 functions"); + + // Check instances + assert!(state.instances.contains_key("ProducerConsumer")); + assert!(state.instances.contains_key("MutualExclusion")); +} + +// ============================================================================ +// Petri Net Showcase - Full Type-Theoretic Encoding (from 2025-12-12 vision) +// ============================================================================ + +#[test] +fn test_petri_net_showcase_loads() { + let path = 
Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).expect("petri_net_showcase.geolog should parse and elaborate"); + + // Check theories + assert!(state.theories.contains_key("PetriNet"), "PetriNet theory should exist"); + assert!(state.theories.contains_key("Marking"), "Marking theory should exist"); + assert!(state.theories.contains_key("ReachabilityProblem"), "ReachabilityProblem theory should exist"); + assert!(state.theories.contains_key("Trace"), "Trace theory should exist"); + assert!(state.theories.contains_key("Iso"), "Iso theory should exist"); + assert!(state.theories.contains_key("Solution"), "Solution theory should exist"); + + // Check PetriNet theory structure + let petri = state.theories.get("PetriNet").unwrap(); + assert_eq!(petri.theory.signature.sorts.len(), 4, "PetriNet should have 4 sorts"); + assert_eq!(petri.theory.signature.functions.len(), 4, "PetriNet should have 4 functions"); + + // Check parameterized theories have correct parameter structure + let marking = state.theories.get("Marking").unwrap(); + assert_eq!(marking.params.len(), 1, "Marking should have 1 parameter (N : PetriNet instance)"); + assert_eq!(marking.params[0].name, "N"); + + let reach_prob = state.theories.get("ReachabilityProblem").unwrap(); + assert_eq!(reach_prob.params.len(), 1, "ReachabilityProblem should have 1 parameter"); + + let trace = state.theories.get("Trace").unwrap(); + assert_eq!(trace.params.len(), 1, "Trace should have 1 parameter"); + + let iso = state.theories.get("Iso").unwrap(); + assert_eq!(iso.params.len(), 2, "Iso should have 2 parameters (X : Sort, Y : Sort)"); + + let solution = state.theories.get("Solution").unwrap(); + assert_eq!(solution.params.len(), 2, "Solution should have 2 parameters (N, RP)"); + + // Check instances + assert!(state.instances.contains_key("ExampleNet"), "ExampleNet instance should exist"); + assert!(state.instances.contains_key("problem0"), "problem0 instance should exist"); + 
assert!(state.instances.contains_key("solution0"), "solution0 instance should exist"); + assert!(state.instances.contains_key("problem2"), "problem2 instance should exist"); + assert!(state.instances.contains_key("solution2"), "solution2 instance should exist"); +} + +#[test] +fn test_petri_net_showcase_example_net_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + let example_net = state.instances.get("ExampleNet").unwrap(); + + // ExampleNet has: + // - 3 places (A, B, C) + // - 3 transitions (ab, ba, abc) + // - 4 input arcs (ab_in, ba_in, abc_in1, abc_in2) + // - 3 output arcs (ab_out, ba_out, abc_out) + // Total: 3 + 3 + 4 + 3 = 13 elements + assert_eq!(example_net.structure.len(), 13, "ExampleNet should have 13 elements"); + + // Check carrier sizes by sort index (P=0, T=1, in=2, out=3) + assert_eq!(example_net.structure.carrier_size(0), 3, "ExampleNet should have 3 places"); + assert_eq!(example_net.structure.carrier_size(1), 3, "ExampleNet should have 3 transitions"); + assert_eq!(example_net.structure.carrier_size(2), 4, "ExampleNet should have 4 input arcs"); + assert_eq!(example_net.structure.carrier_size(3), 3, "ExampleNet should have 3 output arcs"); +} + +#[test] +fn test_petri_net_showcase_problem0_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + // problem0: 1 token in A -> 1 token in B + let problem0 = state.instances.get("problem0").unwrap(); + + // ReachabilityProblem structure includes the nested Marking instances + // This test verifies the cross-references work correctly + assert!(!problem0.structure.is_empty(), "problem0 should have elements"); +} + +#[test] +fn test_petri_net_showcase_solution0_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + // solution0 proves A -> B reachability by firing 
'ab' once + let solution0 = state.instances.get("solution0").unwrap(); + + // Solution structure includes nested Trace and Iso instances + // The trace has: 1 firing, 1 input terminal, 1 output terminal + // Plus the isomorphism mappings + assert!(!solution0.structure.is_empty(), "solution0 should have elements"); +} + +#[test] +fn test_petri_net_showcase_solution2_structure() { + let path = Path::new("examples/geolog/petri_net_showcase.geolog"); + let state = load_geolog_file(path).unwrap(); + + // solution2 proves 2A -> C reachability by firing 'ab' then 'abc' + // This is the complex case: requires firing ab to move one A-token to B, + // then abc consumes both an A-token and the new B-token to produce C + let solution2 = state.instances.get("solution2").unwrap(); + + assert!(!solution2.structure.is_empty(), "solution2 should have elements"); +} + +/// NEGATIVE TEST: Verify that an incomplete solution correctly fails axiom checking. +/// +/// This test ensures that the Trace theory's wire axioms properly catch +/// solutions that are missing required wires between firings. 
+#[test] +fn test_solution2_incomplete_fails_axiom_check() { + let path = Path::new("tests/negative/solution2_incomplete_negative_test.geolog"); + let result = load_geolog_file(path); + + // This file should FAIL to load because it's missing a wire + // connecting f1's output to f2's input + match result { + Ok(_) => panic!("Incomplete solution2 should fail axiom checking, but it succeeded"), + Err(err) => { + assert!( + err.contains("axiom") && err.contains("violated"), + "Error should mention axiom violation, got: {}", + err + ); + } + } +} + +#[test] +fn test_petri_net_producer_consumer() { + let path = Path::new("examples/geolog/petri_net.geolog"); + let state = load_geolog_file(path).unwrap(); + + let pc = state.instances.get("ProducerConsumer").unwrap(); + + // ProducerConsumer: 3 places + 2 transitions + 2 input arcs + 2 output arcs = 9 + assert_eq!(pc.structure.len(), 9, "ProducerConsumer should have 9 elements"); +} + +#[test] +fn test_petri_net_mutual_exclusion() { + let path = Path::new("examples/geolog/petri_net.geolog"); + let state = load_geolog_file(path).unwrap(); + + let mutex = state.instances.get("MutualExclusion").unwrap(); + + // MutualExclusion: 5 places + 4 transitions + 6 input arcs + 6 output arcs = 21 + assert_eq!(mutex.structure.len(), 21, "MutualExclusion should have 21 elements"); +} + +// ============================================================================ +// Monoid example (with product domain function support) +// ============================================================================ + +#[test] +fn test_monoid_example_parses() { + let path = Path::new("examples/geolog/monoid.geolog"); + let state = load_geolog_file(path).expect("monoid.geolog should parse and elaborate"); + + // Check theory + let monoid = state.theories.get("Monoid").expect("Monoid theory should exist"); + assert_eq!(monoid.theory.signature.sorts.len(), 1, "Monoid should have 1 sort (M)"); + assert_eq!(monoid.theory.signature.functions.len(), 2, 
"Monoid should have 2 functions (mul, id)"); + assert_eq!(monoid.theory.axioms.len(), 4, "Monoid should have 4 axioms"); + + // Check instances (product domain support via geolog-ulh) + assert!(state.instances.contains_key("Trivial"), "Trivial monoid should exist"); + assert!(state.instances.contains_key("BoolAnd"), "BoolAnd monoid should exist"); + assert!(state.instances.contains_key("BoolOr"), "BoolOr monoid should exist"); +} + +#[test] +fn test_monoid_trivial_structure() { + let path = Path::new("examples/geolog/monoid.geolog"); + let state = load_geolog_file(path).unwrap(); + + let trivial = state.instances.get("Trivial").unwrap(); + + // Trivial monoid has 1 element + assert_eq!(trivial.structure.carrier_size(0), 1, "Trivial monoid should have 1 element"); + + // Check id function (base domain: M -> M) + // id: e -> e + assert!(trivial.structure.functions[1].as_local().is_some(), "id should be a local function"); + let id_col = trivial.structure.functions[1].as_local().unwrap(); + assert_eq!(id_col.len(), 1, "id should have 1 entry"); + assert!(id_col[0].is_some(), "id(e) should be defined"); + + // Check mul function (product domain: M × M -> M) + // mul: (e,e) -> e + if let geolog::core::FunctionColumn::ProductLocal { storage, field_sorts } = &trivial.structure.functions[0] { + assert_eq!(field_sorts.len(), 2, "mul should have 2-element domain"); + assert_eq!(storage.defined_count(), 1, "mul should have 1 entry defined"); + } else { + panic!("mul should be a ProductLocal function"); + } +} + +#[test] +fn test_monoid_bool_and_structure() { + let path = Path::new("examples/geolog/monoid.geolog"); + let state = load_geolog_file(path).unwrap(); + + let bool_and = state.instances.get("BoolAnd").unwrap(); + + // BoolAnd has 2 elements (T, F) + assert_eq!(bool_and.structure.carrier_size(0), 2, "BoolAnd should have 2 elements"); + + // Check mul function (product domain): all 4 entries should be defined + if let geolog::core::FunctionColumn::ProductLocal { 
storage, .. } = &bool_and.structure.functions[0] { + assert_eq!(storage.defined_count(), 4, "mul should have all 4 entries defined (2×2)"); + + // Verify it's total + assert!(storage.is_total(&[2, 2]), "mul should be total on 2×2 domain"); + } else { + panic!("mul should be a ProductLocal function"); + } + + // Check id function (base domain): both entries defined + let id_col = bool_and.structure.functions[1].as_local().unwrap(); + assert_eq!(id_col.len(), 2, "id should have 2 entries"); + assert!(id_col.iter().all(|opt| opt.is_some()), "id should be total"); +} + +#[test] +fn test_monoid_bool_or_structure() { + let path = Path::new("examples/geolog/monoid.geolog"); + let state = load_geolog_file(path).unwrap(); + + let bool_or = state.instances.get("BoolOr").unwrap(); + + // BoolOr has 2 elements (T, F) + assert_eq!(bool_or.structure.carrier_size(0), 2, "BoolOr should have 2 elements"); + + // Check mul function is total + if let geolog::core::FunctionColumn::ProductLocal { storage, .. 
} = &bool_or.structure.functions[0] { + assert!(storage.is_total(&[2, 2]), "mul should be total on 2×2 domain"); + } else { + panic!("mul should be a ProductLocal function"); + } +} + +// ============================================================================ +// Preorder example +// ============================================================================ + +#[test] +fn test_preorder_example_parses() { + let path = Path::new("examples/geolog/preorder.geolog"); + let state = load_geolog_file(path).expect("preorder.geolog should parse and elaborate"); + + // Check theory + let preorder = state.theories.get("Preorder").expect("Preorder theory should exist"); + assert_eq!(preorder.theory.signature.sorts.len(), 1, "Preorder should have 1 sort (X)"); + assert_eq!(preorder.theory.signature.relations.len(), 1, "Preorder should have 1 relation (leq)"); + assert_eq!(preorder.theory.axioms.len(), 2, "Preorder should have 2 axioms (refl, trans)"); + + // Check instances + assert!(state.instances.contains_key("Discrete3")); + assert!(state.instances.contains_key("Chain3")); +} + +// ============================================================================ +// Transitive closure example (demonstrates chase algorithm) +// ============================================================================ + +#[test] +fn test_transitive_closure_example_parses() { + let path = Path::new("examples/geolog/transitive_closure.geolog"); + let state = load_geolog_file(path).expect("transitive_closure.geolog should parse and elaborate"); + + // Check theory + let graph = state.theories.get("Graph").expect("Graph theory should exist"); + assert_eq!(graph.theory.signature.sorts.len(), 1, "Graph should have 1 sort (V)"); + assert_eq!(graph.theory.signature.relations.len(), 2, "Graph should have 2 relations (Edge, Path)"); + assert_eq!(graph.theory.axioms.len(), 2, "Graph should have 2 axioms (base, trans)"); + + // Check instances + assert!(state.instances.contains_key("Chain"), "Chain 
instance should exist"); + assert!(state.instances.contains_key("Diamond"), "Diamond instance should exist"); + assert!(state.instances.contains_key("Cycle"), "Cycle instance should exist"); +} + +#[test] +fn test_transitive_closure_chain_structure() { + let path = Path::new("examples/geolog/transitive_closure.geolog"); + let state = load_geolog_file(path).unwrap(); + + let chain = state.instances.get("Chain").unwrap(); + + // Chain has 4 vertices + assert_eq!(chain.structure.carrier_size(0), 4, "Chain should have 4 vertices"); + + // With `= chase { ... }`, axioms are applied during elaboration. + // Path now has 6 tuples (transitive closure computed automatically). + use geolog::core::RelationStorage; + assert_eq!(chain.structure.relations[0].len(), 3, "Chain should have 3 Edge tuples"); + assert_eq!(chain.structure.relations[1].len(), 6, + "Chain should have 6 Path tuples after chase: 3 base + 2 one-step + 1 two-step"); +} + +#[test] +fn test_transitive_closure_chase() { + use geolog::core::RelationStorage; + use geolog::query::chase::chase_fixpoint; + use geolog::universe::Universe; + + let path = Path::new("examples/geolog/transitive_closure.geolog"); + let mut state = load_geolog_file(path).unwrap(); + + let chain = state.instances.get_mut("Chain").unwrap(); + let theory = state.theories.get("Graph").unwrap(); + + // Chase already ran during elaboration (instance uses `= chase { ... }`), + // so Path already has 6 tuples. 
+ assert_eq!(chain.structure.relations[1].len(), 6, + "Chain should have 6 Path tuples after elaboration with chase"); + + // Running chase again should be idempotent (1 iteration, no changes) + let mut universe = Universe::new(); + + let iterations = chase_fixpoint( + &theory.theory.axioms, + &mut chain.structure, + &mut universe, + &theory.theory.signature, + 100, + ).unwrap(); + + // Should converge immediately (already at fixpoint) + assert_eq!(iterations, 1, "Chase should converge in 1 iteration when already at fixpoint"); + + // Still have 6 Path tuples + assert_eq!(chain.structure.relations[1].len(), 6, + "Chain should still have 6 Path tuples"); +} + +// ============================================================================ +// Theories: GeologMeta and RelAlgIR +// ============================================================================ + +#[test] +fn test_geolog_meta_loads() { + let path = Path::new("theories/GeologMeta.geolog"); + let state = load_geolog_file(path).expect("GeologMeta.geolog should parse and elaborate"); + + let meta = state.theories.get("GeologMeta").expect("GeologMeta theory should exist"); + + // GeologMeta is a large theory: 41 sorts, 78 functions, 3 relations, 16 axioms + assert_eq!(meta.theory.signature.sorts.len(), 41, "GeologMeta should have 41 sorts"); + assert_eq!(meta.theory.signature.functions.len(), 78, "GeologMeta should have 78 functions"); + assert_eq!(meta.theory.signature.relations.len(), 3, "GeologMeta should have 3 relations"); + assert_eq!(meta.theory.axioms.len(), 16, "GeologMeta should have 16 axioms"); + + // Check some key sorts exist + assert!(meta.theory.signature.lookup_sort("Theory").is_some(), "Theory sort should exist"); + assert!(meta.theory.signature.lookup_sort("Srt").is_some(), "Srt sort should exist"); + assert!(meta.theory.signature.lookup_sort("Func").is_some(), "Func sort should exist"); + assert!(meta.theory.signature.lookup_sort("Elem").is_some(), "Elem sort should exist"); +} + +#[test] 
+fn test_relalg_ir_loads() { + // First load GeologMeta (RelAlgIR extends it) + let meta_content = fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + + let mut state = ReplState::new(); + + state.execute_geolog(&meta_content) + .expect("GeologMeta should load"); + state.execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + + let ir = state.theories.get("RelAlgIR").expect("RelAlgIR theory should exist"); + + // RelAlgIR has 80 sorts (41 from GeologMeta + 39 own) + assert_eq!(ir.theory.signature.sorts.len(), 80, "RelAlgIR should have 80 sorts"); + + // Check GeologMeta sorts are correctly qualified + assert!(ir.theory.signature.lookup_sort("GeologMeta/Srt").is_some(), + "GeologMeta/Srt should exist (inherited sort)"); + assert!(ir.theory.signature.lookup_sort("GeologMeta/Func").is_some(), + "GeologMeta/Func should exist (inherited sort)"); + + // Check RelAlgIR's own sorts exist (no prefix) + assert!(ir.theory.signature.lookup_sort("Wire").is_some(), + "Wire sort should exist"); + assert!(ir.theory.signature.lookup_sort("Op").is_some(), + "Op sort should exist"); + assert!(ir.theory.signature.lookup_sort("ScanOp").is_some(), + "ScanOp sort should exist"); + + // Check functions are correctly qualified + // GeologMeta's "Func/dom" should become "GeologMeta/Func/dom" + assert!(ir.theory.signature.lookup_func("GeologMeta/Func/dom").is_some(), + "GeologMeta/Func/dom should exist (inherited function)"); + assert!(ir.theory.signature.lookup_func("GeologMeta/Func/cod").is_some(), + "GeologMeta/Func/cod should exist (inherited function)"); + + // RelAlgIR's own functions + assert!(ir.theory.signature.lookup_func("Wire/schema").is_some(), + "Wire/schema should exist"); + assert!(ir.theory.signature.lookup_func("ScanOp/out").is_some(), + "ScanOp/out should exist"); + + // Check functions referencing inherited 
sorts have correct domain/codomain + // ScanOp/srt : ScanOp -> GeologMeta/Srt + let scan_srt = ir.theory.signature.lookup_func("ScanOp/srt") + .expect("ScanOp/srt should exist"); + let func_info = &ir.theory.signature.functions[scan_srt]; + match &func_info.codomain { + geolog::core::DerivedSort::Base(sort_id) => { + let sort_name = &ir.theory.signature.sorts[*sort_id]; + assert_eq!(sort_name, "GeologMeta/Srt", + "ScanOp/srt codomain should be GeologMeta/Srt"); + } + _ => panic!("ScanOp/srt codomain should be a base sort"), + } +} + +// ============================================================================ +// RelAlgIR query plan examples +// ============================================================================ + +/// Test that RelAlgIR instances can be created and represent query plans +/// +/// These instances use `= chase { ... }` to derive relations from axioms. +/// The chase handles function applications in premises (e.g., `s ScanOp/out = w`) +/// and universal conclusions (e.g., `forall x. |- R(x,x)`). 
+#[test] +fn test_relalg_simple_examples() { + // Load theories first + let meta_content = fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + let examples_content = fs::read_to_string("examples/geolog/relalg_simple.geolog") + .expect("Failed to read relalg_simple.geolog"); + + let mut state = ReplState::new(); + + state.execute_geolog(&meta_content) + .expect("GeologMeta should load"); + state.execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + state.execute_geolog(&examples_content) + .expect("relalg_simple.geolog should load"); + + // Check ScanV instance + let scan_v = state.instances.get("ScanV") + .expect("ScanV instance should exist"); + assert_eq!(scan_v.structure.len(), 7, "ScanV should have 7 elements"); + + // Check FilterScan instance + let filter_scan = state.instances.get("FilterScan") + .expect("FilterScan instance should exist"); + assert_eq!(filter_scan.structure.len(), 18, "FilterScan should have 18 elements"); + + // Verify FilterScan has the expected sorts populated + // Get RelAlgIR theory for sort lookups + let ir = state.theories.get("RelAlgIR").expect("RelAlgIR should exist"); + + // Check Wire sort has 2 elements (w1, w2) + let wire_sort = ir.theory.signature.lookup_sort("Wire").expect("Wire sort"); + assert_eq!( + filter_scan.structure.carrier_size(wire_sort), 2, + "FilterScan should have 2 Wire elements" + ); + + // Check FilterOp sort has 1 element + let filter_sort = ir.theory.signature.lookup_sort("FilterOp").expect("FilterOp sort"); + assert_eq!( + filter_scan.structure.carrier_size(filter_sort), 1, + "FilterScan should have 1 FilterOp element" + ); + + // Check ScanOp sort has 1 element + let scan_sort = ir.theory.signature.lookup_sort("ScanOp").expect("ScanOp sort"); + assert_eq!( + filter_scan.structure.carrier_size(scan_sort), 1, + "FilterScan should have 1 ScanOp 
element" + ); +} + +// ============================================================================ +// RelAlgIR compile → execute roundtrip +// ============================================================================ + +/// Helper to load RelAlgIR theory for tests +fn load_relalg_for_test() -> (ReplState, std::rc::Rc) { + let meta_content = fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + + let mut state = ReplState::new(); + state + .execute_geolog(&meta_content) + .expect("GeologMeta should load"); + state + .execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + + let relalg_theory = state + .theories + .get("RelAlgIR") + .expect("RelAlgIR should exist") + .clone(); + + (state, relalg_theory) +} + +/// Helper to verify roundtrip: direct execution == RelAlgIR execution +fn verify_roundtrip( + plan: &geolog::query::backend::QueryOp, + target: &geolog::core::Structure, + relalg_theory: &geolog::core::ElaboratedTheory, + description: &str, +) { + use geolog::query::backend::execute; + use geolog::query::from_relalg::execute_relalg; + use geolog::query::to_relalg::compile_to_relalg; + use geolog::universe::Universe; + + // Execute directly + let direct_result = execute(plan, target); + + // Compile to RelAlgIR + let mut universe = Universe::new(); + let relalg_instance = compile_to_relalg(plan, &std::rc::Rc::new(relalg_theory.clone()), &mut universe) + .unwrap_or_else(|e| panic!("{}: Compilation failed: {}", description, e)); + + // Execute via RelAlgIR interpreter + let relalg_result = execute_relalg(&relalg_instance, relalg_theory, target, None) + .unwrap_or_else(|e| panic!("{}: RelAlgIR execution failed: {}", description, e)); + + // Compare results + assert_eq!( + direct_result.len(), + relalg_result.len(), + "{}: Length mismatch ({} vs {})", + description, + direct_result.len(), + 
relalg_result.len() + ); + + for (tuple, mult) in direct_result.iter() { + assert_eq!( + relalg_result.tuples.get(tuple), + Some(mult), + "{}: Tuple {:?} has wrong multiplicity", + description, + tuple + ); + } +} + +/// Tests that we can compile a query to RelAlgIR and then execute it, +/// getting the same results as direct execution. +#[test] +fn test_relalg_compile_execute_roundtrip() { + use geolog::core::Structure; + use geolog::query::backend::QueryOp; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create a simple test structure with 3 elements in sort 0 + let mut target = Structure::new(1); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[0].insert(2); + + // Test Scan + let scan_plan = QueryOp::Scan { sort_idx: 0 }; + verify_roundtrip(&scan_plan, &target, &relalg_theory, "Scan"); +} + +#[test] +fn test_relalg_roundtrip_filter() { + use geolog::core::Structure; + use geolog::id::{NumericId, Slid}; + use geolog::query::backend::{Predicate, QueryOp}; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure with 5 elements + let mut target = Structure::new(1); + for i in 0..5 { + target.carriers[0].insert(i); + } + + // Filter with True predicate (should keep all) + let filter_true = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::True, + }; + verify_roundtrip(&filter_true, &target, &relalg_theory, "Filter(True)"); + + // Filter with False predicate (should keep none) + let filter_false = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::False, + }; + verify_roundtrip(&filter_false, &target, &relalg_theory, "Filter(False)"); + + // Filter with ColEqConst + let filter_const = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(2), + }, + }; + verify_roundtrip(&filter_const, &target, &relalg_theory, "Filter(ColEqConst)"); +} + +#[test] +fn 
test_relalg_roundtrip_join() { + use geolog::core::Structure; + use geolog::query::backend::{JoinCond, QueryOp}; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure with 2 sorts + let mut target = Structure::new(2); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[1].insert(10); + target.carriers[1].insert(11); + target.carriers[1].insert(12); + + // Cross join + let cross_join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + verify_roundtrip(&cross_join, &target, &relalg_theory, "Join(Cross)"); +} + +#[test] +fn test_relalg_roundtrip_union() { + use geolog::core::Structure; + use geolog::query::backend::QueryOp; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure + let mut target = Structure::new(2); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[1].insert(2); + target.carriers[1].insert(3); + + // Union of two scans + let union_plan = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + }; + verify_roundtrip(&union_plan, &target, &relalg_theory, "Union"); +} + +#[test] +fn test_relalg_roundtrip_distinct_negate() { + use geolog::core::Structure; + use geolog::query::backend::QueryOp; + + let (_, relalg_theory) = load_relalg_for_test(); + + // Create structure + let mut target = Structure::new(1); + target.carriers[0].insert(0); + target.carriers[0].insert(1); + target.carriers[0].insert(2); + + // Distinct + let distinct_plan = QueryOp::Distinct { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + verify_roundtrip(&distinct_plan, &target, &relalg_theory, "Distinct"); + + // Negate + let negate_plan = QueryOp::Negate { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + verify_roundtrip(&negate_plan, &target, &relalg_theory, "Negate"); +} + +// 
============================================================================ +// Meta-test: all examples should parse +// ============================================================================ + +/// Tests that all standalone .geolog example files parse and elaborate. +/// +/// Note: Some examples require loading theories first (e.g., relalg_simple.geolog +/// requires GeologMeta and RelAlgIR). These are tested separately. +#[test] +fn test_all_examples_parse() { + let examples_dir = Path::new("examples/geolog"); + + if !examples_dir.exists() { + panic!("examples/geolog directory does not exist"); + } + + // Examples that require loading theories first (tested separately) + let requires_theories = [ + "relalg_simple.geolog", + ]; + + let mut failures = Vec::new(); + + for entry in fs::read_dir(examples_dir).unwrap() { + let entry = entry.unwrap(); + let path = entry.path(); + + // Skip files that require loading theories first + if path.file_name() + .and_then(|f| f.to_str()) + .is_some_and(|file_name| requires_theories.contains(&file_name)) + { + continue; + } + + if path.extension().is_some_and(|ext| ext == "geolog") + && let Err(e) = load_geolog_file(&path) { + failures.push(format!("{}: {}", path.display(), e)); + } + } + + if !failures.is_empty() { + panic!( + "The following example files failed to parse/elaborate:\n{}", + failures.join("\n") + ); + } +} + +// ============================================================================ +// Multi-session persistence tests +// ============================================================================ + +/// Tests that theories and instances survive REPL restarts +/// +/// This is a critical test for persistence: create data in one "session", +/// then verify it's still accessible after creating a new ReplState with +/// the same persistence path. 
+#[test] +fn test_persistence_survives_restart() { + use tempfile::tempdir; + + let dir = tempdir().expect("Failed to create temp directory"); + let db_path = dir.path().join("test.geolog"); + + // Session 1: Create a theory and instance + { + let mut state = ReplState::with_path(&db_path); + + let theory_def = r#" + theory Counter { + C : Sort; + next : C -> C; + } + "#; + state.execute_geolog(theory_def).expect("Theory should define"); + + let instance_def = r#" + instance Mod3 : Counter = { + zero : C; + one : C; + two : C; + zero next = one; + one next = two; + two next = zero; + } + "#; + state.execute_geolog(instance_def).expect("Instance should define"); + + // Verify it's in the current session + assert!(state.theories.contains_key("Counter"), "Counter theory should exist in session 1"); + assert!(state.instances.contains_key("Mod3"), "Mod3 instance should exist in session 1"); + + // Explicitly save before dropping + state.store.save().expect("Save should succeed"); + } + + // Session 2: Load from same path and verify data persists + { + let state = ReplState::with_path(&db_path); + + // Theory should be reconstructed + assert!( + state.theories.contains_key("Counter"), + "Counter theory should persist across sessions" + ); + + // Instance should be reconstructed + assert!( + state.instances.contains_key("Mod3"), + "Mod3 instance should persist across sessions" + ); + + // Verify instance structure has correct element count + let mod3 = state.instances.get("Mod3").expect("Mod3 should exist"); + assert_eq!( + mod3.structure.len(), + 3, + "Mod3 should have 3 elements after reload" + ); + } +} + +/// Tests that chase-derived data persists correctly +#[test] +fn test_persistence_with_chase() { + use tempfile::tempdir; + + let dir = tempdir().expect("Failed to create temp directory"); + let db_path = dir.path().join("chase_test.geolog"); + + // Session 1: Create a theory with chase + { + let mut state = ReplState::with_path(&db_path); + + let content = r#" + 
theory Preorder { + X : Sort; + leq : [lo: X, hi: X] -> Prop; + ax/refl : forall x : X. |- [lo: x, hi: x] leq; + } + + instance Three : Preorder = chase { + a : X; + b : X; + c : X; + } + "#; + state.execute_geolog(content).expect("Should define theory and chase instance"); + + // Verify chase added diagonal tuples + let three = state.instances.get("Three").expect("Three should exist"); + assert_eq!(three.structure.relations[0].tuples.len(), 3, "Should have 3 reflexive tuples"); + + // Explicitly save before dropping + state.store.save().expect("Save should succeed"); + } + + // Session 2: Verify chase results persist + { + let state = ReplState::with_path(&db_path); + + assert!(state.theories.contains_key("Preorder"), "Theory should persist"); + assert!(state.instances.contains_key("Three"), "Instance should persist"); + + let three = state.instances.get("Three").expect("Three should exist"); + assert_eq!( + three.structure.relations[0].tuples.len(), + 3, + "Chase-derived tuples should persist" + ); + } +} diff --git a/tests/generators.rs b/tests/generators.rs new file mode 100644 index 0000000..82b0c38 --- /dev/null +++ b/tests/generators.rs @@ -0,0 +1,375 @@ +//! Proptest generators for geolog data structures +//! +//! Provides `Strategy` implementations for generating valid instances +//! of core data types used in property tests. 
+ +#![allow(dead_code)] + +use geolog::core::{SortId, Structure}; +use geolog::id::{NumericId, Slid, Uuid}; +use geolog::naming::NamingIndex; +use geolog::universe::Universe; +use proptest::collection::vec; +use proptest::prelude::*; +use std::collections::HashSet; + +// ============================================================================ +// UUID Generation +// ============================================================================ + +/// Generate arbitrary UUIDs (using v7 format) +pub fn arb_uuid() -> impl Strategy { + // Generate random bytes for the UUID + prop::array::uniform16(any::()).prop_map(|bytes| { + // Create a valid v7-ish UUID from random bytes + Uuid::from_bytes(bytes) + }) +} + +/// Generate a vector of unique UUIDs +pub fn arb_unique_uuids(count: usize) -> impl Strategy> { + vec(arb_uuid(), count..=count).prop_filter_map("unique uuids", |uuids| { + let set: HashSet<_> = uuids.iter().collect(); + if set.len() == uuids.len() { + Some(uuids) + } else { + None + } + }) +} + +// ============================================================================ +// Name Generation +// ============================================================================ + +/// Generate a valid identifier (alphanumeric, starting with letter) +pub fn arb_identifier() -> impl Strategy { + "[a-zA-Z][a-zA-Z0-9_]{0,15}".prop_map(String::from) +} + +/// Generate a qualified name path (non-empty vector of identifiers) +pub fn arb_qualified_name() -> impl Strategy> { + vec(arb_identifier(), 1..=3) +} + +// ============================================================================ +// Structure Generation +// ============================================================================ + +/// Parameters for structure generation +#[derive(Debug, Clone)] +pub struct StructureParams { + pub num_sorts: usize, + pub max_elements_per_sort: usize, +} + +impl Default for StructureParams { + fn default() -> Self { + Self { + num_sorts: 3, + max_elements_per_sort: 5, + } + } 
+} + +/// Generate a valid Structure with elements distributed across sorts +pub fn arb_structure(params: StructureParams) -> impl Strategy { + // Generate element counts for each sort + vec(0..=params.max_elements_per_sort, params.num_sorts) + .prop_flat_map(move |element_counts| { + let num_sorts = params.num_sorts; + Just((element_counts, num_sorts)) + }) + .prop_map(|(element_counts, num_sorts)| { + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + for (sort_id, &count) in element_counts.iter().enumerate() { + for _ in 0..count { + structure.add_element(&mut universe, sort_id as SortId); + } + } + + (structure, universe) + }) +} + +/// Generate a structure with specific element count +pub fn arb_structure_with_elements( + num_sorts: usize, + total_elements: usize, +) -> impl Strategy { + // Distribute elements randomly across sorts + vec(0..num_sorts, total_elements).prop_map(move |sort_assignments| { + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + for sort_id in sort_assignments { + structure.add_element(&mut universe, sort_id as SortId); + } + + (structure, universe) + }) +} + +// ============================================================================ +// NamingIndex Generation +// ============================================================================ + +/// Generate a NamingIndex with random entries +pub fn arb_naming_index(max_entries: usize) -> impl Strategy { + vec((arb_uuid(), arb_qualified_name()), 0..=max_entries).prop_filter_map( + "unique uuids in naming", + |entries| { + // Ensure UUIDs are unique + let uuids: HashSet<_> = entries.iter().map(|(u, _)| u).collect(); + if uuids.len() == entries.len() { + let mut index = NamingIndex::new(); + for (uuid, name) in entries { + index.insert(uuid, name); + } + Some(index) + } else { + None + } + }, + ) +} + +/// Generate a NamingIndex that matches a Universe (same UUIDs) +pub fn arb_naming_for_universe(universe: 
&Universe) -> impl Strategy { + let uuids: Vec = universe.iter().map(|(_, uuid)| uuid).collect(); + let count = uuids.len(); + + vec(arb_qualified_name(), count).prop_map(move |names| { + let mut index = NamingIndex::new(); + for (uuid, name) in uuids.iter().zip(names.into_iter()) { + index.insert(*uuid, name); + } + index + }) +} + +// ============================================================================ +// Element Operations (for testing add/remove sequences) +// ============================================================================ + +/// An operation on a structure +#[derive(Debug, Clone)] +pub enum StructureOp { + AddElement { sort_id: SortId }, +} + +/// Generate a sequence of structure operations +pub fn arb_structure_ops( + num_sorts: usize, + max_ops: usize, +) -> impl Strategy> { + vec( + (0..num_sorts).prop_map(|sort_id| StructureOp::AddElement { sort_id }), + 0..=max_ops, + ) +} + +// ============================================================================ +// Test Helpers +// ============================================================================ + +/// Check that a Structure maintains its internal invariants +pub fn check_structure_invariants(structure: &Structure) -> Result<(), String> { + // Invariant 1: luids and sorts have same length + if structure.luids.len() != structure.sorts.len() { + return Err(format!( + "luids.len({}) != sorts.len({})", + structure.luids.len(), + structure.sorts.len() + )); + } + + // Invariant 2: luid_to_slid is inverse of luids + for (slid_idx, &luid) in structure.luids.iter().enumerate() { + let slid = Slid::from_usize(slid_idx); + match structure.luid_to_slid.get(&luid) { + Some(&mapped_slid) if mapped_slid == slid => {} + Some(&mapped_slid) => { + return Err(format!( + "luid_to_slid[{}] = {}, but luids[{}] = {}", + luid, mapped_slid, slid, luid + )); + } + None => { + return Err(format!( + "luid {} at slid {} not in luid_to_slid", + luid, slid + )); + } + } + } + + // Invariant 3: Each element 
appears in exactly one carrier, matching its sort + for (slid, &sort_id) in structure.sorts.iter().enumerate() { + if sort_id >= structure.carriers.len() { + return Err(format!( + "sort_id {} at slid {} >= carriers.len({})", + sort_id, + slid, + structure.carriers.len() + )); + } + + if !structure.carriers[sort_id].contains(slid as u64) { + return Err(format!( + "slid {} with sort {} not in carriers[{}]", + slid, sort_id, sort_id + )); + } + + // Check it's not in any other carrier + for (other_sort, carrier) in structure.carriers.iter().enumerate() { + if other_sort != sort_id && carrier.contains(slid as u64) { + return Err(format!( + "slid {} appears in carrier {} but has sort {}", + slid, other_sort, sort_id + )); + } + } + } + + // Invariant 4: Total carrier size equals number of elements + let total_carrier_size: usize = structure.carriers.iter().map(|c| c.len() as usize).sum(); + if total_carrier_size != structure.luids.len() { + return Err(format!( + "total carrier size {} != luids.len({})", + total_carrier_size, + structure.luids.len() + )); + } + + Ok(()) +} + +/// Check that two structures are equivalent (same elements and functions) +pub fn structures_equivalent(s1: &Structure, s2: &Structure, u1: &Universe, u2: &Universe) -> bool { + // Same number of sorts + if s1.num_sorts() != s2.num_sorts() { + return false; + } + + // Same number of elements + if s1.len() != s2.len() { + return false; + } + + // Same UUIDs (via Luid lookup) + let uuids1: HashSet<_> = s1.luids.iter().filter_map(|&luid| u1.get(luid)).collect(); + let uuids2: HashSet<_> = s2.luids.iter().filter_map(|&luid| u2.get(luid)).collect(); + + uuids1 == uuids2 +} + +// ============================================================================ +// Tensor Generation +// ============================================================================ + +use geolog::tensor::SparseTensor; +use std::collections::BTreeSet; + +/// Parameters for sparse tensor generation +#[derive(Debug, Clone)] +pub 
struct TensorParams { + pub max_dims: usize, + pub max_dim_size: usize, + pub max_tuples: usize, +} + +impl Default for TensorParams { + fn default() -> Self { + Self { + max_dims: 4, + max_dim_size: 10, + max_tuples: 20, + } + } +} + +/// Generate a random sparse tensor +pub fn arb_sparse_tensor(params: TensorParams) -> impl Strategy { + // First generate dimensions + vec(1..=params.max_dim_size, 0..=params.max_dims).prop_flat_map(move |dims| { + let dims_clone = dims.clone(); + let max_tuples = params.max_tuples; + + // Generate tuples within the dimension bounds + if dims.is_empty() { + // Scalar tensor - either true or false + any::() + .prop_map(|value| { + let mut extent = BTreeSet::new(); + if value { + extent.insert(vec![]); + } + SparseTensor { dims: vec![], extent } + }) + .boxed() + } else { + // Generate random tuples + let tuple_gen = dims + .iter() + .map(|&d| 0..d) + .collect::>(); + + vec(tuple_gen.prop_map(|indices| indices), 0..=max_tuples) + .prop_map(move |tuples| { + let extent: BTreeSet> = tuples.into_iter().collect(); + SparseTensor { + dims: dims_clone.clone(), + extent, + } + }) + .boxed() + } + }) +} + +/// Generate a sparse tensor with specific dimensions +pub fn arb_sparse_tensor_with_dims(dims: Vec, max_tuples: usize) -> impl Strategy { + if dims.is_empty() { + any::() + .prop_map(|value| { + let mut extent = BTreeSet::new(); + if value { + extent.insert(vec![]); + } + SparseTensor { dims: vec![], extent } + }) + .boxed() + } else { + let tuple_gen: Vec<_> = dims.iter().map(|&d| 0..d).collect(); + let dims_clone = dims.clone(); + + vec(tuple_gen.prop_map(|indices| indices), 0..=max_tuples) + .prop_map(move |tuples| { + let extent: BTreeSet> = tuples.into_iter().collect(); + SparseTensor { + dims: dims_clone.clone(), + extent, + } + }) + .boxed() + } +} + +/// Generate a pair of tensors with matching dimensions (for disjunction tests) +pub fn arb_tensor_pair_same_dims(params: TensorParams) -> impl Strategy { + 
vec(1..=params.max_dim_size, 0..=params.max_dims).prop_flat_map(move |dims| { + let max_tuples = params.max_tuples; + let t1 = arb_sparse_tensor_with_dims(dims.clone(), max_tuples); + let t2 = arb_sparse_tensor_with_dims(dims, max_tuples); + (t1, t2) + }) +} + +/// Generate variable names +pub fn arb_var_names(count: usize) -> impl Strategy> { + Just((0..count).map(|i| format!("v{}", i)).collect()) +} diff --git a/tests/manual_fuzz.rs b/tests/manual_fuzz.rs new file mode 100644 index 0000000..d810eda --- /dev/null +++ b/tests/manual_fuzz.rs @@ -0,0 +1,188 @@ +//! Quick manual fuzzer - run with: cargo test --release manual_fuzz -- --ignored --nocapture + +use geolog::repl::ReplState; +use rand::prelude::*; +use std::time::Instant; + +fn random_ascii_string(rng: &mut impl Rng, len: usize) -> String { + (0..len).map(|_| rng.random_range(0x20u8..0x7F) as char).collect() +} + +fn random_geolog_like(rng: &mut impl Rng) -> String { + let keywords = ["theory", "instance", "Sort", "Prop", "forall", "exists", "chase"]; + let ops = [":", "->", "=", "|-", "{", "}", "[", "]", "(", ")", ";", ",", "."]; + let idents = ["x", "y", "z", "A", "B", "foo", "bar", "src", "tgt"]; + + let mut s = String::new(); + let len = rng.random_range(1..200); + for _ in 0..len { + match rng.random_range(0..4) { + 0 => s.push_str(keywords.choose(rng).unwrap()), + 1 => s.push_str(ops.choose(rng).unwrap()), + 2 => s.push_str(idents.choose(rng).unwrap()), + _ => s.push(' '), + } + if rng.random_bool(0.3) { s.push(' '); } + } + s +} + +#[test] +#[ignore] +fn manual_fuzz_parser() { + let mut rng = rand::rng(); + let start = Instant::now(); + let mut count = 0; + let mut errors = 0; + + while start.elapsed().as_secs() < 10 { + let len = rng.random_range(1usize..500); + let input = if rng.random_bool(0.5) { + random_ascii_string(&mut rng, len) + } else { + random_geolog_like(&mut rng) + }; + + // This should never panic + let result = std::panic::catch_unwind(|| { + let _ = geolog::parse(&input); + }); + + 
if result.is_err() { + eprintln!("PANIC on input: {:?}", input); + errors += 1; + } + count += 1; + } + + eprintln!("Ran {} iterations, {} panics", count, errors); + assert_eq!(errors, 0, "Parser panicked on some inputs!"); +} + +#[test] +#[ignore] +fn manual_fuzz_repl() { + let mut rng = rand::rng(); + let start = Instant::now(); + let mut count = 0; + let mut errors = 0; + + while start.elapsed().as_secs() < 10 { + let len = rng.random_range(1usize..500); + let input = if rng.random_bool(0.5) { + random_ascii_string(&mut rng, len) + } else { + random_geolog_like(&mut rng) + }; + + let result = std::panic::catch_unwind(|| { + let mut state = ReplState::new(); + let _ = state.execute_geolog(&input); + }); + + if result.is_err() { + eprintln!("PANIC on input: {:?}", input); + errors += 1; + } + count += 1; + } + + eprintln!("Ran {} iterations, {} panics", count, errors); + assert_eq!(errors, 0, "REPL panicked on some inputs!"); +} + +/// More aggressive fuzzer with edge-case generators +#[test] +#[ignore] +fn manual_fuzz_edge_cases() { + let mut rng = rand::rng(); + let start = Instant::now(); + let mut count = 0; + let mut errors = 0; + + // Edge case generators + let edge_cases: Vec String> = vec![ + // Deep nesting + |rng: &mut rand::rngs::ThreadRng| { + let depth = rng.random_range(10..100); + let mut s = "theory T { ".repeat(depth); + s.push_str(&"}".repeat(depth)); + s + }, + // Very long identifiers + |rng: &mut rand::rngs::ThreadRng| { + let len = rng.random_range(1000..10000); + format!("theory {} {{ }}", "a".repeat(len)) + }, + // Many small tokens + |rng: &mut rand::rngs::ThreadRng| { + let count = rng.random_range(100..1000); + (0..count).map(|_| "x ").collect::() + }, + // Unicode stress + |_rng: &mut rand::rngs::ThreadRng| { + "theory 日本語 { ∀ : Sort; ∃ : Sort -> Prop; }".to_string() + }, + // Null bytes and control chars + |rng: &mut rand::rngs::ThreadRng| { + let mut s = String::from("theory T { "); + for _ in 0..rng.random_range(1..50) { + 
s.push(rng.random_range(0u8..32) as char); + } + s.push_str(" }"); + s + }, + // Deeply nested records + |rng: &mut rand::rngs::ThreadRng| { + let depth = rng.random_range(5..30); + let mut s = String::from("theory T { f : "); + for _ in 0..depth { + s.push_str("[x: "); + } + s.push_str("Sort"); + for _ in 0..depth { + s.push_str("]"); + } + s.push_str(" -> Prop; }"); + s + }, + // Many axioms + |rng: &mut rand::rngs::ThreadRng| { + let count = rng.random_range(50..200); + let mut s = String::from("theory T { X : Sort; "); + for i in 0..count { + s.push_str(&format!("ax{} : forall x : X. |- x = x; ", i)); + } + s.push('}'); + s + }, + // Pathological chase + |_rng: &mut rand::rngs::ThreadRng| { + r#" + theory Loop { X : Sort; r : [a: X, b: X] -> Prop; + ax : forall x : X. |- exists y : X. [a: x, b: y] r; + } + instance I : Loop = chase { start : X; } + "#.to_string() + }, + ]; + + while start.elapsed().as_secs() < 30 { + let gen_idx = rng.random_range(0..edge_cases.len()); + let input = edge_cases[gen_idx](&mut rng); + + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut state = ReplState::new(); + let _ = state.execute_geolog(&input); + })); + + if result.is_err() { + eprintln!("PANIC on input (gen {}): {:?}", gen_idx, &input[..input.len().min(200)]); + errors += 1; + } + count += 1; + } + + eprintln!("Ran {} edge-case iterations, {} panics", count, errors); + assert_eq!(errors, 0, "REPL panicked on edge cases!"); +} diff --git a/tests/negative/solution2_incomplete_negative_test.geolog b/tests/negative/solution2_incomplete_negative_test.geolog new file mode 100644 index 0000000..f3ee078 --- /dev/null +++ b/tests/negative/solution2_incomplete_negative_test.geolog @@ -0,0 +1,218 @@ +// NEGATIVE TEST: This file contains an INTENTIONALLY INCOMPLETE solution. +// +// The trace for problem2 is missing the wire connecting f1's output to f2's input. +// When Trace theory has proper wire axioms, this should FAIL axiom checking. 
+// +// This file serves as a regression test: if this ever starts passing, +// either the axioms are broken or the solver has a bug. + +// ============================================================ +// THEORY: PetriNet +// ============================================================ + +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +// ============================================================ +// THEORY: Marking +// ============================================================ + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} + +// ============================================================ +// THEORY: ReachabilityProblem +// ============================================================ + +theory (N : PetriNet instance) ReachabilityProblem { + initial_marking : N Marking instance; + target_marking : N Marking instance; +} + +// ============================================================ +// THEORY: Trace (with Wire axioms) +// +// A trace records transition firings and token flow via wires. +// The completeness axioms (ax5, ax6) ensure every arc is accounted for. +// ============================================================ + +theory (N : PetriNet instance) Trace { + // Firings + F : Sort; + F/of : F -> N/T; + + // Wires connect output arcs of firings to input arcs of other firings + W : Sort; + W/src_firing : W -> F; + W/src_arc : W -> N/out; + W/tgt_firing : W -> F; + W/tgt_arc : W -> N/in; + + // Wire coherence: source arc must belong to source firing's transition + ax/wire_src_coherent : forall w : W. + |- w W/src_arc N/out/src = w W/src_firing F/of; + + // Wire coherence: target arc must belong to target firing's transition + ax/wire_tgt_coherent : forall w : W. 
+ |- w W/tgt_arc N/in/tgt = w W/tgt_firing F/of; + + // Wire place coherence: wire connects matching places + ax/wire_place_coherent : forall w : W. + |- w W/src_arc N/out/tgt = w W/tgt_arc N/in/src; + + // Terminals + input_terminal : Sort; + output_terminal : Sort; + input_terminal/of : input_terminal -> N/P; + output_terminal/of : output_terminal -> N/P; + + // Terminals connect to specific firings and arcs + input_terminal/tgt_firing : input_terminal -> F; + input_terminal/tgt_arc : input_terminal -> N/in; + output_terminal/src_firing : output_terminal -> F; + output_terminal/src_arc : output_terminal -> N/out; + + // Terminal coherence axioms + ax/input_terminal_coherent : forall i : input_terminal. + |- i input_terminal/tgt_arc N/in/tgt = i input_terminal/tgt_firing F/of; + + ax/output_terminal_coherent : forall o : output_terminal. + |- o output_terminal/src_arc N/out/src = o output_terminal/src_firing F/of; + + // Terminal place coherence + ax/input_terminal_place : forall i : input_terminal. + |- i input_terminal/of = i input_terminal/tgt_arc N/in/src; + + ax/output_terminal_place : forall o : output_terminal. + |- o output_terminal/of = o output_terminal/src_arc N/out/tgt; + + // COMPLETENESS: Every arc of every firing must be accounted for. + + // Input completeness: catches the missing wire in solution2! + ax/input_complete : forall f : F, arc : N/in. + arc N/in/tgt = f F/of |- + (exists w : W. w W/tgt_firing = f, w W/tgt_arc = arc) \/ + (exists i : input_terminal. i input_terminal/tgt_firing = f, i input_terminal/tgt_arc = arc); + + // Output completeness: every output arc must be captured + ax/output_complete : forall f : F, arc : N/out. + arc N/out/src = f F/of |- + (exists w : W. w W/src_firing = f, w W/src_arc = arc) \/ + (exists o : output_terminal. 
o output_terminal/src_firing = f, o output_terminal/src_arc = arc); +} + +// ============================================================ +// THEORY: Iso +// ============================================================ + +theory (X : Sort) (Y : Sort) Iso { + fwd : X -> Y; + bwd : Y -> X; + + // Roundtrip axioms ensure this is a true bijection + fb : forall x : X. |- x fwd bwd = x; + bf : forall y : Y. |- y bwd fwd = y; +} + +// ============================================================ +// THEORY: Solution +// ============================================================ + +theory (N : PetriNet instance) (RP : N ReachabilityProblem instance) Solution { + trace : N Trace instance; + initial_iso : (trace/input_terminal) (RP/initial_marking/token) Iso instance; + target_iso : (trace/output_terminal) (RP/target_marking/token) Iso instance; +} + +// ============================================================ +// INSTANCE: ExampleNet +// ============================================================ + +instance ExampleNet : PetriNet = { + A : P; B : P; C : P; + ab : T; ba : T; abc : T; + + ab_in : in; ab_in in/src = A; ab_in in/tgt = ab; + ab_out : out; ab_out out/src = ab; ab_out out/tgt = B; + + ba_in : in; ba_in in/src = B; ba_in in/tgt = ba; + ba_out : out; ba_out out/src = ba; ba_out out/tgt = A; + + abc_in1 : in; abc_in1 in/src = A; abc_in1 in/tgt = abc; + abc_in2 : in; abc_in2 in/src = B; abc_in2 in/tgt = abc; + abc_out : out; abc_out out/src = abc; abc_out out/tgt = C; +} + +// ============================================================ +// PROBLEM 2: Can we reach C from two A-tokens? 
+// ============================================================ + +instance problem2 : ExampleNet ReachabilityProblem = { + initial_marking = { + t1 : token; t1 token/of = ExampleNet/A; + t2 : token; t2 token/of = ExampleNet/A; + }; + target_marking = { + t : token; + t token/of = ExampleNet/C; + }; +} + +// ============================================================ +// INCOMPLETE SOLUTION 2: This should FAIL! +// +// The trace has two firings (f1: ab, f2: abc) but NO WIRE +// connecting f1's output to f2's B-input. The axiom +// ax/must_be_fed should catch this: f2's abc_in2 arc +// is neither wired nor terminal-fed. +// ============================================================ + +instance solution2_incomplete : ExampleNet problem2 Solution = { + trace = { + f1 : F; f1 F/of = ExampleNet/ab; + f2 : F; f2 F/of = ExampleNet/abc; + + // Input terminals for the two initial A-tokens + it1 : input_terminal; + it1 input_terminal/of = ExampleNet/A; + it1 input_terminal/tgt_firing = f1; + it1 input_terminal/tgt_arc = ExampleNet/ab_in; + + it2 : input_terminal; + it2 input_terminal/of = ExampleNet/A; + it2 input_terminal/tgt_firing = f2; + it2 input_terminal/tgt_arc = ExampleNet/abc_in1; + + // Output terminal for the final C-token + ot : output_terminal; + ot output_terminal/of = ExampleNet/C; + ot output_terminal/src_firing = f2; + ot output_terminal/src_arc = ExampleNet/abc_out; + + // INTENTIONALLY MISSING: The wire from f1's ab_out to f2's abc_in2! + // This means f2's abc_in2 (the B-input) is not fed by anything. 
+ }; + + initial_iso = { + trace/it1 fwd = problem2/initial_marking/t1; + trace/it2 fwd = problem2/initial_marking/t2; + problem2/initial_marking/t1 bwd = trace/it1; + problem2/initial_marking/t2 bwd = trace/it2; + }; + + target_iso = { + trace/ot fwd = problem2/target_marking/t; + problem2/target_marking/t bwd = trace/ot; + }; +} diff --git a/tests/proptest_naming.proptest-regressions b/tests/proptest_naming.proptest-regressions new file mode 100644 index 0000000..e56ed90 --- /dev/null +++ b/tests/proptest_naming.proptest-regressions @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 06ecbb5d81f792dbc38ba7b644be7c170752d98e62f77f39f94b80cb12be32c1 # shrinks to entries = [(00000000-0000-0000-0000-000000000000, ["O"]), (00000000-0000-0000-0000-000000000001, ["O"])] diff --git a/tests/proptest_naming.rs b/tests/proptest_naming.rs new file mode 100644 index 0000000..8000dbb --- /dev/null +++ b/tests/proptest_naming.rs @@ -0,0 +1,218 @@ +//! Property tests for NamingIndex (UUID ↔ Name bidirectional consistency) + +mod generators; + +use geolog::id::Uuid; +use geolog::naming::NamingIndex; +use proptest::prelude::*; +use std::collections::HashSet; +use tempfile::tempdir; + +proptest! 
{ + /// Insert then lookup returns the same name + #[test] + fn insert_get_roundtrip( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let mut index = NamingIndex::new(); + + index.insert(uuid, name.clone()); + + let retrieved = index.get(&uuid); + prop_assert_eq!(retrieved, Some(&name)); + } + + /// Simple name (last component) is correctly extracted + #[test] + fn simple_name_is_last_component( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let mut index = NamingIndex::new(); + index.insert(uuid, name.clone()); + + let simple = index.get_simple(&uuid); + let expected = name.last().map(|s| s.as_str()); + + prop_assert_eq!(simple, expected); + } + + /// lookup(simple_name) contains the UUID + #[test] + fn lookup_contains_uuid( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let mut index = NamingIndex::new(); + index.insert(uuid, name.clone()); + + if let Some(simple) = name.last() { + let results = index.lookup(simple); + prop_assert!(!results.is_empty()); + prop_assert!(results.contains(&uuid)); + } + } + + /// lookup_unique returns Some iff exactly one UUID has that name + #[test] + fn lookup_unique_semantics( + entries in proptest::collection::vec( + (generators::arb_uuid(), generators::arb_qualified_name()), + 1..10 + ) + ) { + // Filter to unique UUIDs + let mut seen_uuids = HashSet::new(); + let unique_entries: Vec<_> = entries.into_iter() + .filter(|(uuid, _)| seen_uuids.insert(*uuid)) + .collect(); + + let mut index = NamingIndex::new(); + for (uuid, name) in &unique_entries { + index.insert(*uuid, name.clone()); + } + + // For each simple name, check lookup_unique semantics + let mut name_counts: std::collections::HashMap> = + std::collections::HashMap::new(); + for (uuid, name) in &unique_entries { + if let Some(simple) = name.last() { + name_counts.entry(simple.clone()).or_default().push(*uuid); + } + } + + for (simple_name, uuids) in 
name_counts { + let unique_result = index.lookup_unique(&simple_name); + if uuids.len() == 1 { + prop_assert_eq!(unique_result, Some(uuids[0])); + } else { + prop_assert_eq!(unique_result, None); + } + } + } + + /// Ambiguous names (multiple UUIDs) return None for lookup_unique + #[test] + fn ambiguous_names_return_none( + uuid1 in generators::arb_uuid(), + uuid2 in generators::arb_uuid(), + shared_name in generators::arb_identifier() + ) { + prop_assume!(uuid1 != uuid2); + + let mut index = NamingIndex::new(); + index.insert(uuid1, vec!["Theory1".to_string(), shared_name.clone()]); + index.insert(uuid2, vec!["Theory2".to_string(), shared_name.clone()]); + + // lookup returns both + let results = index.lookup(&shared_name); + prop_assert_eq!(results.len(), 2); + prop_assert!(results.contains(&uuid1)); + prop_assert!(results.contains(&uuid2)); + + // lookup_unique returns None + prop_assert_eq!(index.lookup_unique(&shared_name), None); + } + + /// display_name returns the simple name if set, otherwise UUID string + #[test] + fn display_name_fallback(uuid in generators::arb_uuid()) { + let mut index = NamingIndex::new(); + + // Without name: should contain UUID + let display_without = index.display_name(&uuid); + let uuid_str = format!("{}", uuid); + prop_assert!(display_without.contains(&uuid_str)); + + // With name: should be the simple name + let name = vec!["Test".to_string(), "Element".to_string()]; + index.insert(uuid, name); + let display_with = index.display_name(&uuid); + prop_assert_eq!(display_with, "Element"); + } + + /// Save and load preserves all mappings + #[test] + fn save_load_roundtrip( + entries in proptest::collection::vec( + (generators::arb_uuid(), generators::arb_qualified_name()), + 1..15 + ) + ) { + // Filter to unique UUIDs + let mut seen_uuids = HashSet::new(); + let unique_entries: Vec<_> = entries.into_iter() + .filter(|(uuid, _)| seen_uuids.insert(*uuid)) + .collect(); + + let dir = tempdir().unwrap(); + let path = 
dir.path().join("names.bin"); + + // Save + { + let mut index = NamingIndex::with_path(&path); + for (uuid, name) in &unique_entries { + index.insert(*uuid, name.clone()); + } + index.save().unwrap(); + } + + // Load + { + let loaded = NamingIndex::load(&path).unwrap(); + + for (uuid, name) in &unique_entries { + prop_assert_eq!(loaded.get(uuid), Some(name)); + } + + prop_assert_eq!(loaded.len(), unique_entries.len()); + } + } + + /// Dirty flag consistency + #[test] + fn dirty_flag_consistency( + uuid in generators::arb_uuid(), + name in generators::arb_qualified_name() + ) { + let dir = tempdir().unwrap(); + let path = dir.path().join("names.bin"); + + let mut index = NamingIndex::with_path(&path); + + // Initially clean + prop_assert!(!index.is_dirty()); + + // Dirty after insert + index.insert(uuid, name); + prop_assert!(index.is_dirty()); + + // Clean after save + index.save().unwrap(); + prop_assert!(!index.is_dirty()); + } + + /// Len reflects number of unique UUIDs + #[test] + fn len_reflects_entries( + entries in proptest::collection::vec( + (generators::arb_uuid(), generators::arb_qualified_name()), + 0..20 + ) + ) { + // Filter to unique UUIDs + let mut seen_uuids = HashSet::new(); + let unique_entries: Vec<_> = entries.into_iter() + .filter(|(uuid, _)| seen_uuids.insert(*uuid)) + .collect(); + + let mut index = NamingIndex::new(); + for (uuid, name) in &unique_entries { + index.insert(*uuid, name.clone()); + } + + prop_assert_eq!(index.len(), unique_entries.len()); + prop_assert_eq!(index.is_empty(), unique_entries.is_empty()); + } +} diff --git a/tests/proptest_overlay.proptest-regressions b/tests/proptest_overlay.proptest-regressions new file mode 100644 index 0000000..1a28615 --- /dev/null +++ b/tests/proptest_overlay.proptest-regressions @@ -0,0 +1,10 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. 
+# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 64ebf3015f934d35012468a9c62bc4bed3ccdfef098a2781f1a2e18d876db64f # shrinks to num_sorts = 1, num_elements_per_sort = 1, ops = [AddElement(1)] +cc b7c34753dab453dc9aa1c33b4726646aaa7fc1facd6b4037e6bf9a89aa469ff8 # shrinks to num_sorts = 1, num_elements_per_sort = 0, ops = [AddElement(1)] +cc 1cabffe39859e7c40bed9cc70a038cd0c84b121ffda2b8462c38941490c3b207 # shrinks to num_sorts = 1, num_elements_per_sort = 1, ops = [AddElement(1)] +cc bff7adcc8a25d73683c8b1c0a8dad051539b2ed9bc538d469ffa5d61cd7f34e7 # shrinks to num_sorts = 1, num_elements_per_sort = 0, ops = [AddElement(1)] diff --git a/tests/proptest_overlay.rs b/tests/proptest_overlay.rs new file mode 100644 index 0000000..22cb79a --- /dev/null +++ b/tests/proptest_overlay.rs @@ -0,0 +1,614 @@ +//! Property tests for the overlay system. +//! +//! The key invariant: reads through an overlay should match reads against +//! the materialized (committed) structure. This ensures the overlay correctly +//! represents all accumulated changes. + +use std::collections::{BTreeSet, HashSet}; +use std::sync::Arc; + +use geolog::core::{SortId, Structure}; +use geolog::id::{Luid, NumericId, Slid, Uuid}; +use geolog::overlay::OverlayStructure; +use geolog::universe::Universe; +use geolog::serialize::save_structure; +use geolog::zerocopy::MappedStructure; + +use proptest::prelude::*; +use tempfile::tempdir; + +// ============================================================================ +// STRATEGIES +// ============================================================================ + +/// Operations that can be applied to an overlay. 
+#[derive(Clone, Debug)] +enum OverlayOp { + /// Add a new element with the given sort + AddElement(SortId), + /// Assert a relation tuple (rel_id, indices into current elements) + AssertRelation(usize, Vec), + /// Retract a relation tuple (rel_id, indices into current elements) + RetractRelation(usize, Vec), +} + +/// Strategy for generating overlay operations. +fn overlay_op( + num_sorts: usize, + num_relations: usize, + arities: Vec, + max_elements: usize, +) -> impl Strategy { + let arities_assert = arities.clone(); + let arities_retract = arities; + + prop_oneof![ + // Add element (weighted more heavily to build up elements) + 3 => (0..num_sorts).prop_map(OverlayOp::AddElement), + // Assert relation + 2 => ((0..num_relations), prop::collection::vec(0..max_elements.max(1), 0..5)) + .prop_flat_map(move |(rel, indices)| { + let arity = arities_assert.get(rel).copied().unwrap_or(1); + let indices = if indices.len() >= arity { + indices[..arity].to_vec() + } else { + // Pad with zeros if not enough indices + let mut v = indices; + while v.len() < arity { + v.push(0); + } + v + }; + Just(OverlayOp::AssertRelation(rel, indices)) + }), + // Retract relation + 1 => ((0..num_relations), prop::collection::vec(0..max_elements.max(1), 0..5)) + .prop_flat_map(move |(rel, indices)| { + let arity = arities_retract.get(rel).copied().unwrap_or(1); + let indices = if indices.len() >= arity { + indices[..arity].to_vec() + } else { + let mut v = indices; + while v.len() < arity { + v.push(0); + } + v + }; + Just(OverlayOp::RetractRelation(rel, indices)) + }), + ] +} + +/// Strategy for generating a sequence of overlay operations. 
+fn overlay_ops( + num_sorts: usize, + num_relations: usize, + arities: Vec, + num_ops: usize, +) -> impl Strategy> { + // We generate ops that reference element indices up to some max + // The actual indices get clamped to valid range during execution + prop::collection::vec( + overlay_op(num_sorts, num_relations, arities, 100), + 0..num_ops, + ) +} + +// ============================================================================ +// TEST HELPERS +// ============================================================================ + +/// Create a base structure with some initial elements and relations. +fn create_base_structure( + universe: &mut Universe, + num_sorts: usize, + num_elements_per_sort: usize, + arities: &[usize], +) -> Structure { + let mut structure = Structure::new(num_sorts); + + // Add initial elements + for sort in 0..num_sorts { + for _ in 0..num_elements_per_sort { + structure.add_element(universe, sort); + } + } + + // Initialize relations + structure.init_relations(arities); + + structure +} + +/// Apply an operation to an overlay, tracking current element count. 
+fn apply_op( + overlay: &mut OverlayStructure, + universe: &mut Universe, + op: &OverlayOp, + element_count: &mut usize, +) { + let num_sorts = overlay.num_sorts(); + let num_relations = overlay.num_relations(); + + match op { + OverlayOp::AddElement(sort) => { + // Clamp sort to valid range + let sort = *sort % num_sorts; + let luid = universe.intern(Uuid::now_v7()); + overlay.add_element(luid, sort); + *element_count += 1; + } + OverlayOp::AssertRelation(rel_id, indices) => { + if *element_count == 0 || num_relations == 0 { + return; // Can't assert tuples without elements or relations + } + // Clamp rel_id and indices to valid range + let rel_id = *rel_id % num_relations; + let tuple: Vec = indices + .iter() + .map(|&i| Slid::from_usize(i % *element_count)) + .collect(); + overlay.assert_relation(rel_id, tuple); + } + OverlayOp::RetractRelation(rel_id, indices) => { + if *element_count == 0 || num_relations == 0 { + return; + } + let rel_id = *rel_id % num_relations; + let tuple: Vec = indices + .iter() + .map(|&i| Slid::from_usize(i % *element_count)) + .collect(); + overlay.retract_relation(rel_id, tuple); + } + } +} + +/// Collect all elements from a MappedStructure into a set. +fn collect_elements_mapped(mapped: &MappedStructure) -> HashSet<(Slid, Luid, SortId)> { + mapped.elements().collect() +} + +/// Collect all elements from an overlay into a set. +fn collect_elements_overlay(overlay: &OverlayStructure) -> HashSet<(Slid, Luid, SortId)> { + overlay.elements().collect() +} + +/// Collect all live tuples from a relation in a MappedStructure. +fn collect_tuples_mapped(mapped: &MappedStructure, rel_id: usize) -> BTreeSet> { + mapped + .relation(rel_id) + .map(|r| r.live_tuples().map(|t| t.collect()).collect()) + .unwrap_or_default() +} + +/// Collect all live tuples from a relation in an overlay. 
+fn collect_tuples_overlay(overlay: &OverlayStructure, rel_id: usize) -> BTreeSet> { + overlay + .relation(rel_id) + .map(|r| r.live_tuples().collect()) + .unwrap_or_default() +} + +// ============================================================================ +// PROPERTY TESTS +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(50))] + + /// The committed structure should have the same elements as the overlay. + #[test] + fn overlay_commit_preserves_elements( + num_sorts in 1usize..5, + num_elements_per_sort in 0usize..10, + ops in overlay_ops(4, 3, vec![1, 2, 3], 50), + ) { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let commit_path = dir.path().join("commit.structure"); + + let mut universe = Universe::new(); + let arities = vec![1, 2, 3]; + + // Create and save base + let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities); + save_structure(&base, &base_path).unwrap(); + + // Load and create overlay + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Apply operations + let mut element_count = overlay.len(); + for op in &ops { + apply_op(&mut overlay, &mut universe, op, &mut element_count); + } + + // Collect elements from overlay + let overlay_elements = collect_elements_overlay(&overlay); + + // Commit and collect elements from committed structure + let committed = overlay.commit(&commit_path).unwrap(); + let committed_elements = collect_elements_mapped(&committed); + + // They should match + prop_assert_eq!( + overlay_elements.len(), + committed_elements.len(), + "Element count mismatch" + ); + + // Check each element + for (slid, luid, sort) in &overlay_elements { + prop_assert!( + committed_elements.contains(&(*slid, *luid, *sort)), + "Element {:?} in overlay but not in committed", + (slid, luid, sort) + ); + } + 
} + + /// The committed structure should have the same relation tuples as the overlay. + #[test] + fn overlay_commit_preserves_relations( + num_sorts in 1usize..4, + num_elements_per_sort in 1usize..8, + ops in overlay_ops(3, 3, vec![1, 2, 2], 30), + ) { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let commit_path = dir.path().join("commit.structure"); + + let mut universe = Universe::new(); + let arities = vec![1, 2, 2]; // unary, binary, binary + + // Create and save base + let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities); + save_structure(&base, &base_path).unwrap(); + + // Load and create overlay + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Apply operations + let mut element_count = overlay.len(); + for op in &ops { + apply_op(&mut overlay, &mut universe, op, &mut element_count); + } + + // Collect tuples from overlay for each relation + let overlay_tuples: Vec>> = (0..arities.len()) + .map(|rel_id| collect_tuples_overlay(&overlay, rel_id)) + .collect(); + + // Commit + let committed = overlay.commit(&commit_path).unwrap(); + + // Collect tuples from committed + let committed_tuples: Vec>> = (0..arities.len()) + .map(|rel_id| collect_tuples_mapped(&committed, rel_id)) + .collect(); + + // They should match for each relation + for (rel_id, (overlay_set, committed_set)) in + overlay_tuples.iter().zip(committed_tuples.iter()).enumerate() + { + prop_assert_eq!( + overlay_set, + committed_set, + "Relation {} tuples mismatch.\nOverlay: {:?}\nCommitted: {:?}", + rel_id, + overlay_set, + committed_set + ); + } + } + + /// Element lookups should be consistent between overlay and committed structure. 
+ #[test] + fn overlay_element_lookups_match_committed( + num_sorts in 1usize..5, + num_elements_per_sort in 0usize..10, + ops in overlay_ops(4, 2, vec![1, 2], 40), + ) { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let commit_path = dir.path().join("commit.structure"); + + let mut universe = Universe::new(); + let arities = vec![1, 2]; + + // Create and save base + let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities); + save_structure(&base, &base_path).unwrap(); + + // Load and create overlay + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Apply operations + let mut element_count = overlay.len(); + for op in &ops { + apply_op(&mut overlay, &mut universe, op, &mut element_count); + } + + // Commit + let committed = overlay.commit(&commit_path).unwrap(); + + // Check that len matches + prop_assert_eq!(overlay.len(), committed.len(), "len() mismatch"); + + // Check each element lookup + for i in 0..overlay.len() { + let slid = Slid::from_usize(i); + + let overlay_luid = overlay.get_luid(slid); + let committed_luid = committed.get_luid(slid); + prop_assert_eq!( + overlay_luid, + committed_luid, + "get_luid({:?}) mismatch", + slid + ); + + let overlay_sort = overlay.get_sort(slid); + let committed_sort = committed.get_sort(slid); + prop_assert_eq!( + overlay_sort, + committed_sort, + "get_sort({:?}) mismatch", + slid + ); + } + } + + /// Rollback should restore the overlay to match the base. 
+ #[test] + fn overlay_rollback_restores_base( + num_sorts in 1usize..4, + num_elements_per_sort in 1usize..8, + ops in overlay_ops(3, 2, vec![1, 2], 20), + ) { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + + let mut universe = Universe::new(); + let arities = vec![1, 2]; + + // Create base with some initial relation tuples + let mut base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities); + // Add some initial tuples + if base.len() >= 2 { + base.assert_relation(0, vec![Slid::from_usize(0)]); + base.assert_relation(1, vec![Slid::from_usize(0), Slid::from_usize(1)]); + } + save_structure(&base, &base_path).unwrap(); + + // Load and create overlay + let mapped = Arc::new(MappedStructure::open(&base_path).unwrap()); + let mut overlay = OverlayStructure::new(mapped.clone()); + + // Record base state + let base_len = overlay.len(); + let base_tuples_0 = collect_tuples_overlay(&overlay, 0); + let base_tuples_1 = collect_tuples_overlay(&overlay, 1); + + // Apply operations (mutate the overlay) + let mut element_count = overlay.len(); + for op in &ops { + apply_op(&mut overlay, &mut universe, op, &mut element_count); + } + + // Rollback + overlay.rollback(); + + // Should match base again + prop_assert_eq!(overlay.len(), base_len, "len() should match base after rollback"); + prop_assert!(overlay.is_clean(), "should be clean after rollback"); + + let after_tuples_0 = collect_tuples_overlay(&overlay, 0); + let after_tuples_1 = collect_tuples_overlay(&overlay, 1); + + prop_assert_eq!(base_tuples_0, after_tuples_0, "Relation 0 should match base after rollback"); + prop_assert_eq!(base_tuples_1, after_tuples_1, "Relation 1 should match base after rollback"); + } + + /// Assert then retract should result in no change (for overlay-only tuples). 
+ #[test] + fn assert_then_retract_is_noop( + num_elements in 2usize..10, + rel_idx_a in 0usize..10, + rel_idx_b in 0usize..10, + ) { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + + let mut universe = Universe::new(); + + // Create base with elements but no relation tuples + let mut base = Structure::new(1); + for _ in 0..num_elements { + base.add_element(&mut universe, 0); + } + base.init_relations(&[2]); // binary relation + save_structure(&base, &base_path).unwrap(); + + // Load and create overlay + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Create a tuple + let idx_a = rel_idx_a % num_elements; + let idx_b = rel_idx_b % num_elements; + let tuple = vec![Slid::from_usize(idx_a), Slid::from_usize(idx_b)]; + + // Should start with no tuples + let initial_tuples = collect_tuples_overlay(&overlay, 0); + prop_assert!(initial_tuples.is_empty(), "Should start empty"); + + // Assert + overlay.assert_relation(0, tuple.clone()); + let after_assert = collect_tuples_overlay(&overlay, 0); + prop_assert!(after_assert.contains(&tuple), "Should contain tuple after assert"); + + // Retract + overlay.retract_relation(0, tuple.clone()); + let after_retract = collect_tuples_overlay(&overlay, 0); + prop_assert!(!after_retract.contains(&tuple), "Should not contain tuple after retract"); + + // Should be clean (no net change) + prop_assert!(overlay.is_clean(), "Should be clean after assert+retract of new tuple"); + } + + /// Retracting a base tuple should hide it from iteration. 
+ #[test] + fn retract_hides_base_tuple( + num_elements in 3usize..10, + ) { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + + let mut universe = Universe::new(); + + // Create base with elements and a relation tuple + let mut base = Structure::new(1); + for _ in 0..num_elements { + base.add_element(&mut universe, 0); + } + base.init_relations(&[2]); // binary relation + let base_tuple = vec![Slid::from_usize(0), Slid::from_usize(1)]; + base.assert_relation(0, base_tuple.clone()); + save_structure(&base, &base_path).unwrap(); + + // Load and create overlay + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Should see the base tuple + let initial = collect_tuples_overlay(&overlay, 0); + prop_assert!(initial.contains(&base_tuple), "Should see base tuple initially"); + + // Retract it + overlay.retract_relation(0, base_tuple.clone()); + + // Should no longer see it + let after = collect_tuples_overlay(&overlay, 0); + prop_assert!(!after.contains(&base_tuple), "Should not see base tuple after retract"); + + // But overlay should not be clean (we have a retraction) + prop_assert!(!overlay.is_clean(), "Should not be clean with a retraction"); + } + + /// Multiple commits should produce identical results. 
+ #[test] + fn double_commit_is_idempotent( + num_sorts in 1usize..3, + num_elements_per_sort in 1usize..5, + ops in overlay_ops(2, 2, vec![1, 2], 15), + ) { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let commit1_path = dir.path().join("commit1.structure"); + let commit2_path = dir.path().join("commit2.structure"); + + let mut universe = Universe::new(); + let arities = vec![1, 2]; + + // Create and save base + let base = create_base_structure(&mut universe, num_sorts, num_elements_per_sort, &arities); + save_structure(&base, &base_path).unwrap(); + + // Load and create overlay + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + // Apply operations + let mut element_count = overlay.len(); + for op in &ops { + apply_op(&mut overlay, &mut universe, op, &mut element_count); + } + + // Commit twice + let committed1 = overlay.commit(&commit1_path).unwrap(); + let committed2 = overlay.commit(&commit2_path).unwrap(); + + // Both should have the same content + prop_assert_eq!(committed1.len(), committed2.len(), "len() should match"); + + for rel_id in 0..arities.len() { + let tuples1 = collect_tuples_mapped(&committed1, rel_id); + let tuples2 = collect_tuples_mapped(&committed2, rel_id); + prop_assert_eq!(tuples1, tuples2, "Relation {} should match", rel_id); + } + } +} + +// ============================================================================ +// ADDITIONAL TARGETED TESTS +// ============================================================================ + +#[test] +fn test_empty_overlay_commit() { + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let commit_path = dir.path().join("commit.structure"); + + let mut universe = Universe::new(); + + // Create base with some content + let mut base = Structure::new(2); + base.add_element(&mut universe, 0); + base.add_element(&mut universe, 1); + base.init_relations(&[1]); + 
base.assert_relation(0, vec![Slid::from_usize(0)]); + save_structure(&base, &base_path).unwrap(); + + // Create overlay but don't modify it + let mapped = MappedStructure::open(&base_path).unwrap(); + let overlay = OverlayStructure::new(Arc::new(mapped)); + + assert!(overlay.is_clean()); + + // Commit should produce identical structure + let committed = overlay.commit(&commit_path).unwrap(); + + assert_eq!(committed.len(), 2); + assert_eq!(collect_tuples_mapped(&committed, 0).len(), 1); +} + +#[test] +fn test_overlay_with_mixed_element_tuples() { + // Test tuples that reference both base and overlay elements + let dir = tempdir().unwrap(); + let base_path = dir.path().join("base.structure"); + let commit_path = dir.path().join("commit.structure"); + + let mut universe = Universe::new(); + + // Create base with one element + let mut base = Structure::new(1); + let (base_elem, _) = base.add_element(&mut universe, 0); + base.init_relations(&[2]); // binary relation + save_structure(&base, &base_path).unwrap(); + + // Create overlay and add an element + let mapped = MappedStructure::open(&base_path).unwrap(); + let mut overlay = OverlayStructure::new(Arc::new(mapped)); + + let new_luid = universe.intern(Uuid::now_v7()); + let new_elem = overlay.add_element(new_luid, 0); + + // Assert a tuple mixing base and overlay elements + let mixed_tuple = vec![base_elem, new_elem]; + overlay.assert_relation(0, mixed_tuple.clone()); + + // Verify we can see it + let tuples = collect_tuples_overlay(&overlay, 0); + assert!(tuples.contains(&mixed_tuple)); + + // Commit and verify + let committed = overlay.commit(&commit_path).unwrap(); + let committed_tuples = collect_tuples_mapped(&committed, 0); + assert_eq!(committed_tuples.len(), 1); +} diff --git a/tests/proptest_patch.proptest-regressions b/tests/proptest_patch.proptest-regressions new file mode 100644 index 0000000..5fb747e --- /dev/null +++ b/tests/proptest_patch.proptest-regressions @@ -0,0 +1,7 @@ +# Seeds for failure cases 
proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 3a0157c22c0fb9016c677f4a6e7765c1e0294d6e50ff98b0f608c276d432927a # shrinks to (old, mut universe) = (Structure { theory_luid: None, luids: [0], luid_to_slid: {0: 0}, sorts: [1], carriers: [RoaringTreemap<[]>, RoaringTreemap<[0]>], functions: [], nested: {} }, Universe { index: {019b1e9d-0b71-7471-94d9-cef7c2d2959d}, path: None, dirty: true }), (new, mut universe2) = (Structure { theory_luid: None, luids: [0], luid_to_slid: {0: 0}, sorts: [1], carriers: [RoaringTreemap<[]>, RoaringTreemap<[0]>], functions: [], nested: {} }, Universe { index: {019b1e9d-0b71-7471-94d9-cf28a3b61209}, path: None, dirty: true }) diff --git a/tests/proptest_patch.rs b/tests/proptest_patch.rs new file mode 100644 index 0000000..f2b40e8 --- /dev/null +++ b/tests/proptest_patch.rs @@ -0,0 +1,334 @@ +//! Property tests for Patch algebra (diff/apply roundtrips) + +mod generators; + +use generators::{StructureParams, check_structure_invariants, structures_equivalent}; +use geolog::core::Structure; +use geolog::naming::NamingIndex; +use geolog::patch::{Patch, apply_patch, diff, to_initial_patch}; +use geolog::universe::Universe; +use proptest::prelude::*; +use std::collections::HashSet; + +proptest! 
{ + /// Empty patch is identity: apply_patch(s, empty) == s + #[test] + fn empty_patch_is_identity( + (structure, mut universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 8, + }) + ) { + let empty_patch = Patch::new(None, structure.num_sorts(), structure.num_functions(), structure.relations.len()); + let mut naming = NamingIndex::new(); + + let result = apply_patch(&structure, &empty_patch, &mut universe, &mut naming); + prop_assert!(result.is_ok()); + + let result = result.unwrap(); + prop_assert_eq!(result.len(), structure.len()); + prop_assert_eq!(result.num_sorts(), structure.num_sorts()); + + // Check same UUIDs + prop_assert!(structures_equivalent(&result, &structure, &universe, &universe)); + } + + /// diff(s, s) produces empty patch + #[test] + fn diff_same_is_empty( + (structure, universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 8, + }) + ) { + let naming = NamingIndex::new(); + + let patch = diff(&structure, &structure, &universe, &naming, &naming); + + prop_assert!(patch.is_empty()); + } + + /// to_initial_patch creates patch that builds structure from empty + #[test] + fn initial_patch_builds_from_empty( + (structure, mut universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 8, + }) + ) { + // Build naming for structure elements + let mut naming = NamingIndex::new(); + for &luid in &structure.luids { + if let Some(uuid) = universe.get(luid) { + naming.insert(uuid, vec![format!("elem_{}", luid)]); + } + } + + let patch = to_initial_patch(&structure, &universe, &naming); + + // Apply to empty structure + let empty = Structure::new(structure.num_sorts()); + let mut result_naming = NamingIndex::new(); + let result = apply_patch(&empty, &patch, &mut universe, &mut result_naming); + + prop_assert!(result.is_ok()); + let result = result.unwrap(); + + // Should have same number of elements + prop_assert_eq!(result.len(), 
structure.len()); + + // Should have same UUIDs + prop_assert!(structures_equivalent(&result, &structure, &universe, &universe)); + } + + /// Element additions are tracked in patch + #[test] + fn additions_tracked( + num_elements in 1usize..10, + ) { + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + + let old = Structure::new(2); + let mut new = Structure::new(2); + + for i in 0..num_elements { + let (_, luid) = new.add_element(&mut universe, i % 2); + if let Some(uuid) = universe.get(luid) { + naming.insert(uuid, vec![format!("elem_{}", i)]); + } + } + + let old_naming = NamingIndex::new(); + let patch = diff(&old, &new, &universe, &old_naming, &naming); + + prop_assert_eq!(patch.elements.additions.len(), num_elements); + prop_assert!(patch.elements.deletions.is_empty()); + } + + /// Element deletions are tracked in patch + #[test] + fn deletions_tracked( + num_elements in 1usize..10, + ) { + let mut universe = Universe::new(); + let mut old_naming = NamingIndex::new(); + + let mut old = Structure::new(2); + for i in 0..num_elements { + let (_, luid) = old.add_element(&mut universe, i % 2); + if let Some(uuid) = universe.get(luid) { + old_naming.insert(uuid, vec![format!("elem_{}", i)]); + } + } + + let new = Structure::new(2); + let new_naming = NamingIndex::new(); + + let patch = diff(&old, &new, &universe, &old_naming, &new_naming); + + prop_assert_eq!(patch.elements.deletions.len(), num_elements); + prop_assert!(patch.elements.additions.is_empty()); + } + + /// Element patch has disjoint additions and deletions + #[test] + fn element_patch_disjoint( + (old, universe) in generators::arb_structure(StructureParams { + num_sorts: 2, + max_elements_per_sort: 5, + }), + (new, _) in generators::arb_structure(StructureParams { + num_sorts: 2, + max_elements_per_sort: 5, + }) + ) { + let old_naming = NamingIndex::new(); + let new_naming = NamingIndex::new(); + + let patch = diff(&old, &new, &universe, &old_naming, &new_naming); + + // 
Additions and deletions should be disjoint + let additions: HashSet<_> = patch.elements.additions.keys().collect(); + let deletions: HashSet<_> = patch.elements.deletions.iter().collect(); + + let intersection: Vec<_> = additions.intersection(&deletions).collect(); + prop_assert!(intersection.is_empty()); + } + + /// NamingPatch tracks name additions for new elements + #[test] + fn naming_patch_additions( + num_elements in 1usize..8, + ) { + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + + let old = Structure::new(2); + let mut new = Structure::new(2); + + for i in 0..num_elements { + let (_, luid) = new.add_element(&mut universe, 0); + if let Some(uuid) = universe.get(luid) { + naming.insert(uuid, vec![format!("elem_{}", i)]); + } + } + + let old_naming = NamingIndex::new(); + let patch = diff(&old, &new, &universe, &old_naming, &naming); + + // Naming patch should have names for new elements + prop_assert_eq!(patch.names.additions.len(), num_elements); + } + + /// Patch inversion swaps additions/deletions + #[test] + fn inversion_swaps_elements( + (old, mut universe) in generators::arb_structure(StructureParams { + num_sorts: 2, + max_elements_per_sort: 4, + }) + ) { + // Create a new structure with some different elements + let mut new = Structure::new(2); + new.add_element(&mut universe, 0); + new.add_element(&mut universe, 1); + + let old_naming = NamingIndex::new(); + let new_naming = NamingIndex::new(); + + let patch = diff(&old, &new, &universe, &old_naming, &new_naming); + let inverted = patch.invert(); + + // Inverted patch swaps source/target commits + prop_assert_eq!(inverted.source_commit, Some(patch.target_commit)); + + // Additions become deletions (by key count) + prop_assert_eq!( + inverted.elements.deletions.len(), + patch.elements.additions.len() + ); + } + + /// Double inversion preserves target_commit (but creates new source) + #[test] + fn double_inversion_target_preserved( + (structure, universe) in 
generators::arb_structure(StructureParams { + num_sorts: 2, + max_elements_per_sort: 3, + }) + ) { + let naming = NamingIndex::new(); + let patch = to_initial_patch(&structure, &universe, &naming); + + let inverted = patch.invert(); + let double_inverted = inverted.invert(); + + // Original target becomes source after double inversion + // (because each inversion swaps source ↔ target) + prop_assert_eq!(double_inverted.source_commit, Some(inverted.target_commit)); + } + + /// Result of apply_patch maintains structure invariants + #[test] + fn apply_patch_maintains_invariants( + (old, mut universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 5, + }), + (new, _) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 5, + }) + ) { + let old_naming = NamingIndex::new(); + let new_naming = NamingIndex::new(); + + let patch = diff(&old, &new, &universe, &old_naming, &new_naming); + let mut result_naming = NamingIndex::new(); + + let result = apply_patch(&old, &patch, &mut universe, &mut result_naming); + prop_assert!(result.is_ok()); + + let result = result.unwrap(); + prop_assert!(check_structure_invariants(&result).is_ok()); + } +} + +// More focused roundtrip tests + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// THE KEY PROPERTY: diff then apply is identity + /// diff(old, new) |> apply_patch(old, _) ≈ new + /// + /// We test this by starting with a structure and modifying it (adding/removing elements) + /// to create `new`, ensuring both share the same Universe. 
+ #[test] + fn diff_apply_roundtrip( + (base, mut universe) in generators::arb_structure(StructureParams { + num_sorts: 2, + max_elements_per_sort: 4, + }), + additions in proptest::collection::vec(0usize..2, 0..4), + deletions_count in 0usize..3, + ) { + // Create `old` as a clone of base + let old = base.clone(); + + // Build naming for old structure + let mut old_naming = NamingIndex::new(); + for &luid in &old.luids { + if let Some(uuid) = universe.get(luid) { + old_naming.insert(uuid, vec![format!("old_elem_{}", luid)]); + } + } + + // Create `new` by modifying base: add some elements, potentially skip some old ones + let mut new = Structure::new(base.num_sorts()); + let mut new_naming = NamingIndex::new(); + + // Keep some elements from old (skip the first `deletions_count`) + let keep_count = base.len().saturating_sub(deletions_count); + for slid in 0..keep_count { + let luid = base.luids[slid]; + let sort_id = base.sorts[slid]; + new.add_element_with_luid(luid, sort_id); + + if let Some(uuid) = universe.get(luid) { + new_naming.insert(uuid, vec![format!("kept_elem_{}", luid)]); + } + } + + // Add new elements + for sort_id in additions { + let (_, luid) = new.add_element(&mut universe, sort_id); + if let Some(uuid) = universe.get(luid) { + new_naming.insert(uuid, vec![format!("new_elem_{}", luid)]); + } + } + + // Now diff and apply + let patch = diff(&old, &new, &universe, &old_naming, &new_naming); + let mut result_naming = NamingIndex::new(); + + let result = apply_patch(&old, &patch, &mut universe, &mut result_naming); + prop_assert!(result.is_ok()); + + let result = result.unwrap(); + + // Result should have same number of elements as new + prop_assert_eq!(result.len(), new.len()); + + // Result should have same UUIDs as new (both use the same universe) + let result_uuids: HashSet<_> = result.luids.iter() + .filter_map(|&luid| universe.get(luid)) + .collect(); + let new_uuids: HashSet<_> = new.luids.iter() + .filter_map(|&luid| universe.get(luid)) + 
.collect(); + + prop_assert_eq!(result_uuids, new_uuids); + } +} diff --git a/tests/proptest_query.proptest-regressions b/tests/proptest_query.proptest-regressions new file mode 100644 index 0000000..cd0e0be --- /dev/null +++ b/tests/proptest_query.proptest-regressions @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 58732b0034992e09c980d677405cd7448269d1b30d5deb09f018a60e30c56215 # shrinks to structure = Structure { theory_luid: None, luids: [], luid_to_slid: {}, sorts: [], carriers: [RoaringTreemap<[]>, RoaringTreemap<[]>, RoaringTreemap<[0]>, RoaringTreemap<[]>], functions: [], relations: [], parents: {}, nested: {} }, query = Filter { input: Scan { sort_idx: 2 }, pred: Or(FuncEqConst { func_idx: 0, arg_col: 0, expected: Slid(0) }, True) } diff --git a/tests/proptest_query.rs b/tests/proptest_query.rs new file mode 100644 index 0000000..7a9b924 --- /dev/null +++ b/tests/proptest_query.rs @@ -0,0 +1,946 @@ +//! Property tests for query operations. +//! +//! Verifies that execute_optimized produces the same results as execute (naive). 
+ +use geolog::core::Structure; +use geolog::id::{NumericId, Slid}; +use geolog::query::{JoinCond, Predicate, QueryOp, execute, execute_optimized}; +use proptest::prelude::*; + +// ============================================================================ +// QueryOp Generators +// ============================================================================ + +/// Generate arbitrary Slid values (within reasonable range) +fn arb_slid() -> impl Strategy { + (0..100usize).prop_map(Slid::from_usize) +} + +/// Generate a simple structure with multiple sorts and elements +fn arb_query_structure(num_sorts: usize, max_per_sort: usize) -> impl Strategy { + prop::collection::vec( + prop::collection::vec(0..50u64, 0..=max_per_sort), + num_sorts, + ) + .prop_map(|sort_elements| { + let mut structure = Structure::new(sort_elements.len()); + for (sort_idx, elements) in sort_elements.iter().enumerate() { + for &elem in elements { + structure.carriers[sort_idx].insert(elem); + } + } + structure + }) +} + +/// Generate a scan operation +fn arb_scan(max_sort: usize) -> impl Strategy { + (0..max_sort).prop_map(|sort_idx| QueryOp::Scan { sort_idx }) +} + +/// Generate a constant tuple +fn arb_constant() -> impl Strategy { + prop::collection::vec(arb_slid(), 1..=3) + .prop_map(|tuple| QueryOp::Constant { tuple }) +} + +/// Generate empty +fn arb_empty() -> impl Strategy { + Just(QueryOp::Empty) +} + +/// Generate a simple query (scan, constant, or empty) +fn arb_simple_query(max_sort: usize) -> impl Strategy { + prop_oneof![ + arb_scan(max_sort), + arb_constant(), + arb_empty(), + ] +} + +/// Generate a join condition for given arity +fn arb_join_cond(left_arity: usize, right_arity: usize) -> impl Strategy { + if left_arity == 0 || right_arity == 0 { + Just(JoinCond::Cross).boxed() + } else { + prop_oneof![ + Just(JoinCond::Cross), + (0..left_arity, 0..right_arity) + .prop_map(|(left_col, right_col)| JoinCond::Equi { left_col, right_col }), + ] + .boxed() + } +} + +/// Generate a 
join of two scans +fn arb_scan_join(max_sort: usize) -> impl Strategy { + (0..max_sort, 0..max_sort) + .prop_flat_map(move |(left_sort, right_sort)| { + arb_join_cond(1, 1).prop_map(move |cond| QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + cond, + }) + }) +} + +/// Generate a union of two simple queries +fn arb_union(max_sort: usize) -> impl Strategy { + (arb_simple_query(max_sort), arb_simple_query(max_sort)) + .prop_map(|(left, right)| QueryOp::Union { + left: Box::new(left), + right: Box::new(right), + }) +} + +/// Generate a negate of a simple query +fn arb_negate(max_sort: usize) -> impl Strategy { + arb_simple_query(max_sort).prop_map(|input| QueryOp::Negate { + input: Box::new(input), + }) +} + +/// Generate a distinct of a simple query +fn arb_distinct(max_sort: usize) -> impl Strategy { + arb_simple_query(max_sort).prop_map(|input| QueryOp::Distinct { + input: Box::new(input), + }) +} + +/// Generate a simple predicate (no recursion, no function predicates) +/// Use this for tests with structures that don't have functions. +fn arb_simple_predicate_no_funcs() -> impl Strategy { + prop_oneof![ + Just(Predicate::True), + Just(Predicate::False), + (0..5usize, 0..5usize).prop_map(|(left, right)| Predicate::ColEqCol { left, right }), + (0..5usize, arb_slid()).prop_map(|(col, val)| Predicate::ColEqConst { col, val }), + ] +} + +/// Generate a simple predicate (no recursion) - includes function predicates +/// Use this for to_relalg compilation tests where functions don't need to evaluate. 
+fn arb_simple_predicate() -> impl Strategy { + prop_oneof![ + Just(Predicate::True), + Just(Predicate::False), + (0..5usize, 0..5usize).prop_map(|(left, right)| Predicate::ColEqCol { left, right }), + (0..5usize, arb_slid()).prop_map(|(col, val)| Predicate::ColEqConst { col, val }), + (0..3usize, 0..5usize, 0..5usize) + .prop_map(|(func_idx, arg_col, result_col)| Predicate::FuncEq { func_idx, arg_col, result_col }), + (0..3usize, 0..5usize, arb_slid()) + .prop_map(|(func_idx, arg_col, expected)| Predicate::FuncEqConst { func_idx, arg_col, expected }), + ] +} + +/// Generate a predicate with possible And/Or nesting (no function predicates) +fn arb_predicate_no_funcs() -> impl Strategy { + arb_simple_predicate_no_funcs().prop_recursive(2, 8, 2, |inner| { + prop_oneof![ + inner.clone(), + (inner.clone(), inner.clone()).prop_map(|(l, r)| Predicate::And(Box::new(l), Box::new(r))), + (inner.clone(), inner).prop_map(|(l, r)| Predicate::Or(Box::new(l), Box::new(r))), + ] + }) +} + +/// Generate a predicate with possible And/Or nesting (includes function predicates) +fn arb_predicate() -> impl Strategy { + arb_simple_predicate().prop_recursive(2, 8, 2, |inner| { + prop_oneof![ + inner.clone(), + (inner.clone(), inner.clone()).prop_map(|(l, r)| Predicate::And(Box::new(l), Box::new(r))), + (inner.clone(), inner).prop_map(|(l, r)| Predicate::Or(Box::new(l), Box::new(r))), + ] + }) +} + +/// Generate a filter with arbitrary predicate (no function predicates) +/// Safe for testing against structures without functions. 
+fn arb_filter_safe(max_sort: usize) -> impl Strategy { + (arb_scan(max_sort), arb_predicate_no_funcs()) + .prop_map(|(input, pred)| QueryOp::Filter { + input: Box::new(input), + pred, + }) +} + +/// Generate a filter with column equality predicate (simple version) +fn arb_filter_col_eq_const(max_sort: usize) -> impl Strategy { + (arb_scan(max_sort), arb_slid()) + .prop_map(|(input, val)| QueryOp::Filter { + input: Box::new(input), + pred: Predicate::ColEqConst { col: 0, val }, + }) +} + +/// Generate a query without DBSP operators (for comparing naive vs optimized) +/// Uses arb_filter_safe to avoid function predicates that require functions in the structure. +fn arb_query_no_dbsp(max_sort: usize) -> impl Strategy { + prop_oneof![ + 4 => arb_scan(max_sort), + 2 => arb_constant(), + 1 => arb_empty(), + 3 => arb_scan_join(max_sort), + 2 => arb_union(max_sort), + 1 => arb_negate(max_sort), + 1 => arb_distinct(max_sort), + 2 => arb_filter_col_eq_const(max_sort), + 3 => arb_filter_safe(max_sort), + ] +} + +// ============================================================================ +// Property Tests +// ============================================================================ + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(500))] + + /// execute_optimized should produce identical results to execute for any query + #[test] + fn optimized_matches_naive( + structure in arb_query_structure(4, 10), + query in arb_query_no_dbsp(4) + ) { + let naive_result = execute(&query, &structure); + let optimized_result = execute_optimized(&query, &structure); + + // Same number of unique tuples + prop_assert_eq!( + naive_result.len(), + optimized_result.len(), + "Length mismatch for query {:?}", + query + ); + + // Same multiplicities for each tuple + for (tuple, mult) in naive_result.iter() { + prop_assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "Multiplicity mismatch for tuple {:?}", + tuple + ); + } + } + + /// Equi-join should be symmetric in a sense: swapping left/right and columns + /// should produce equivalent results (after accounting for tuple order) + #[test] + fn equijoin_symmetric( + structure in arb_query_structure(2, 8), + left_sort in 0..2usize, + right_sort in 0..2usize, + ) { + let join1 = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let join2 = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: right_sort }), + right: Box::new(QueryOp::Scan { sort_idx: left_sort }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let result1 = execute_optimized(&join1, &structure); + let result2 = execute_optimized(&join2, &structure); + + // Should have same number of tuples (with columns swapped) + prop_assert_eq!(result1.len(), result2.len()); + } + + /// Nested equijoins: (A ⋈ B) ⋈ C should work correctly + #[test] + fn nested_equijoin( + structure in arb_query_structure(3, 6), + ) { + let join_ab = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Equi { left_col: 0, 
right_col: 0 }, + }; + + let join_abc = QueryOp::Join { + left: Box::new(join_ab.clone()), + right: Box::new(QueryOp::Scan { sort_idx: 2 }), + cond: JoinCond::Equi { left_col: 0, right_col: 0 }, + }; + + let naive_result = execute(&join_abc, &structure); + let optimized_result = execute_optimized(&join_abc, &structure); + + prop_assert_eq!(naive_result.len(), optimized_result.len()); + + for (tuple, mult) in naive_result.iter() { + prop_assert_eq!( + optimized_result.tuples.get(tuple), + Some(mult), + "Mismatch in nested join" + ); + } + } + + /// Cross join should produce |A| * |B| results + #[test] + fn cross_join_cardinality( + structure in arb_query_structure(2, 5), + ) { + let join = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + cond: JoinCond::Cross, + }; + + let result = execute_optimized(&join, &structure); + let expected_size = structure.carriers[0].len() as usize * structure.carriers[1].len() as usize; + + prop_assert_eq!(result.len(), expected_size); + } + + /// Union is commutative: A ∪ B = B ∪ A + #[test] + fn union_commutative( + structure in arb_query_structure(2, 5), + ) { + let union1 = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 0 }), + right: Box::new(QueryOp::Scan { sort_idx: 1 }), + }; + + let union2 = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: 1 }), + right: Box::new(QueryOp::Scan { sort_idx: 0 }), + }; + + let result1 = execute_optimized(&union1, &structure); + let result2 = execute_optimized(&union2, &structure); + + prop_assert_eq!(result1.len(), result2.len()); + + for (tuple, mult) in result1.iter() { + prop_assert_eq!( + result2.tuples.get(tuple), + Some(mult), + "Union commutativity failed" + ); + } + } + + /// Distinct is idempotent: distinct(distinct(x)) = distinct(x) + #[test] + fn distinct_idempotent( + structure in arb_query_structure(1, 10), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + + let distinct1 = QueryOp::Distinct 
{ + input: Box::new(scan.clone()), + }; + + let distinct2 = QueryOp::Distinct { + input: Box::new(QueryOp::Distinct { + input: Box::new(scan), + }), + }; + + let result1 = execute_optimized(&distinct1, &structure); + let result2 = execute_optimized(&distinct2, &structure); + + prop_assert_eq!(result1.len(), result2.len()); + + for (tuple, mult) in result1.iter() { + prop_assert_eq!( + result2.tuples.get(tuple), + Some(mult), + "Distinct idempotency failed" + ); + } + } + + /// Negate twice is identity: negate(negate(x)) = x + #[test] + fn negate_involution( + structure in arb_query_structure(1, 10), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + + let double_negate = QueryOp::Negate { + input: Box::new(QueryOp::Negate { + input: Box::new(scan.clone()), + }), + }; + + let result_original = execute_optimized(&scan, &structure); + let result_double_neg = execute_optimized(&double_negate, &structure); + + prop_assert_eq!(result_original.len(), result_double_neg.len()); + + for (tuple, mult) in result_original.iter() { + prop_assert_eq!( + result_double_neg.tuples.get(tuple), + Some(mult), + "Negate involution failed" + ); + } + } +} + +// ============================================================================ +// RelAlgIR Compilation Property Tests +// ============================================================================ + +mod to_relalg_tests { + use geolog::core::ElaboratedTheory; + use geolog::query::{Predicate, QueryOp, to_relalg::compile_to_relalg}; + use geolog::universe::Universe; + use geolog::repl::ReplState; + use proptest::prelude::*; + use std::rc::Rc; + + /// Load the RelAlgIR theory for testing + fn load_relalg_theory() -> Rc { + let meta_content = std::fs::read_to_string("theories/GeologMeta.geolog") + .expect("Failed to read GeologMeta.geolog"); + let ir_content = std::fs::read_to_string("theories/RelAlgIR.geolog") + .expect("Failed to read RelAlgIR.geolog"); + + let mut state = ReplState::new(); + state + .execute_geolog(&meta_content) 
+ .expect("GeologMeta should load"); + state + .execute_geolog(&ir_content) + .expect("RelAlgIR should load"); + + state + .theories + .get("RelAlgIR") + .expect("RelAlgIR should exist") + .clone() + } + + /// Generate a simple QueryOp without Constant/Apply (which need target context) + fn arb_simple_query_op() -> impl Strategy { + prop_oneof![ + // Scan + (0..10usize).prop_map(|sort_idx| QueryOp::Scan { sort_idx }), + // Empty + Just(QueryOp::Empty), + ] + } + + /// Generate a nested QueryOp (depth 2) + fn arb_nested_query_op() -> impl Strategy { + arb_simple_query_op().prop_flat_map(|base| { + prop_oneof![ + // Filter with various predicates + Just(QueryOp::Filter { + input: Box::new(base.clone()), + pred: Predicate::True, + }), + Just(QueryOp::Filter { + input: Box::new(base.clone()), + pred: Predicate::False, + }), + Just(QueryOp::Filter { + input: Box::new(base.clone()), + pred: Predicate::ColEqCol { left: 0, right: 0 }, + }), + // Negate + Just(QueryOp::Negate { + input: Box::new(base.clone()), + }), + // Distinct + Just(QueryOp::Distinct { + input: Box::new(base.clone()), + }), + // Project + prop::collection::vec(0..3usize, 1..=3).prop_map(move |columns| QueryOp::Project { + input: Box::new(base.clone()), + columns, + }), + ] + }) + } + + proptest! 
{ + /// Compiling simple QueryOps to RelAlgIR should not panic + #[test] + fn compile_simple_query_no_panic(plan in arb_simple_query_op()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Should not panic - may error for Constant/Apply but shouldn't crash + let _ = compile_to_relalg(&plan, &relalg_theory, &mut universe); + } + + /// Compiling nested QueryOps to RelAlgIR should not panic + #[test] + fn compile_nested_query_no_panic(plan in arb_nested_query_op()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Should not panic + let _ = compile_to_relalg(&plan, &relalg_theory, &mut universe); + } + + /// Compiled instances should have at least output wire + #[test] + fn compile_produces_valid_instance(plan in arb_simple_query_op()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + if let Ok(instance) = compile_to_relalg(&plan, &relalg_theory, &mut universe) { + // Instance should have elements + prop_assert!(!instance.structure.is_empty(), "Instance should have elements"); + // Should have named elements including output wire + prop_assert!(!instance.names.is_empty(), "Instance should have named elements"); + } + } + + /// Compiling binary operations should work + #[test] + fn compile_binary_ops_no_panic( + left_sort in 0..5usize, + right_sort in 0..5usize, + ) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + // Join (cross) + let join_plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + cond: geolog::query::JoinCond::Cross, + }; + let _ = compile_to_relalg(&join_plan, &relalg_theory, &mut universe); + + // Join (equi) + let equi_plan = QueryOp::Join { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + cond: geolog::query::JoinCond::Equi { left_col: 0, 
right_col: 0 }, + }; + let _ = compile_to_relalg(&equi_plan, &relalg_theory, &mut universe); + + // Union + let union_plan = QueryOp::Union { + left: Box::new(QueryOp::Scan { sort_idx: left_sort }), + right: Box::new(QueryOp::Scan { sort_idx: right_sort }), + }; + let _ = compile_to_relalg(&union_plan, &relalg_theory, &mut universe); + } + + /// Compiling DBSP operators should work + #[test] + fn compile_dbsp_ops_no_panic(sort_idx in 0..5usize, state_id in 0..3usize) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let scan = QueryOp::Scan { sort_idx }; + + // Delay + let delay_plan = QueryOp::Delay { + input: Box::new(scan.clone()), + state_id, + }; + let _ = compile_to_relalg(&delay_plan, &relalg_theory, &mut universe); + + // Diff + let diff_plan = QueryOp::Diff { + input: Box::new(scan.clone()), + state_id, + }; + let _ = compile_to_relalg(&diff_plan, &relalg_theory, &mut universe); + + // Integrate + let integrate_plan = QueryOp::Integrate { + input: Box::new(scan), + state_id, + }; + let _ = compile_to_relalg(&integrate_plan, &relalg_theory, &mut universe); + } + + /// Compiling all predicate types should work + #[test] + fn compile_all_predicate_types_no_panic(pred in super::arb_predicate()) { + let relalg_theory = load_relalg_theory(); + let mut universe = Universe::new(); + + let filter_plan = QueryOp::Filter { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + pred, + }; + + // Should compile without panic + let _ = compile_to_relalg(&filter_plan, &relalg_theory, &mut universe); + } + } +} + +// ============================================================================ +// Chase Algorithm Proptests +// ============================================================================ + +mod chase_proptest { + use super::*; + use geolog::core::{Context, DerivedSort, Formula, RelationStorage, Sequent, Signature, Structure, Term, Theory, VecRelation}; + use geolog::cc::CongruenceClosure; + use 
geolog::query::chase::{chase_step, chase_fixpoint}; + use geolog::universe::Universe; + + /// Generate a simple theory with one sort and one unary relation + fn simple_relation_theory() -> Theory { + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + Theory { + name: "Simple".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + } + } + + proptest! { + #[test] + fn chase_step_no_panic_on_empty_axioms( + num_elements in 0..10usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(1)); + s + }; + let theory = simple_relation_theory(); + + // Empty axioms should not change anything + let mut cc = CongruenceClosure::new(); + let changed = chase_step(&[], &mut structure, &mut cc, &mut universe, &theory.signature).unwrap(); + prop_assert!(!changed); + } + + #[test] + fn chase_step_adds_to_relation( + num_elements in 1..10usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(1)); // Unary relation + s + }; + let theory = simple_relation_theory(); + + // Axiom: forall x : V. 
|- R(x) + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + // First chase step should add elements + let mut cc = CongruenceClosure::new(); + let changed = chase_step(std::slice::from_ref(&axiom), &mut structure, &mut cc, &mut universe, &theory.signature).unwrap(); + + if num_elements > 0 { + prop_assert!(changed); + prop_assert_eq!(structure.relations[0].len(), num_elements); + } + + // Second chase step should not change anything + let changed2 = chase_step(&[axiom], &mut structure, &mut cc, &mut universe, &theory.signature).unwrap(); + prop_assert!(!changed2); + } + + #[test] + fn chase_fixpoint_converges( + num_elements in 1..8usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(1)); // Unary relation + s + }; + let theory = simple_relation_theory(); + + // Axiom: forall x : V. |- R(x) + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + // Chase should converge in exactly 2 iterations: + // 1. Add all elements to relation + // 2. Verify no more changes + let iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &theory.signature, + 100, + ).unwrap(); + + prop_assert_eq!(iterations, 2); + prop_assert_eq!(structure.relations[0].len(), num_elements); + } + + /// Test reflexivity axiom: forall x. 
|- [lo: x, hi: x] leq + /// Should create diagonal tuples for all elements + #[test] + fn chase_reflexivity_creates_diagonal( + num_elements in 1..8usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + // Binary relation: leq : [lo: V, hi: V] -> Prop + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("leq".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Axiom: forall x : V. |- [lo: x, hi: x] leq + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ])), + }; + + let iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // Should have exactly num_elements diagonal tuples + prop_assert_eq!(structure.relations[0].len(), num_elements); + prop_assert!(iterations <= 3); // Should converge quickly + } + + /// Test transitivity axiom: [lo: x, hi: y] leq, [lo: y, hi: z] leq |- [lo: x, hi: z] leq + /// Classic transitive closure - should derive all reachable pairs + #[test] + fn chase_transitivity_computes_closure( + chain_length in 2..5usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + // Create a chain: 0 -> 1 -> 2 -> ... 
-> n-1 + for i in 0..chain_length { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("leq".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Seed the chain edges: 0->1, 1->2, ..., (n-2)->(n-1) + use geolog::id::Slid; + for i in 0..(chain_length - 1) { + structure.relations[0].insert(vec![ + Slid::from_usize(i), + Slid::from_usize(i + 1), + ]); + } + + // Transitivity axiom + let axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Conj(vec![ + Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ])), + Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ])), + ]), + conclusion: Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ])), + }; + + let _iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // For a chain of length n, transitive closure has n*(n-1)/2 pairs + // (all pairs (i,j) where i < j) + let expected_tuples = chain_length * (chain_length - 1) / 2; + prop_assert_eq!(structure.relations[0].len(), expected_tuples); + } + + /// Test existential conclusion creates fresh witnesses + /// ax/witness : forall x : V. |- exists y : V. 
[lo: x, hi: y] R + #[test] + fn chase_existential_creates_witnesses( + num_elements in 1..5usize, + ) { + let mut universe = Universe::new(); + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Axiom: forall x : V. |- exists y : V. [lo: x, hi: y] R + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Exists( + "y".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]))), + ), + }; + + let _iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // Each original element should have at least one witness + // So we should have at least num_elements tuples + prop_assert!(structure.relations[0].len() >= num_elements); + } + + /// Test equality conclusion merges elements via CC + /// ax/collapse : forall x, y : V. 
[lo: x, hi: y] R |- x = y + #[test] + fn chase_equality_conclusion_reduces_carrier( + num_pairs in 1..4usize, + ) { + let mut universe = Universe::new(); + let num_elements = num_pairs * 2; // Each pair will merge + + let mut structure = { + let mut s = Structure::new(1); + for i in 0..num_elements { + s.carriers[0].insert(i as u64); + } + s.relations.push(VecRelation::new(2)); + s + }; + + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Product(vec![ + ("lo".to_string(), DerivedSort::Base(0)), + ("hi".to_string(), DerivedSort::Base(0)), + ])); + + // Seed pairs: (0,1), (2,3), (4,5), ... + // Each pair will be collapsed by the equality axiom + use geolog::id::Slid; + for i in 0..num_pairs { + structure.relations[0].insert(vec![ + Slid::from_usize(i * 2), + Slid::from_usize(i * 2 + 1), + ]); + } + + // Axiom: forall x, y : V. [lo: x, hi: y] R |- x = y + let axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Rel(0, Term::Record(vec![ + ("lo".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("hi".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ])), + conclusion: Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("y".to_string(), DerivedSort::Base(0)), + ), + }; + + let _iterations = chase_fixpoint( + &[axiom], + &mut structure, + &mut universe, + &sig, + 100, + ).unwrap(); + + // After canonicalization, carrier should have fewer elements + // Each pair merges into one, so we should have num_pairs elements + prop_assert_eq!(structure.carriers[0].len() as usize, num_pairs); + } + } +} diff --git a/tests/proptest_query_backend.rs b/tests/proptest_query_backend.rs new file mode 100644 index 0000000..290c072 --- /dev/null +++ b/tests/proptest_query_backend.rs @@ -0,0 +1,870 @@ +//! Property tests for query backend. +//! +//! 
Generates random structures and queries, then verifies the naive backend +//! produces correct results by comparing against a reference implementation. + +use proptest::prelude::*; +use std::collections::HashSet; + +use geolog::core::Structure; +use geolog::id::{NumericId, Slid}; +use geolog::query::backend::{execute, Bag, JoinCond, Predicate, QueryOp}; + +/// Generate a random structure with given number of sorts. +fn arb_structure(num_sorts: usize, max_elements_per_sort: usize) -> impl Strategy { + // For each sort, generate a set of element indices + let sort_elements = prop::collection::vec( + prop::collection::btree_set(0u64..1000, 0..=max_elements_per_sort), + num_sorts, + ); + + sort_elements.prop_map(move |elements_per_sort| { + let mut structure = Structure::new(num_sorts); + for (sort_idx, elements) in elements_per_sort.into_iter().enumerate() { + for elem in elements { + structure.carriers[sort_idx].insert(elem); + } + } + structure + }) +} + +/// Reference implementation for Scan: iterate all elements +fn reference_scan(structure: &Structure, sort_idx: usize) -> HashSet> { + let mut result = HashSet::new(); + if let Some(carrier) = structure.carriers.get(sort_idx) { + for elem in carrier.iter() { + result.insert(vec![Slid::from_usize(elem as usize)]); + } + } + result +} + +/// Reference implementation for Filter +fn reference_filter( + input: &HashSet>, + pred: &Predicate, + _structure: &Structure, +) -> HashSet> { + input + .iter() + .filter(|tuple| reference_eval_predicate(pred, tuple)) + .cloned() + .collect() +} + +fn reference_eval_predicate(pred: &Predicate, tuple: &[Slid]) -> bool { + match pred { + Predicate::True => true, + Predicate::False => false, + Predicate::ColEqConst { col, val } => tuple.get(*col) == Some(val), + Predicate::ColEqCol { left, right } => { + tuple.get(*left) == tuple.get(*right) && tuple.get(*left).is_some() + } + Predicate::And(a, b) => { + reference_eval_predicate(a, tuple) && reference_eval_predicate(b, tuple) + } + 
Predicate::Or(a, b) => { + reference_eval_predicate(a, tuple) || reference_eval_predicate(b, tuple) + } + Predicate::FuncEq { .. } => true, // Skip function predicates in reference (need structure access) + Predicate::FuncEqConst { .. } => true, // Skip function predicates in reference + } +} + +/// Reference implementation for Cross Join +fn reference_cross_join( + left: &HashSet>, + right: &HashSet>, +) -> HashSet> { + let mut result = HashSet::new(); + for l in left { + for r in right { + let mut combined = l.clone(); + combined.extend(r.iter().cloned()); + result.insert(combined); + } + } + result +} + +/// Reference implementation for Union +fn reference_union( + left: &HashSet>, + right: &HashSet>, +) -> HashSet> { + left.union(right).cloned().collect() +} + +/// Convert Bag to HashSet (ignoring multiplicities, for comparison) +fn bag_to_set(bag: &Bag) -> HashSet> { + bag.iter() + .filter(|(_, mult)| **mult > 0) + .map(|(tuple, _)| tuple.clone()) + .collect() +} + +/// Generate a random predicate +fn arb_predicate() -> impl Strategy { + prop_oneof![ + Just(Predicate::True), + Just(Predicate::False), + (0usize..3, 0usize..100).prop_map(|(col, val)| Predicate::ColEqConst { + col, + val: Slid::from_usize(val), + }), + (0usize..3, 0usize..3).prop_map(|(left, right)| Predicate::ColEqCol { left, right }), + ] +} + +/// Generate a base query (no recursion) +fn arb_base_query() -> impl Strategy { + prop_oneof![ + (0usize..3).prop_map(|sort_idx| QueryOp::Scan { sort_idx }), + Just(QueryOp::Empty), + prop::collection::vec(0usize..100, 1..=2) + .prop_map(|tuple| QueryOp::Constant { + tuple: tuple.into_iter().map(Slid::from_usize).collect() + }), + ] +} + +/// Generate a random query plan using prop_recursive +fn arb_query_op() -> impl Strategy { + arb_base_query().prop_recursive( + 3, // max depth + 64, // max nodes + 10, // items per collection + |inner| { + prop_oneof![ + // Keep some base cases at each level + arb_base_query(), + // Unary operations + (inner.clone(), 
arb_predicate()) + .prop_map(|(input, pred)| QueryOp::Filter { + input: Box::new(input), + pred, + }), + inner.clone().prop_map(|input| QueryOp::Distinct { + input: Box::new(input), + }), + inner.clone().prop_map(|input| QueryOp::Negate { + input: Box::new(input), + }), + // Binary operations + (inner.clone(), inner.clone()) + .prop_map(|(left, right)| QueryOp::Union { + left: Box::new(left), + right: Box::new(right), + }), + (inner.clone(), inner) + .prop_map(|(left, right)| QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }), + ] + } + ) +} + +proptest! { + /// Test that optimizer preserves semantics for randomly generated plans. + #[test] + fn test_optimize_preserves_semantics( + structure in arb_structure(3, 5), + plan in arb_query_op(), + ) { + use geolog::query::optimize; + + let unoptimized_result = execute(&plan, &structure); + let optimized = optimize(&plan); + let optimized_result = execute(&optimized, &structure); + + prop_assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); + } + + /// Test that Scan produces all elements of a sort. + #[test] + fn test_scan_correct( + structure in arb_structure(3, 10), + sort_idx in 0usize..3, + ) { + let plan = QueryOp::Scan { sort_idx }; + let result = execute(&plan, &structure); + + let reference = reference_scan(&structure, sort_idx); + let actual = bag_to_set(&result); + + prop_assert_eq!(actual, reference); + } + + /// Test that Filter with True predicate returns all input. 
+ #[test] + fn test_filter_true_is_identity( + structure in arb_structure(2, 8), + sort_idx in 0usize..2, + ) { + let scan = QueryOp::Scan { sort_idx }; + let filter = QueryOp::Filter { + input: Box::new(scan.clone()), + pred: Predicate::True, + }; + + let scan_result = execute(&scan, &structure); + let filter_result = execute(&filter, &structure); + + prop_assert_eq!(bag_to_set(&scan_result), bag_to_set(&filter_result)); + } + + /// Test that Filter with False predicate returns empty. + #[test] + fn test_filter_false_is_empty( + structure in arb_structure(2, 8), + sort_idx in 0usize..2, + ) { + let scan = QueryOp::Scan { sort_idx }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::False, + }; + + let result = execute(&filter, &structure); + prop_assert!(result.is_empty()); + } + + /// Test that cross join produces correct cardinality. + #[test] + fn test_cross_join_cardinality( + structure in arb_structure(2, 5), + ) { + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left.clone()), + right: Box::new(right.clone()), + cond: JoinCond::Cross, + }; + + let left_result = execute(&left, &structure); + let right_result = execute(&right, &structure); + let join_result = execute(&join, &structure); + + let expected_size = left_result.len() * right_result.len(); + prop_assert_eq!(join_result.len(), expected_size); + } + + /// Test that cross join matches reference. 
+ #[test] + fn test_cross_join_correct( + structure in arb_structure(2, 4), + ) { + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }; + + let result = execute(&join, &structure); + + let ref_left = reference_scan(&structure, 0); + let ref_right = reference_scan(&structure, 1); + let reference = reference_cross_join(&ref_left, &ref_right); + + prop_assert_eq!(bag_to_set(&result), reference); + } + + /// Test that Union is commutative. + #[test] + fn test_union_commutative( + structure in arb_structure(2, 5), + ) { + let a = QueryOp::Scan { sort_idx: 0 }; + let b = QueryOp::Scan { sort_idx: 1 }; + + let union_ab = QueryOp::Union { + left: Box::new(a.clone()), + right: Box::new(b.clone()), + }; + let union_ba = QueryOp::Union { + left: Box::new(b), + right: Box::new(a), + }; + + let result_ab = execute(&union_ab, &structure); + let result_ba = execute(&union_ba, &structure); + + // As sets, they should be equal (multiplicities may differ) + prop_assert_eq!(bag_to_set(&result_ab), bag_to_set(&result_ba)); + } + + /// Test that Distinct is idempotent. + #[test] + fn test_distinct_idempotent( + structure in arb_structure(1, 8), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + let distinct1 = QueryOp::Distinct { + input: Box::new(scan), + }; + let distinct2 = QueryOp::Distinct { + input: Box::new(distinct1.clone()), + }; + + let result1 = execute(&distinct1, &structure); + let result2 = execute(&distinct2, &structure); + + prop_assert_eq!(bag_to_set(&result1), bag_to_set(&result2)); + } + + /// Test that Empty produces no results. + #[test] + fn test_empty_is_empty( + structure in arb_structure(1, 5), + ) { + let empty = QueryOp::Empty; + let result = execute(&empty, &structure); + prop_assert!(result.is_empty()); + } + + /// Test that Constant produces exactly one tuple. 
+ #[test] + fn test_constant_singleton( + tuple in prop::collection::vec(0usize..100, 1..=3), + ) { + let structure = Structure::new(1); // Empty structure + let slid_tuple: Vec = tuple.iter().map(|&i| Slid::from_usize(i)).collect(); + let constant = QueryOp::Constant { tuple: slid_tuple.clone() }; + let result = execute(&constant, &structure); + + prop_assert_eq!(result.len(), 1); + prop_assert!(result.tuples.contains_key(&slid_tuple)); + } + + /// Test filter with constant equality. + #[test] + fn test_filter_col_eq_const( + structure in arb_structure(1, 10), + filter_val in 0usize..1000, + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan.clone()), + pred: Predicate::ColEqConst { + col: 0, + val: Slid::from_usize(filter_val), + }, + }; + + let scan_result = execute(&scan, &structure); + let filter_result = execute(&filter, &structure); + + // Reference: manually filter + let reference: HashSet> = bag_to_set(&scan_result) + .into_iter() + .filter(|tuple| tuple[0] == Slid::from_usize(filter_val)) + .collect(); + + prop_assert_eq!(bag_to_set(&filter_result), reference); + } + + /// Test filter matches reference implementation for compound predicates. + #[test] + fn test_filter_matches_reference( + structure in arb_structure(1, 10), + ) { + // Get all elements as single-column tuples + let input = reference_scan(&structure, 0); + + // Test with True predicate + let filtered_true = reference_filter(&input, &Predicate::True, &structure); + prop_assert_eq!(filtered_true, input.clone()); + + // Test with False predicate + let filtered_false = reference_filter(&input, &Predicate::False, &structure); + prop_assert!(filtered_false.is_empty()); + } + + /// Test union matches reference implementation. 
+ #[test] + fn test_union_matches_reference( + structure in arb_structure(2, 5), + ) { + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let union = QueryOp::Union { + left: Box::new(left), + right: Box::new(right), + }; + + let result = execute(&union, &structure); + + let ref_left = reference_scan(&structure, 0); + let ref_right = reference_scan(&structure, 1); + let reference = reference_union(&ref_left, &ref_right); + + prop_assert_eq!(bag_to_set(&result), reference); + } + + /// Test that Negate(Negate(x)) = x. + #[test] + fn test_negate_involutive( + structure in arb_structure(1, 8), + ) { + let scan = QueryOp::Scan { sort_idx: 0 }; + let negate1 = QueryOp::Negate { + input: Box::new(scan.clone()), + }; + let negate2 = QueryOp::Negate { + input: Box::new(negate1), + }; + + let original = execute(&scan, &structure); + let double_negated = execute(&negate2, &structure); + + prop_assert_eq!(bag_to_set(&original), bag_to_set(&double_negated)); + } + + /// Test that Project preserves all tuples (just reduces columns). 
+ #[test] + fn test_project_same_size( + structure in arb_structure(2, 4), + ) { + // Cross join creates (a, b) tuples + let left = QueryOp::Scan { sort_idx: 0 }; + let right = QueryOp::Scan { sort_idx: 1 }; + let join = QueryOp::Join { + left: Box::new(left), + right: Box::new(right), + cond: JoinCond::Cross, + }; + + // Project to first column only + let project = QueryOp::Project { + input: Box::new(join.clone()), + columns: vec![0], + }; + + let join_result = execute(&join, &structure); + let project_result = execute(&project, &structure); + + // Projected result should have same or fewer distinct tuples + // (could be fewer due to duplicate first elements) + prop_assert!(bag_to_set(&project_result).len() <= join_result.len()); + } +} + +#[test] +fn test_basic_operations_smoke() { + // Simple smoke test to ensure the proptest infrastructure works + let mut structure = Structure::new(2); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + structure.carriers[1].insert(10); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let result = execute(&scan, &structure); + assert_eq!(result.len(), 2); +} + +#[test] +fn test_pattern_compile_scan() { + use geolog::query::Pattern; + + // Create a structure with one sort + let mut structure = Structure::new(1); + structure.carriers[0].insert(5); + structure.carriers[0].insert(10); + structure.carriers[0].insert(15); + + // Create a simple pattern: scan sort 0, no constraints, return element + let pattern = Pattern::new(0); + + // Compile and execute + let plan = pattern.compile(); + let result = execute(&plan, &structure); + + // Should get all 3 elements + assert_eq!(result.len(), 3); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(5)])); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(10)])); + assert!(result.tuples.contains_key(&vec![Slid::from_usize(15)])); +} + +/// Test that optimize preserves semantics for filter with True predicate. 
+#[test] +fn test_optimize_filter_true_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + structure.carriers[0].insert(3); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::True, + }; + + let unoptimized_result = execute(&filter, &structure); + let optimized = optimize(&filter); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); +} + +/// Test that optimize preserves semantics for filter with False predicate. +#[test] +fn test_optimize_filter_false_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let filter = QueryOp::Filter { + input: Box::new(scan), + pred: Predicate::False, + }; + + let unoptimized_result = execute(&filter, &structure); + let optimized = optimize(&filter); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); + assert!(unoptimized_result.is_empty()); + assert!(optimized_result.is_empty()); +} + +/// Test that double negation optimization preserves semantics. 
+#[test] +fn test_optimize_double_negate_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(10); + structure.carriers[0].insert(20); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let negate1 = QueryOp::Negate { + input: Box::new(scan), + }; + let negate2 = QueryOp::Negate { + input: Box::new(negate1), + }; + + let unoptimized_result = execute(&negate2, &structure); + let optimized = optimize(&negate2); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); +} + +/// Test that union with empty optimization preserves semantics. +#[test] +fn test_optimize_union_empty_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(1); + structure.carriers[0].insert(5); + structure.carriers[0].insert(15); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let union = QueryOp::Union { + left: Box::new(scan), + right: Box::new(QueryOp::Empty), + }; + + let unoptimized_result = execute(&union, &structure); + let optimized = optimize(&union); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); +} + +/// Test that join with empty optimization preserves semantics. 
+#[test] +fn test_optimize_join_empty_preserves_semantics() { + use geolog::query::optimize; + + let mut structure = Structure::new(2); + structure.carriers[0].insert(1); + structure.carriers[0].insert(2); + + let scan = QueryOp::Scan { sort_idx: 0 }; + let join = QueryOp::Join { + left: Box::new(scan), + right: Box::new(QueryOp::Empty), + cond: JoinCond::Cross, + }; + + let unoptimized_result = execute(&join, &structure); + let optimized = optimize(&join); + let optimized_result = execute(&optimized, &structure); + + assert_eq!(bag_to_set(&unoptimized_result), bag_to_set(&optimized_result)); + assert!(unoptimized_result.is_empty()); + assert!(optimized_result.is_empty()); +} + +#[test] +fn test_pattern_compile_with_function_filter() { + use geolog::query::Pattern; + use geolog::universe::Universe; + + // Create a structure with one sort and properly add elements + let mut structure = Structure::new(1); + let mut universe = Universe::new(); + + // Add 3 elements to sort 0 + let (slid0, _) = structure.add_element(&mut universe, 0); + let (slid1, _) = structure.add_element(&mut universe, 0); + let (slid2, _) = structure.add_element(&mut universe, 0); + + // Initialize function storage for 1 function with domain sort 0 + structure.init_functions(&[Some(0)]); + + // Function 0: maps elem0→slid10, elem1→slid20, elem2→slid10 + // We need target elements to map to - add them to a different "virtual" sort + // For simplicity, we'll use constant Slid values that represent the results + let slid10 = Slid::from_usize(10); + let slid20 = Slid::from_usize(20); + + structure.define_function(0, slid0, slid10).unwrap(); + structure.define_function(0, slid1, slid20).unwrap(); + structure.define_function(0, slid2, slid10).unwrap(); + + // Pattern: find elements where func(elem) = 10 + let pattern = Pattern::new(0) + .filter(0, slid10); + + // Compile and execute + let plan = pattern.compile(); + let result = execute(&plan, &structure); + + // Should get elements 0 and 2 (both map to 
10) + assert_eq!(result.len(), 2); + assert!(result.tuples.contains_key(&vec![slid0])); + assert!(result.tuples.contains_key(&vec![slid2])); + // Element 1 (maps to 20) should not be included + assert!(!result.tuples.contains_key(&vec![slid1])); +} + +// ============================================================================ +// DBSP Temporal Operator Property Tests +// ============================================================================ + +use geolog::query::backend::StreamContext; +use geolog::query::backend::execute_stream; + +proptest! { + #![proptest_config(ProptestConfig::with_cases(50))] + + /// Delay at timestep 0 always produces empty output + #[test] + fn test_delay_initial_empty_proptest( + structure in arb_structure(2, 5), + sort_idx in 0usize..2, + ) { + let mut ctx = StreamContext::new(); + let plan = QueryOp::Delay { + input: Box::new(QueryOp::Scan { sort_idx }), + state_id: 0, + }; + + // At timestep 0, delay should output empty + let result = execute_stream(&plan, &structure, &mut ctx); + prop_assert!(result.is_empty(), "Delay at t=0 should be empty"); + } + + /// Delay outputs previous timestep's value + #[test] + fn test_delay_outputs_previous_proptest( + structure in arb_structure(1, 8), + ) { + let mut ctx = StreamContext::new(); + let scan = QueryOp::Scan { sort_idx: 0 }; + let delay = QueryOp::Delay { + input: Box::new(scan.clone()), + state_id: 0, + }; + + // Step 0: capture input, output empty + let _ = execute_stream(&delay, &structure, &mut ctx); + ctx.step(); + + // Step 1: should output what was input at step 0 + let result = execute_stream(&delay, &structure, &mut ctx); + let expected = reference_scan(&structure, 0); + + prop_assert_eq!(bag_to_set(&result), expected, "Delay should output previous input"); + } + + /// ∫(δ(x)) = x for stable input (fundamental DBSP identity) + #[test] + fn test_integrate_diff_identity( + structure in arb_structure(1, 10), + ) { + let mut ctx = StreamContext::new(); + + // ∫(δ(scan)) + let 
plan = QueryOp::Integrate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + state_id: 1, + }; + + // Step 0: should equal scan + let result = execute_stream(&plan, &structure, &mut ctx); + let expected = reference_scan(&structure, 0); + prop_assert_eq!(bag_to_set(&result), expected.clone(), "∫(δ(scan)) should equal scan at t=0"); + + ctx.step(); + + // Step 1: still should equal scan (no changes) + let result = execute_stream(&plan, &structure, &mut ctx); + prop_assert_eq!(bag_to_set(&result), expected, "∫(δ(scan)) should equal scan at t=1"); + } + + /// Diff of stable input becomes empty after first timestep + #[test] + fn test_diff_stable_becomes_empty( + structure in arb_structure(1, 8), + ) { + let mut ctx = StreamContext::new(); + let plan = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + + // Step 0: diff = scan - {} = scan (all elements are "new") + let result0 = execute_stream(&plan, &structure, &mut ctx); + let expected0 = reference_scan(&structure, 0); + prop_assert_eq!(bag_to_set(&result0), expected0); + ctx.step(); + + // Step 1: diff = scan - scan = {} (no changes) + let result1 = execute_stream(&plan, &structure, &mut ctx); + prop_assert!(result1.is_empty(), "Diff of stable input should be empty"); + } + + /// Integrate accumulates multiplicities across timesteps + #[test] + fn test_integrate_accumulates( + tuple in prop::collection::vec(0usize..100, 1..=2), + num_steps in 1usize..5, + ) { + let structure = Structure::new(1); + let mut ctx = StreamContext::new(); + + let slid_tuple: Vec = tuple.iter().map(|&i| Slid::from_usize(i)).collect(); + let plan = QueryOp::Integrate { + input: Box::new(QueryOp::Constant { tuple: slid_tuple.clone() }), + state_id: 0, + }; + + for step in 0..num_steps { + let result = execute_stream(&plan, &structure, &mut ctx); + + // After step i, multiplicity should be i+1 + let expected_mult = (step + 1) as i64; + let 
actual_mult = result.tuples.get(&slid_tuple).copied().unwrap_or(0); + prop_assert_eq!(actual_mult, expected_mult, "Multiplicity at step {}", step); + + ctx.step(); + } + } + + /// Negate and Integrate compose correctly: ∫(negate(δ(x))) + ∫(δ(x)) = 0 + #[test] + fn test_negate_integrate_diff_cancellation( + structure in arb_structure(1, 5), + ) { + let mut ctx1 = StreamContext::new(); + let mut ctx2 = StreamContext::new(); + + let diff = QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }; + + // ∫(δ(scan)) + let int_pos = QueryOp::Integrate { + input: Box::new(diff.clone()), + state_id: 1, + }; + + // ∫(negate(δ(scan))) + let int_neg = QueryOp::Integrate { + input: Box::new(QueryOp::Negate { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 2, + }), + }), + state_id: 3, + }; + + // Execute both for a couple steps + let result_pos = execute_stream(&int_pos, &structure, &mut ctx1); + let result_neg = execute_stream(&int_neg, &structure, &mut ctx2); + + // Union should cancel to zero + let combined = result_pos.union(&result_neg); + prop_assert!(combined.is_empty() || combined.iter().all(|(_, m)| *m == 0), + "∫(δ) + ∫(¬δ) should cancel"); + } + + /// DBSP filter distributes: Filter(Diff(x)) = Diff(Filter(x)) for stable input + /// (This is a key DBSP optimization: incrementalize then filter = filter then incrementalize) + #[test] + fn test_dbsp_filter_distribution( + structure in arb_structure(1, 10), + filter_val in 0usize..100, + ) { + let filter_slid = Slid::from_usize(filter_val); + let mut ctx1 = StreamContext::new(); + let mut ctx2 = StreamContext::new(); + + // Filter(Diff(Scan)) + let plan1 = QueryOp::Filter { + input: Box::new(QueryOp::Diff { + input: Box::new(QueryOp::Scan { sort_idx: 0 }), + state_id: 0, + }), + pred: Predicate::ColEqConst { col: 0, val: filter_slid }, + }; + + // Diff(Filter(Scan)) + let plan2 = QueryOp::Diff { + input: Box::new(QueryOp::Filter { + input: 
Box::new(QueryOp::Scan { sort_idx: 0 }), + pred: Predicate::ColEqConst { col: 0, val: filter_slid }, + }), + state_id: 1, + }; + + // Both should produce same results + let result1 = execute_stream(&plan1, &structure, &mut ctx1); + let result2 = execute_stream(&plan2, &structure, &mut ctx2); + + prop_assert_eq!(bag_to_set(&result1), bag_to_set(&result2), + "Filter(Diff(x)) = Diff(Filter(x))"); + + ctx1.step(); + ctx2.step(); + + // Should remain equal at next timestep + let result1 = execute_stream(&plan1, &structure, &mut ctx1); + let result2 = execute_stream(&plan2, &structure, &mut ctx2); + + prop_assert_eq!(bag_to_set(&result1), bag_to_set(&result2), + "Filter(Diff(x)) = Diff(Filter(x)) at t=1"); + } +} diff --git a/tests/proptest_solver.rs b/tests/proptest_solver.rs new file mode 100644 index 0000000..04016b6 --- /dev/null +++ b/tests/proptest_solver.rs @@ -0,0 +1,382 @@ +//! Property tests for the geometric logic solver +//! +//! Tests key properties: +//! - solve(trivial_theory) always finds a model (empty structure) +//! - solve(inconsistent_theory) is always UNSAT +//! 
- enumerate_models(empty, T) = solve(T) + +mod generators; + +use std::rc::Rc; + +use geolog::core::{ + Context, DerivedSort, ElaboratedTheory, Formula, Sequent, Signature, Term, Theory, +}; +use geolog::solver::{solve, enumerate_models, Budget, EnumerationResult}; +use geolog::universe::Universe; +use proptest::prelude::*; + +// ============================================================================ +// Theory Generators +// ============================================================================ + +/// Generate a theory with no axioms (trivially satisfiable by empty model) +fn arb_trivial_theory() -> impl Strategy> { + (1usize..=5).prop_map(|num_sorts| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Trivial".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }) + }) +} + +/// Generate an inconsistent theory (True ⊢ False) +fn arb_inconsistent_theory() -> impl Strategy> { + (1usize..=3).prop_map(|num_sorts| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + let axiom = Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::False, + }; + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Inconsistent".to_string(), + signature: sig, + axioms: vec![axiom], + axiom_names: vec!["ax/inconsistent".to_string()], + }, + }) + }) +} + +/// Generate a theory with an existential axiom +fn arb_existential_theory() -> impl Strategy> { + (1usize..=3, 0usize..=2).prop_map(|(num_sorts, rel_count)| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + // Add unary relations + for i in 0..rel_count { + sig.add_relation(format!("R{}", i), DerivedSort::Base(0)); + } + + let mut axioms = vec![]; + + // Add unconditional existential: |- ∃x:S0. 
x = x + // This just requires creating at least one element + if num_sorts > 0 { + axioms.push(Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Eq( + Term::Var("x".to_string(), DerivedSort::Base(0)), + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }); + } + + // Generate axiom names + let axiom_names: Vec = (0..axioms.len()) + .map(|i| format!("ax/exists_{}", i)) + .collect(); + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Existential".to_string(), + signature: sig, + axioms, + axiom_names, + }, + }) + }) +} + +// ============================================================================ +// Property Tests +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(100))] + + /// Trivial theories (no axioms) are always solved with empty model + #[test] + fn trivial_theory_always_solved(theory in arb_trivial_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // Empty model should have all carriers empty + for sort_idx in 0..model.num_sorts() { + prop_assert_eq!(model.carrier_size(sort_idx), 0); + } + } + _ => prop_assert!(false, "Trivial theory should always be solved"), + } + } + + /// Inconsistent theories (True ⊢ False) are always UNSAT + #[test] + fn inconsistent_theory_always_unsat(theory in arb_inconsistent_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Unsat { .. } => { + // Expected! 
+ } + _ => prop_assert!(false, "Inconsistent theory should always be UNSAT"), + } + } + + /// solve(T) equals enumerate_models(empty, T) + #[test] + fn solve_equals_enumerate_empty(theory in arb_trivial_theory()) { + let budget = Budget::quick(); + + // Method 1: solve + let result1 = solve(theory.clone(), budget.clone()); + + // Method 2: enumerate_models with empty base + let num_sorts = theory.theory.signature.sorts.len(); + let empty_base = geolog::core::Structure::new(num_sorts); + let result2 = enumerate_models(empty_base, Universe::new(), theory, budget); + + // Both should produce equivalent results (both find models or both fail) + match (&result1, &result2) { + (EnumerationResult::Found { .. }, EnumerationResult::Found { .. }) => { + // Both found - good! + } + (EnumerationResult::Unsat { .. }, EnumerationResult::Unsat { .. }) => { + // Both UNSAT - good! + } + (EnumerationResult::Incomplete { .. }, EnumerationResult::Incomplete { .. }) => { + // Both incomplete - acceptable + } + _ => prop_assert!(false, "solve and enumerate_models should produce equivalent results"), + } + } + + /// Existential theory creates at least one element + #[test] + fn existential_creates_elements(theory in arb_existential_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // If theory has existential axioms, should have at least one element + if !theory.theory.axioms.is_empty() { + let has_elements = (0..model.num_sorts()) + .any(|s| model.carrier_size(s) > 0); + prop_assert!(has_elements, "Existential theory should have at least one element"); + } + } + EnumerationResult::Incomplete { .. } => { + // Acceptable - budget might be too small + } + EnumerationResult::Unsat { .. 
} => { + prop_assert!(false, "Existential theory should not be UNSAT"); + } + } + } +} + +/// Generate a theory with relations and implication axioms (Horn clauses) +fn arb_relation_theory() -> impl Strategy> { + (1usize..=2, 1usize..=3).prop_map(|(num_sorts, num_rels)| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + // Add unary relations on first sort + for i in 0..num_rels { + sig.add_relation(format!("R{}", i), DerivedSort::Base(0)); + } + + let mut axioms = vec![]; + + // Add existential axiom to ensure at least one element + axioms.push(Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Rel( + 0, // R0(x) + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }); + + // If we have R1, add Horn clause: R0(x) |- R1(x) + if num_rels > 1 { + let ctx = Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }; + axioms.push(Sequent { + context: ctx, + premise: Formula::Rel( + 0, + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + conclusion: Formula::Rel( + 1, + Term::Var("x".to_string(), DerivedSort::Base(0)), + ), + }); + } + + let axiom_names: Vec = (0..axioms.len()) + .map(|i| format!("ax/rel_{}", i)) + .collect(); + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "Relations".to_string(), + signature: sig, + axioms, + axiom_names, + }, + }) + }) +} + +/// Generate a theory with a function and equality axiom +fn arb_function_theory() -> impl Strategy> { + (1usize..=2).prop_map(|num_sorts| { + let mut sig = Signature::new(); + for i in 0..num_sorts { + sig.add_sort(format!("S{}", i)); + } + + // Add function f : S0 -> S0 + sig.add_function("f".to_string(), DerivedSort::Base(0), DerivedSort::Base(0)); + + // Add unconditional existential: |- ∃x:S0. 
f(x) = x + // This requires creating at least one fixed point + // BUT we need the tensor compiler to handle f(x) = x correctly + let axioms = vec![ + Sequent { + context: Context::new(), + premise: Formula::True, + conclusion: Formula::Exists( + "x".to_string(), + DerivedSort::Base(0), + Box::new(Formula::Eq( + Term::App(0, Box::new(Term::Var("x".to_string(), DerivedSort::Base(0)))), + Term::Var("x".to_string(), DerivedSort::Base(0)), + )), + ), + }, + ]; + + Rc::new(ElaboratedTheory { + params: vec![], + theory: Theory { + name: "FunctionTheory".to_string(), + signature: sig, + axioms, + axiom_names: vec!["ax/fixpoint".to_string()], + }, + }) + }) +} + +// ============================================================================ +// Focused Tests +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(50))] + + /// Function theories with fixed-point existentials work + #[test] + fn function_fixed_point_theory(theory in arb_function_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // Should have created at least one element that is its own fixed point + if !theory.theory.axioms.is_empty() { + let has_elements = (0..model.num_sorts()) + .any(|s| model.carrier_size(s) > 0); + prop_assert!(has_elements, "Function theory should have at least one element"); + } + } + EnumerationResult::Incomplete { .. } => { + // Acceptable - budget might be too small + } + EnumerationResult::Unsat { .. } => { + // This is acceptable! The axiom ∃x. f(x)=x might be UNSAT + // if we can't construct such an x with the solver's strategy. + // Actually this shouldn't happen for a fresh function. 
+ } + } + } + + /// Relation theories with Horn clauses propagate correctly + #[test] + fn relation_horn_clause_propagation(theory in arb_relation_theory()) { + let result = solve(theory.clone(), Budget::quick()); + match result { + EnumerationResult::Found { model, .. } => { + // Should have at least one element in R0 + prop_assert!(model.carrier_size(0) > 0, "Should have elements"); + + // If theory has 2+ relations and a Horn clause R0(x) |- R1(x), + // then any element in R0 should also be in R1 + if theory.theory.signature.relations.len() > 1 { + // Check that R1 is populated + // (We can't easily verify the full Horn clause semantics here + // without access to relation contents, but we can check it runs) + } + } + EnumerationResult::Incomplete { .. } => { + // Acceptable + } + EnumerationResult::Unsat { .. } => { + prop_assert!(false, "Relation theory should not be UNSAT"); + } + } + } + + /// Budget limits are respected + #[test] + fn budget_limits_respected(theory in arb_existential_theory()) { + // Very small budget + let tiny_budget = Budget::new(1, 1); + let result = solve(theory.clone(), tiny_budget); + + // Should either solve quickly or timeout/incomplete + match result { + EnumerationResult::Found { time_ms, .. } => { + // If solved, should be fast + prop_assert!(time_ms < 100.0, "Solved within reasonable time"); + } + EnumerationResult::Incomplete { time_ms, .. } => { + // Should respect budget + prop_assert!(time_ms < 100.0, "Incomplete within reasonable time"); + } + EnumerationResult::Unsat { time_ms } => { + // Should respect budget + prop_assert!(time_ms < 100.0, "UNSAT within reasonable time"); + } + } + } +} diff --git a/tests/proptest_structure.rs b/tests/proptest_structure.rs new file mode 100644 index 0000000..0453fff --- /dev/null +++ b/tests/proptest_structure.rs @@ -0,0 +1,239 @@ +//! 
Property tests for Structure invariants + +mod generators; + +use generators::{StructureOp, StructureParams, check_structure_invariants}; +use geolog::core::Structure; +use geolog::id::{NumericId, Slid}; +use geolog::universe::Universe; +use proptest::prelude::*; + +proptest! { + #![proptest_config(ProptestConfig::with_cases(2048))] + /// Empty structure maintains invariants + #[test] + fn empty_structure_invariants(num_sorts in 1usize..10) { + let structure = Structure::new(num_sorts); + prop_assert!(check_structure_invariants(&structure).is_ok()); + prop_assert_eq!(structure.len(), 0); + prop_assert_eq!(structure.num_sorts(), num_sorts); + } + + /// Structure maintains invariants after adding elements + #[test] + fn structure_invariants_after_adds( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 4, + max_elements_per_sort: 10, + }) + ) { + prop_assert!(check_structure_invariants(&structure).is_ok()); + } + + /// add_element correctly sets up bijection + #[test] + fn add_element_bijection( + num_sorts in 1usize..5, + sort_id in any::(), + ) { + let sort_id = sort_id.index(num_sorts); + + let mut universe = Universe::new(); + let mut structure = Structure::new(num_sorts); + + let (slid, luid) = structure.add_element(&mut universe, sort_id); + + // Forward: slid → luid + prop_assert_eq!(structure.luids[slid.index()], luid); + + // Reverse: luid → slid + prop_assert_eq!(structure.luid_to_slid.get(&luid), Some(&slid)); + prop_assert_eq!(structure.lookup_luid(luid), Some(slid)); + + // Sort is correct + prop_assert_eq!(structure.sorts[slid.index()], sort_id); + + // Carrier contains the element + prop_assert!(structure.carriers[sort_id].contains(slid.index() as u64)); + } + + /// Carrier membership is exclusive (element in exactly one carrier) + #[test] + fn carrier_membership_exclusive( + ops in generators::arb_structure_ops(5, 20) + ) { + let mut universe = Universe::new(); + let mut structure = Structure::new(5); + + for op in 
ops { + match op { + StructureOp::AddElement { sort_id } => { + structure.add_element(&mut universe, sort_id); + } + } + } + + // Check each element appears in exactly one carrier + for slid in 0..structure.len() { + let sort_id = structure.sorts[slid]; + let mut found_in = Vec::new(); + + for (carrier_id, carrier) in structure.carriers.iter().enumerate() { + if carrier.contains(slid as u64) { + found_in.push(carrier_id); + } + } + + prop_assert_eq!( + found_in.len(), 1, + "slid {} should be in exactly one carrier, found in {:?}", + slid, found_in + ); + prop_assert_eq!(found_in[0], sort_id); + } + } + + /// sort_local_id is consistent with carrier rank + #[test] + fn sort_local_id_consistency( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 8, + }) + ) { + for slid_idx in 0..structure.len() { + let slid = Slid::from_usize(slid_idx); + let sort_id = structure.sorts[slid_idx]; + let sort_slid = structure.sort_local_id(slid); + + // sort_slid should be in range [0, carrier_size) + let carrier_size = structure.carrier_size(sort_id); + prop_assert!( + sort_slid.index() < carrier_size, + "sort_slid {} should be < carrier_size {}", + sort_slid, carrier_size + ); + } + } + + /// carrier_size matches number of elements with that sort + #[test] + fn carrier_size_matches_count( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 4, + max_elements_per_sort: 12, + }) + ) { + for sort_id in 0..structure.num_sorts() { + let carrier_size = structure.carrier_size(sort_id); + let count = structure.sorts.iter().filter(|&&s| s == sort_id).count(); + prop_assert_eq!(carrier_size, count); + } + } + + /// add_element_with_luid preserves existing element identity + #[test] + fn add_with_existing_luid_identity(num_sorts in 1usize..5) { + let mut universe = Universe::new(); + let mut structure1 = Structure::new(num_sorts); + + // Create element in first structure + let (slid1, luid1) = 
structure1.add_element(&mut universe, 0); + + // Create second structure and add element with same luid + let mut structure2 = Structure::new(num_sorts); + let slid2 = structure2.add_element_with_luid(luid1, 0); + + // Should have same luid + prop_assert_eq!(structure2.luids[slid2.index()], luid1); + prop_assert_eq!(structure2.lookup_luid(luid1), Some(slid2)); + + // Both structures should maintain invariants + prop_assert!(check_structure_invariants(&structure1).is_ok()); + prop_assert!(check_structure_invariants(&structure2).is_ok()); + + let _ = slid1; // silence warning + } + + /// get_luid returns correct luid for slid + #[test] + fn get_luid_correctness( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 10, + }) + ) { + for slid_idx in 0..structure.len() { + let slid = Slid::from_usize(slid_idx); + let luid = structure.get_luid(slid); + prop_assert_eq!(structure.luids[slid_idx], luid); + prop_assert_eq!(structure.lookup_luid(luid), Some(slid)); + } + } + + /// Total elements equals sum of carrier sizes + #[test] + fn total_equals_carrier_sum( + (structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 5, + max_elements_per_sort: 8, + }) + ) { + let carrier_total: usize = (0..structure.num_sorts()) + .map(|s| structure.carrier_size(s)) + .sum(); + + prop_assert_eq!(structure.len(), carrier_total); + } + + /// Sequential add_elements produce sequential slids + #[test] + fn sequential_slids(ops in generators::arb_structure_ops(3, 15)) { + let mut universe = Universe::new(); + let mut structure = Structure::new(3); + let mut expected_slid_idx: usize = 0; + + for op in ops { + match op { + StructureOp::AddElement { sort_id } => { + let (slid, _) = structure.add_element(&mut universe, sort_id); + prop_assert_eq!(slid, Slid::from_usize(expected_slid_idx)); + expected_slid_idx += 1; + } + } + } + + prop_assert_eq!(structure.len(), expected_slid_idx); + } +} + +// Additional 
focused tests + +proptest! { + /// Function initialization creates correct storage + #[test] + fn function_init_correct_size( + (mut structure, _universe) in generators::arb_structure(StructureParams { + num_sorts: 3, + max_elements_per_sort: 5, + }) + ) { + // Initialize functions with domain sort IDs + let domain_sort_ids: Vec> = vec![Some(0), Some(1), None]; + structure.init_functions(&domain_sort_ids); + + prop_assert_eq!(structure.num_functions(), 3); + + // Check sizes match carrier sizes + prop_assert_eq!( + structure.functions[0].len(), + structure.carrier_size(0) + ); + prop_assert_eq!( + structure.functions[1].len(), + structure.carrier_size(1) + ); + // Function 2 has None domain, so size should be 0 + prop_assert_eq!(structure.functions[2].len(), 0); + } +} diff --git a/tests/proptest_tensor.rs b/tests/proptest_tensor.rs new file mode 100644 index 0000000..10961c5 --- /dev/null +++ b/tests/proptest_tensor.rs @@ -0,0 +1,476 @@ +//! Property tests for tensor operations +//! +//! Tests algebraic properties of tensor operations using proptest. + +mod generators; + +use generators::{TensorParams, arb_sparse_tensor, arb_tensor_pair_same_dims, arb_sparse_tensor_with_dims}; +use geolog::tensor::{SparseTensor, TensorExpr, conjunction, exists, conjunction_all, disjunction_all}; +use proptest::prelude::*; + +// ============================================================================ +// SparseTensor Basic Properties +// ============================================================================ + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(1024))] + + /// Empty tensor has no tuples + #[test] + fn empty_tensor_is_empty(dims in proptest::collection::vec(1usize..10, 0..4)) { + let tensor = SparseTensor::empty(dims.clone()); + prop_assert!(tensor.is_empty()); + prop_assert_eq!(tensor.len(), 0); + prop_assert_eq!(tensor.dims, dims); + } + + /// Scalar true contains the empty tuple + #[test] + fn scalar_true_contains_empty(_seed in any::()) { + let tensor = SparseTensor::scalar(true); + prop_assert!(tensor.contains(&[])); + prop_assert_eq!(tensor.len(), 1); + prop_assert!(tensor.dims.is_empty()); + } + + /// Scalar false is empty + #[test] + fn scalar_false_is_empty(_seed in any::()) { + let tensor = SparseTensor::scalar(false); + prop_assert!(!tensor.contains(&[])); + prop_assert!(tensor.is_empty()); + } + + /// Insert/remove roundtrip + #[test] + fn insert_remove_roundtrip( + dims in proptest::collection::vec(1usize..5, 1..3), + tuple_idx in any::(), + ) { + let mut tensor = SparseTensor::empty(dims.clone()); + + // Generate a valid tuple + let tuple: Vec = dims.iter() + .map(|&d| tuple_idx.index(d.max(1))) + .collect(); + + prop_assert!(!tensor.contains(&tuple)); + tensor.insert(tuple.clone()); + prop_assert!(tensor.contains(&tuple)); + tensor.remove(&tuple); + prop_assert!(!tensor.contains(&tuple)); + } + + /// Generated tensor has valid tuples (within dimension bounds) + #[test] + fn generated_tensor_valid_tuples( + tensor in arb_sparse_tensor(TensorParams::default()) + ) { + for tuple in tensor.iter() { + prop_assert_eq!(tuple.len(), tensor.dims.len()); + for (i, &val) in tuple.iter().enumerate() { + prop_assert!(val < tensor.dims[i], "tuple value {} >= dim {}", val, tensor.dims[i]); + } + } + } +} + +// ============================================================================ +// TensorExpr Product Properties +// ============================================================================ + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(512))] + + /// Product of empty tensors is empty + #[test] + fn product_with_empty_is_empty( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let empty = SparseTensor::empty(vec![3]); + let expr = TensorExpr::Product(vec![ + TensorExpr::leaf(tensor), + TensorExpr::leaf(empty), + ]); + let result = expr.materialize(); + prop_assert!(result.is_empty()); + } + + /// Product with scalar true is identity (dims extended but tuples preserved) + #[test] + fn product_with_scalar_true( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let scalar_true = SparseTensor::scalar(true); + let orig_len = tensor.len(); + let orig_dims = tensor.dims.clone(); + + let expr = TensorExpr::Product(vec![ + TensorExpr::leaf(tensor), + TensorExpr::leaf(scalar_true), + ]); + let result = expr.materialize(); + + prop_assert_eq!(result.len(), orig_len); + prop_assert_eq!(result.dims, orig_dims); + } + + /// Empty product is scalar true + #[test] + fn empty_product_is_scalar_true(_seed in any::()) { + let expr = TensorExpr::Product(vec![]); + let result = expr.materialize(); + prop_assert!(result.contains(&[])); + prop_assert_eq!(result.len(), 1); + } + + /// Product dimensions are concatenation + #[test] + fn product_dims_concatenate( + t1 in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 4, max_tuples: 5 }), + t2 in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 4, max_tuples: 5 }), + ) { + let expected_dims: Vec = t1.dims.iter().chain(t2.dims.iter()).copied().collect(); + + let expr = TensorExpr::Product(vec![ + TensorExpr::leaf(t1), + TensorExpr::leaf(t2), + ]); + let result = expr.materialize(); + + prop_assert_eq!(result.dims, expected_dims); + } +} + +// ============================================================================ +// Sum (Disjunction) Properties +// 
============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(512))] + + /// Sum is commutative + #[test] + fn sum_commutative( + (t1, t2) in arb_tensor_pair_same_dims(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let sum1 = TensorExpr::Sum(vec![ + TensorExpr::leaf(t1.clone()), + TensorExpr::leaf(t2.clone()), + ]).materialize(); + + let sum2 = TensorExpr::Sum(vec![ + TensorExpr::leaf(t2), + TensorExpr::leaf(t1), + ]).materialize(); + + prop_assert_eq!(sum1, sum2); + } + + /// Sum is idempotent (T ∨ T = T) + #[test] + fn sum_idempotent( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let sum = TensorExpr::Sum(vec![ + TensorExpr::leaf(tensor.clone()), + TensorExpr::leaf(tensor.clone()), + ]).materialize(); + + prop_assert_eq!(sum, tensor); + } + + /// Sum with empty is identity + #[test] + fn sum_with_empty_is_identity( + tensor in arb_sparse_tensor(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let empty = SparseTensor::empty(tensor.dims.clone()); + let sum = TensorExpr::Sum(vec![ + TensorExpr::leaf(tensor.clone()), + TensorExpr::leaf(empty), + ]).materialize(); + + prop_assert_eq!(sum, tensor); + } + + /// Empty sum is scalar false + #[test] + fn empty_sum_is_scalar_false(_seed in any::()) { + let sum = TensorExpr::Sum(vec![]).materialize(); + prop_assert!(sum.is_empty()); + } + + /// Sum extent is union of extents + #[test] + fn sum_is_union( + (t1, t2) in arb_tensor_pair_same_dims(TensorParams { max_dims: 2, max_dim_size: 5, max_tuples: 10 }) + ) { + let sum = TensorExpr::Sum(vec![ + TensorExpr::leaf(t1.clone()), + TensorExpr::leaf(t2.clone()), + ]).materialize(); + + // Every tuple in t1 should be in sum + for tuple in t1.iter() { + prop_assert!(sum.contains(tuple)); + } + + // Every tuple in t2 should be in sum + for tuple in t2.iter() { + prop_assert!(sum.contains(tuple)); + } + + // 
Every tuple in sum should be in t1 or t2 + for tuple in sum.iter() { + prop_assert!(t1.contains(tuple) || t2.contains(tuple)); + } + } +} + +// ============================================================================ +// Conjunction Properties +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Conjunction with scalar true is identity (modulo variable naming) + #[test] + fn conjunction_with_true( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let scalar_true = SparseTensor::scalar(true); + + let (expr, result_vars) = conjunction( + TensorExpr::leaf(tensor.clone()), + &vars, + TensorExpr::leaf(scalar_true), + &[], + ); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vars); + prop_assert_eq!(result, tensor); + } + + /// Conjunction with scalar false is empty + #[test] + fn conjunction_with_false( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let scalar_false = SparseTensor::scalar(false); + + let (expr, _result_vars) = conjunction( + TensorExpr::leaf(tensor), + &vars, + TensorExpr::leaf(scalar_false), + &[], + ); + let result = expr.materialize(); + + prop_assert!(result.is_empty()); + } + + /// Conjunction is commutative (on shared variables) + #[test] + fn conjunction_commutative( + t1 in arb_sparse_tensor_with_dims(vec![3, 4], 5), + t2 in arb_sparse_tensor_with_dims(vec![4, 5], 5), + ) { + let vars1 = vec!["x".to_string(), "y".to_string()]; + let vars2 = vec!["y".to_string(), "z".to_string()]; + + let (expr1, _vars_result1) = conjunction( + TensorExpr::leaf(t1.clone()), + &vars1, + TensorExpr::leaf(t2.clone()), + &vars2, + ); + + let (expr2, _vars_result2) = conjunction( + TensorExpr::leaf(t2), + &vars2, + TensorExpr::leaf(t1), + &vars1, + ); + + let result1 = expr1.materialize(); + let result2 = 
expr2.materialize(); + + // Same number of tuples (though variable order may differ) + prop_assert_eq!(result1.len(), result2.len()); + } +} + +// ============================================================================ +// Exists (Contraction) Properties +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + /// Exists on non-existent variable is identity + #[test] + fn exists_nonexistent_var( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = exists(TensorExpr::leaf(tensor.clone()), &vars, "z"); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vars); + prop_assert_eq!(result, tensor); + } + + /// Exists reduces arity by 1 + #[test] + fn exists_reduces_arity( + tensor in arb_sparse_tensor_with_dims(vec![3, 4], 8) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = exists(TensorExpr::leaf(tensor), &vars, "y"); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vec!["x"]); + prop_assert_eq!(result.arity(), 1); + prop_assert_eq!(result.dims, vec![3]); + } + + /// Exists on scalar is identity + #[test] + fn exists_on_scalar(value in any::()) { + let tensor = SparseTensor::scalar(value); + let (expr, result_vars) = exists(TensorExpr::leaf(tensor.clone()), &[], "x"); + let result = expr.materialize(); + + prop_assert!(result_vars.is_empty()); + prop_assert_eq!(result, tensor); + } + + /// Double exists is same as single exists (idempotent on same var) + #[test] + fn exists_idempotent( + tensor in arb_sparse_tensor_with_dims(vec![3, 4], 8) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + + let (expr1, vars1) = exists(TensorExpr::leaf(tensor.clone()), &vars, "y"); + let (expr2, vars2) = exists(expr1, &vars1, "y"); + + let result = expr2.materialize(); + + prop_assert_eq!(vars2, vec!["x"]); + 
prop_assert_eq!(result.arity(), 1); + } +} + +// ============================================================================ +// Fusion Tests (Contract(Product(...))) +// ============================================================================ + +proptest! { + #![proptest_config(ProptestConfig::with_cases(128))] + + /// Fused join produces same result as naive evaluation + #[test] + fn fused_join_correctness( + t1 in arb_sparse_tensor_with_dims(vec![5, 5], 10), + t2 in arb_sparse_tensor_with_dims(vec![5, 5], 10), + ) { + let vars1 = vec!["x".to_string(), "y".to_string()]; + let vars2 = vec!["y".to_string(), "z".to_string()]; + + // This creates Contract(Product(...)) which gets fused + let (conj_expr, conj_vars) = conjunction( + TensorExpr::leaf(t1.clone()), + &vars1, + TensorExpr::leaf(t2.clone()), + &vars2, + ); + + let (result_expr, _result_vars) = exists(conj_expr, &conj_vars, "y"); + let result = result_expr.materialize(); + + // Verify result is correct by checking each tuple + for tuple in result.iter() { + let x = tuple[0]; + let z = tuple[1]; + + // Should exist some y such that t1(x,y) and t2(y,z) + let mut found = false; + for y in 0..5 { + if t1.contains(&[x, y]) && t2.contains(&[y, z]) { + found = true; + break; + } + } + prop_assert!(found, "tuple {:?} in result but no witness y", tuple); + } + + // And every valid (x,z) should be in result + for x in 0..5 { + for z in 0..5 { + let mut should_be_in_result = false; + for y in 0..5 { + if t1.contains(&[x, y]) && t2.contains(&[y, z]) { + should_be_in_result = true; + break; + } + } + prop_assert_eq!( + result.contains(&[x, z]), + should_be_in_result, + "({}, {}) expected {} but got {}", + x, z, should_be_in_result, result.contains(&[x, z]) + ); + } + } + } +} + +// ============================================================================ +// Disjunction Helper Tests +// ============================================================================ + +proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(256))] + + /// disjunction_all with empty is scalar false + #[test] + fn disjunction_all_empty(_seed in any::()) { + let (expr, vars) = disjunction_all(vec![]); + let result = expr.materialize(); + + prop_assert!(vars.is_empty()); + prop_assert!(result.is_empty()); + } + + /// disjunction_all with single element is identity + #[test] + fn disjunction_all_single( + tensor in arb_sparse_tensor_with_dims(vec![3, 3], 5) + ) { + let vars = vec!["x".to_string(), "y".to_string()]; + let (expr, result_vars) = disjunction_all(vec![ + (TensorExpr::leaf(tensor.clone()), vars.clone()) + ]); + let result = expr.materialize(); + + prop_assert_eq!(result_vars, vars); + prop_assert_eq!(result, tensor); + } + + /// conjunction_all with empty is scalar true + #[test] + fn conjunction_all_empty(_seed in any::()) { + let (expr, vars) = conjunction_all(vec![]); + let result = expr.materialize(); + + prop_assert!(vars.is_empty()); + prop_assert!(result.contains(&[])); + prop_assert_eq!(result.len(), 1); + } +} diff --git a/tests/proptest_universe.rs b/tests/proptest_universe.rs new file mode 100644 index 0000000..4fbb0ec --- /dev/null +++ b/tests/proptest_universe.rs @@ -0,0 +1,159 @@ +//! Property tests for Universe (UUID ↔ Luid bijection) + +mod generators; + +use geolog::id::{Luid, NumericId, Uuid}; +use geolog::universe::Universe; +use proptest::prelude::*; +use std::collections::HashSet; +use tempfile::tempdir; + +proptest! 
{ + /// Interning the same UUID twice returns the same Luid + #[test] + fn intern_idempotent(uuid in generators::arb_uuid()) { + let mut universe = Universe::new(); + + let luid1 = universe.intern(uuid); + let luid2 = universe.intern(uuid); + + prop_assert_eq!(luid1, luid2); + } + + /// Interning then looking up returns the original UUID + #[test] + fn intern_lookup_roundtrip(uuid in generators::arb_uuid()) { + let mut universe = Universe::new(); + + let luid = universe.intern(uuid); + let retrieved = universe.get(luid); + + prop_assert_eq!(retrieved, Some(uuid)); + } + + /// Reverse lookup (UUID → Luid) works correctly + #[test] + fn reverse_lookup_roundtrip(uuid in generators::arb_uuid()) { + let mut universe = Universe::new(); + + let luid = universe.intern(uuid); + let found_luid = universe.lookup(&uuid); + + prop_assert_eq!(found_luid, Some(luid)); + } + + /// After bulk interning, bijection holds for all entries + #[test] + fn bijection_after_bulk_intern(uuids in proptest::collection::vec(generators::arb_uuid(), 1..50)) { + let mut universe = Universe::new(); + + // Intern all UUIDs + let luids: Vec<_> = uuids.iter().map(|&uuid| universe.intern(uuid)).collect(); + + // Forward direction: Luid → UUID + for (&uuid, &luid) in uuids.iter().zip(luids.iter()) { + prop_assert_eq!(universe.get(luid), Some(uuid)); + } + + // Reverse direction: UUID → Luid + for &uuid in &uuids { + prop_assert!(universe.lookup(&uuid).is_some()); + } + + // Uniqueness: unique UUIDs produce unique Luids + let unique_uuids: HashSet<_> = uuids.iter().collect(); + let unique_luids: HashSet<_> = luids.iter().collect(); + // Note: Luids may have fewer unique values if there are duplicate UUIDs + prop_assert!(unique_luids.len() <= unique_uuids.len()); + } + + /// Luids are assigned sequentially starting from 0 + #[test] + fn luids_sequential(count in 1usize..20) { + let mut universe = Universe::new(); + + for i in 0..count { + let uuid = Uuid::now_v7(); + let luid = universe.intern(uuid); + 
prop_assert_eq!(luid, Luid::from_usize(i), "Luid {} should be {}", luid, i); + } + } + + /// Save and load preserves all mappings + #[test] + fn save_load_roundtrip(uuids in generators::arb_unique_uuids(10)) { + let dir = tempdir().unwrap(); + let path = dir.path().join("universe.bin"); + + // Save + let original_luids: Vec<_>; + { + let mut universe = Universe::with_path(&path); + original_luids = uuids.iter().map(|&uuid| universe.intern(uuid)).collect(); + universe.save().unwrap(); + } + + // Load + { + let loaded = Universe::load(&path).unwrap(); + + // Check all mappings preserved + for (&uuid, &expected_luid) in uuids.iter().zip(original_luids.iter()) { + let retrieved = loaded.get(expected_luid); + prop_assert_eq!(retrieved, Some(uuid)); + + let found_luid = loaded.lookup(&uuid); + prop_assert_eq!(found_luid, Some(expected_luid)); + } + } + } + + /// Dirty flag is set after intern, cleared after save + #[test] + fn dirty_flag_consistency(uuid in generators::arb_uuid()) { + let dir = tempdir().unwrap(); + let path = dir.path().join("universe.bin"); + + let mut universe = Universe::with_path(&path); + + // Initially clean + prop_assert!(!universe.is_dirty()); + + // Dirty after intern + universe.intern(uuid); + prop_assert!(universe.is_dirty()); + + // Clean after save + universe.save().unwrap(); + prop_assert!(!universe.is_dirty()); + } + + /// Iterator yields all interned UUIDs in order + #[test] + fn iter_yields_all(uuids in generators::arb_unique_uuids(15)) { + let mut universe = Universe::new(); + + for &uuid in &uuids { + universe.intern(uuid); + } + + let iter_results: Vec<_> = universe.iter().collect(); + + prop_assert_eq!(iter_results.len(), uuids.len()); + + for (i, (luid, uuid)) in iter_results.iter().enumerate() { + prop_assert_eq!(*luid, Luid::from_usize(i)); + prop_assert_eq!(*uuid, uuids[i]); + } + } +} + +// Non-property unit tests for edge cases + +#[test] +fn test_load_nonexistent() { + let dir = tempdir().unwrap(); + let path = 
dir.path().join("nonexistent.bin"); + let universe = Universe::load(&path).expect("load should succeed for nonexistent"); + assert!(universe.is_empty()); +} diff --git a/tests/unit_chase.rs b/tests/unit_chase.rs new file mode 100644 index 0000000..55d514a --- /dev/null +++ b/tests/unit_chase.rs @@ -0,0 +1,426 @@ +//! Unit tests for tensor-backed chase algorithm + +use geolog::core::{ + Context, DerivedSort, Formula, RelationStorage, Sequent, Signature, Structure, Term, Theory, +}; +use geolog::query::chase::chase_fixpoint; +use geolog::universe::Universe; + +/// Create a simple test theory with one sort and one unary relation +fn simple_theory_with_relation() -> Theory { + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + Theory { + name: "Simple".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + } +} + +/// Create a preorder-like theory with binary leq relation, reflexivity and transitivity +fn preorder_theory() -> Theory { + let mut sig = Signature::default(); + sig.add_sort("X".to_string()); + + // Binary relation with product domain: leq : [x: X, y: X] -> Prop + let domain = DerivedSort::Product(vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ]); + sig.add_relation("leq".to_string(), domain); + + // Reflexivity axiom: forall x : X. |- [x: x, y: x] leq + let refl_axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ]), + ), + }; + + // Transitivity axiom: forall x, y, z : X. 
[x: x, y: y] leq, [x: y, y: z] leq |- [x: x, y: z] leq + let trans_axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ("z".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Conj(vec![ + Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + ), + Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ]), + ), + ]), + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("y".to_string(), Term::Var("z".to_string(), DerivedSort::Base(0))), + ]), + ), + }; + + Theory { + name: "Preorder".to_string(), + signature: sig, + axioms: vec![refl_axiom, trans_axiom], + axiom_names: vec!["ax/refl".to_string(), "ax/trans".to_string()], + } +} + +#[test] +fn test_chase_adds_relation_from_true_premise() { + // Axiom: forall x : V. 
|- R(x) + // This should add all elements to R + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add some elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 0); + + // Initialize relation + structure.init_relations(&[1]); + + // Run chase + let iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // Should add all 3 elements to R + assert_eq!(structure.get_relation(0).len(), 3); + assert!(structure.query_relation(0, &[a])); + assert!(structure.query_relation(0, &[b])); + assert!(structure.query_relation(0, &[c])); + + // Should converge in 2 iterations + assert_eq!(iterations, 2); +} + +#[test] +fn test_chase_fixpoint_empty_structure() { + let theory = simple_theory_with_relation(); + + // Axiom: forall x : V. 
|- R(x) + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + structure.init_relations(&[1]); + + let iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &theory.signature, 100).unwrap(); + + // Empty structure: no elements, so nothing to add + assert_eq!(iterations, 1); + assert_eq!(structure.get_relation(0).len(), 0); +} + +#[test] +fn test_chase_preorder_reflexivity() { + // Test that chase correctly computes reflexive closure + let theory = preorder_theory(); + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 0); + + // Initialize relation with arity 2 + structure.init_relations(&[2]); + + // Run chase + let iterations = chase_fixpoint( + &theory.axioms, + &mut structure, + &mut universe, + &theory.signature, + 100, + ).unwrap(); + + // Should have exactly 3 reflexive tuples + let relation = structure.get_relation(0); + assert_eq!(relation.len(), 3, "Should have exactly 3 reflexive tuples"); + + // Check reflexive pairs exist + assert!(structure.query_relation(0, &[a, a]), "Should have (a,a)"); + assert!(structure.query_relation(0, &[b, b]), "Should have (b,b)"); + assert!(structure.query_relation(0, &[c, c]), "Should have (c,c)"); + + // Check non-reflexive pairs do NOT exist + assert!(!structure.query_relation(0, &[a, b]), "Should NOT have (a,b)"); + assert!(!structure.query_relation(0, &[a, c]), "Should NOT have (a,c)"); + assert!(!structure.query_relation(0, &[b, a]), "Should NOT have (b,a)"); + + // Should complete in 2 iterations + assert_eq!(iterations, 2); +} + +#[test] +fn 
test_chase_transitive_closure() { + // Test that chase correctly computes transitive closure + let theory = preorder_theory(); + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + // Add 3 elements: a < b < c (chain order) + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (c, _) = structure.add_element(&mut universe, 0); + + // Initialize relation with arity 2 + structure.init_relations(&[2]); + + // Manually add initial ordering: a ≤ b and b ≤ c + structure.get_relation_mut(0).insert(vec![a, b]); + structure.get_relation_mut(0).insert(vec![b, c]); + + // Run chase + let _iterations = chase_fixpoint( + &theory.axioms, + &mut structure, + &mut universe, + &theory.signature, + 100, + ).unwrap(); + + // Expected: 3 reflexive + 2 initial + 1 transitive (a,c) = 6 + let relation = structure.get_relation(0); + assert_eq!(relation.len(), 6, "Should have 6 tuples"); + + // Check reflexive pairs + assert!(structure.query_relation(0, &[a, a])); + assert!(structure.query_relation(0, &[b, b])); + assert!(structure.query_relation(0, &[c, c])); + + // Check initial ordering + assert!(structure.query_relation(0, &[a, b])); + assert!(structure.query_relation(0, &[b, c])); + + // Check transitive closure! + assert!(structure.query_relation(0, &[a, c]), "Should have (a,c) from transitivity"); + + // Should NOT have backwards edges + assert!(!structure.query_relation(0, &[b, a])); + assert!(!structure.query_relation(0, &[c, b])); + assert!(!structure.query_relation(0, &[c, a])); +} + +#[test] +fn test_chase_conjunction_in_conclusion() { + // Axiom: forall x : V. 
|- R(x) ∧ S(x) + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + sig.add_relation("R".to_string(), DerivedSort::Base(0)); + sig.add_relation("S".to_string(), DerivedSort::Base(0)); + + let axiom = Sequent { + context: Context { + vars: vec![("x".to_string(), DerivedSort::Base(0))], + }, + premise: Formula::True, + conclusion: Formula::Conj(vec![ + Formula::Rel(0, Term::Var("x".to_string(), DerivedSort::Base(0))), + Formula::Rel(1, Term::Var("x".to_string(), DerivedSort::Base(0))), + ]), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + + structure.init_relations(&[1, 1]); + + let _iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // Both relations should have both elements + assert_eq!(structure.get_relation(0).len(), 2); + assert_eq!(structure.get_relation(1).len(), 2); + assert!(structure.query_relation(0, &[a])); + assert!(structure.query_relation(0, &[b])); + assert!(structure.query_relation(1, &[a])); + assert!(structure.query_relation(1, &[b])); +} + +#[test] +fn test_chase_relation_premise() { + // Axiom: forall x, y : V. 
R(x, y) |- S(x, y) + // Copy tuples from R to S + let mut sig = Signature::default(); + sig.add_sort("V".to_string()); + let domain = DerivedSort::Product(vec![ + ("a".to_string(), DerivedSort::Base(0)), + ("b".to_string(), DerivedSort::Base(0)), + ]); + sig.add_relation("R".to_string(), domain.clone()); + sig.add_relation("S".to_string(), domain); + + let axiom = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), DerivedSort::Base(0)), + ("y".to_string(), DerivedSort::Base(0)), + ], + }, + premise: Formula::Rel( + 0, + Term::Record(vec![ + ("a".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("b".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + ), + conclusion: Formula::Rel( + 1, + Term::Record(vec![ + ("a".to_string(), Term::Var("x".to_string(), DerivedSort::Base(0))), + ("b".to_string(), Term::Var("y".to_string(), DerivedSort::Base(0))), + ]), + ), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(1); + + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + + structure.init_relations(&[2, 2]); + + // Add some tuples to R + structure.get_relation_mut(0).insert(vec![a, b]); + structure.get_relation_mut(0).insert(vec![b, a]); + + let _iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // S should have the same tuples as R + assert_eq!(structure.get_relation(1).len(), 2); + assert!(structure.query_relation(1, &[a, b])); + assert!(structure.query_relation(1, &[b, a])); +} + +/// Test chase with existential premise (the feature that motivated tensor-backed chase!) +#[test] +fn test_chase_existential_premise() { + // Theory: Graph with reachability + // Axiom: forall v0, v1 : V. (exists e : E. 
src(e) = v0 ∧ tgt(e) = v1) |- reachable(v0, v1) + let mut sig = Signature::default(); + let v_sort = sig.add_sort("V".to_string()); + let e_sort = sig.add_sort("E".to_string()); + + // src, tgt : E -> V + sig.add_function("src".to_string(), DerivedSort::Base(e_sort), DerivedSort::Base(v_sort)); + sig.add_function("tgt".to_string(), DerivedSort::Base(e_sort), DerivedSort::Base(v_sort)); + + // reachable : [from: V, to: V] -> Prop + let reach_domain = DerivedSort::Product(vec![ + ("from".to_string(), DerivedSort::Base(v_sort)), + ("to".to_string(), DerivedSort::Base(v_sort)), + ]); + sig.add_relation("reachable".to_string(), reach_domain); + + // Axiom: (exists e : E. src(e) = v0 ∧ tgt(e) = v1) |- reachable(v0, v1) + let axiom = Sequent { + context: Context { + vars: vec![ + ("v0".to_string(), DerivedSort::Base(v_sort)), + ("v1".to_string(), DerivedSort::Base(v_sort)), + ], + }, + premise: Formula::Exists( + "e".to_string(), + DerivedSort::Base(e_sort), + Box::new(Formula::Conj(vec![ + Formula::Eq( + Term::App(0, Box::new(Term::Var("e".to_string(), DerivedSort::Base(e_sort)))), + Term::Var("v0".to_string(), DerivedSort::Base(v_sort)), + ), + Formula::Eq( + Term::App(1, Box::new(Term::Var("e".to_string(), DerivedSort::Base(e_sort)))), + Term::Var("v1".to_string(), DerivedSort::Base(v_sort)), + ), + ])), + ), + conclusion: Formula::Rel( + 0, + Term::Record(vec![ + ("from".to_string(), Term::Var("v0".to_string(), DerivedSort::Base(v_sort))), + ("to".to_string(), Term::Var("v1".to_string(), DerivedSort::Base(v_sort))), + ]), + ), + }; + + let mut universe = Universe::new(); + let mut structure = Structure::new(2); // 2 sorts: V and E + + // Add vertices: a, b, c + let (a, _) = structure.add_element(&mut universe, v_sort); + let (b, _) = structure.add_element(&mut universe, v_sort); + let (c, _) = structure.add_element(&mut universe, v_sort); + + // Add edges: e1 (a->b), e2 (b->c) + let (e1, _) = structure.add_element(&mut universe, e_sort); + let (e2, _) = 
structure.add_element(&mut universe, e_sort); + + // Initialize functions and relations + structure.init_functions(&[Some(e_sort), Some(e_sort)]); // src, tgt both have domain E + structure.init_relations(&[2]); // reachable is binary + + // Define src and tgt + structure.define_function(0, e1, a).unwrap(); // src(e1) = a + structure.define_function(1, e1, b).unwrap(); // tgt(e1) = b + structure.define_function(0, e2, b).unwrap(); // src(e2) = b + structure.define_function(1, e2, c).unwrap(); // tgt(e2) = c + + // Run chase + let iterations = chase_fixpoint(&[axiom], &mut structure, &mut universe, &sig, 100).unwrap(); + + // Should derive reachable(a,b) and reachable(b,c) + assert_eq!(structure.get_relation(0).len(), 2, "Should have 2 reachable pairs"); + assert!(structure.query_relation(0, &[a, b]), "Should have reachable(a,b)"); + assert!(structure.query_relation(0, &[b, c]), "Should have reachable(b,c)"); + + // Should NOT have other pairs + assert!(!structure.query_relation(0, &[a, c]), "Should NOT have reachable(a,c) without transitive closure axiom"); + + println!("Chase with existential premise completed in {} iterations", iterations); +} diff --git a/tests/unit_elaborate.rs b/tests/unit_elaborate.rs new file mode 100644 index 0000000..eb3653a --- /dev/null +++ b/tests/unit_elaborate.rs @@ -0,0 +1,837 @@ +//! 
Unit tests for theory and instance elaboration + +use geolog::ast; +use geolog::core::DerivedSort; +use geolog::elaborate::{ElabError, ElaborationContext, Env, elaborate_instance_ctx, elaborate_theory}; +use geolog::id::{NumericId, Slid}; +use geolog::parse; +use geolog::repl::InstanceEntry; +use geolog::universe::Universe; +use std::collections::HashMap; +use std::rc::Rc; + +#[test] +fn test_elaborate_simple_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + src : P -> T; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + assert_eq!(elab.theory.name, "PetriNet"); + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert_eq!(elab.theory.signature.functions.len(), 1); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_parameterized_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; +} + +theory (N : PetriNet instance) Marking { + token : Sort; + token/of : token -> N/P; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // First elaborate PetriNet + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Then elaborate Marking (which depends on PetriNet) + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + assert_eq!(elab.theory.name, "Marking"); + assert_eq!(elab.params.len(), 1); + assert_eq!(elab.params[0].name, "N"); + assert_eq!(elab.params[0].theory_name, "PetriNet"); + // Signature now includes param sorts: N/P, N/T (from PetriNet) + token (local) + assert_eq!(elab.theory.signature.sorts.len(), 3); + 
assert!(elab.theory.signature.lookup_sort("N/P").is_some()); + assert!(elab.theory.signature.lookup_sort("N/T").is_some()); + assert!(elab.theory.signature.lookup_sort("token").is_some()); + // Functions: just token/of (PetriNet had no functions in this test) + assert_eq!(elab.theory.signature.functions.len(), 1); + assert!(elab.theory.signature.lookup_func("token/of").is_some()); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_theory_with_axiom() { + let input = r#" +theory Iso { + X : Sort; + Y : Sort; + fwd : X -> Y; + bwd : Y -> X; + fb : forall x : X. |- x fwd bwd = x; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("elaboration failed"); + assert_eq!(elab.theory.name, "Iso"); + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert_eq!(elab.theory.signature.functions.len(), 2); + assert_eq!(elab.theory.axioms.len(), 1); + + // Check the axiom structure + let ax = &elab.theory.axioms[0]; + assert_eq!(ax.context.vars.len(), 1); + assert_eq!(ax.context.vars[0].0, "x"); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_axiom_function_type_error() { + // x is of sort X, but bwd expects Y + let input = r#" +theory BadIso { + X : Sort; + Y : Sort; + fwd : X -> Y; + bwd : Y -> X; + bad : forall x : X. 
|- x bwd = x; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let result = elaborate_theory(&mut env, t); + assert!(result.is_err(), "expected type error in axiom"); + + let err = result.unwrap_err(); + match err { + ElabError::TypeMismatch { expected, got } => { + // expected Y (bwd's domain), got X + assert_eq!(expected, DerivedSort::Base(1)); // Y + assert_eq!(got, DerivedSort::Base(0)); // X + } + other => panic!("expected TypeMismatch error, got: {}", other), + } + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_axiom_equality_type_error() { + // LHS is X, RHS is Y — can't compare different sorts + let input = r#" +theory BadEq { + X : Sort; + Y : Sort; + fwd : X -> Y; + bad : forall x : X. |- x = x fwd; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let result = elaborate_theory(&mut env, t); + assert!(result.is_err(), "expected type error in equality"); + + let err = result.unwrap_err(); + match err { + ElabError::TypeMismatch { expected, got } => { + // LHS is X, RHS is Y + assert_eq!(expected, DerivedSort::Base(0)); // X + assert_eq!(got, DerivedSort::Base(1)); // Y + } + other => panic!("expected TypeMismatch error, got: {}", other), + } + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_instance() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + out : Sort; + in/src : in -> P; + in/tgt : in -> T; + out/src : out -> T; + out/tgt : out -> P; +} + +instance ExampleNet : PetriNet = { + A : P; + B : P; + C : P; + ab : T; + ab_in : in; + ab_in in/src = A; + ab_in in/tgt = ab; + ab_out : out; + ab_out out/src = ab; + ab_out out/tgt = B; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + // First 
elaborate PetriNet theory + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Then elaborate ExampleNet instance + if let ast::Declaration::Instance(inst) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = + elaborate_instance_ctx(&mut ctx, inst).expect("instance elaboration failed"); + let structure = result.structure; + + // Elements are created in order: A(0), B(1), C(2), ab(3), ab_in(4), ab_out(5) + assert_eq!(structure.len(), 6); // A, B, C, ab, ab_in, ab_out + + // Check carriers + assert_eq!(structure.carrier_size(0), 3); // P: A, B, C + assert_eq!(structure.carrier_size(1), 1); // T: ab + assert_eq!(structure.carrier_size(2), 1); // in: ab_in + assert_eq!(structure.carrier_size(3), 1); // out: ab_out + + // Check function definitions using the new columnar API + // Elements by slid: A=0, B=1, C=2, ab=3, ab_in=4, ab_out=5 + let a_slid = Slid::from_usize(0); + let ab_slid = Slid::from_usize(3); + let ab_in_slid = Slid::from_usize(4); + + // Get the sort-local ID for ab_in + let ab_in_sort_slid = structure.sort_local_id(ab_in_slid); + + // in/src is function 0, ab_in maps to A + assert_eq!(structure.get_function(0, ab_in_sort_slid), Some(a_slid)); + // in/tgt is function 1, ab_in maps to ab + assert_eq!(structure.get_function(1, ab_in_sort_slid), Some(ab_slid)); + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_partial_function_error() { + // This instance is missing the definition for ab_in in/tgt + // (ab_in is in the domain of in/tgt but has no value defined) + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + in/src : in -> P; + in/tgt : in -> T; +} + +instance 
PartialNet : PetriNet = { + A : P; + ab : T; + ab_in : in; + ab_in in/src = A; + // Missing: ab_in in/tgt = ab; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + // First elaborate PetriNet theory + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Then try to elaborate the partial instance — should fail + if let ast::Declaration::Instance(i) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = elaborate_instance_ctx(&mut ctx, i); + assert!(result.is_err(), "expected error for partial function"); + + let err = result.unwrap_err(); + match err { + ElabError::PartialFunction { + func_name, + missing_elements, + } => { + assert_eq!(func_name, "in/tgt"); + assert_eq!(missing_elements, vec!["ab_in"]); + } + other => panic!("expected PartialFunction error, got: {}", other), + } + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_domain_type_error() { + // ab is of sort T, but in/src expects domain sort `in` + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + in/src : in -> P; +} + +instance BadNet : PetriNet = { + A : P; + ab : T; + ab in/src = A; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + if let ast::Declaration::Instance(i) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut 
ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = elaborate_instance_ctx(&mut ctx, i); + assert!(result.is_err(), "expected domain type error"); + + let err = result.unwrap_err(); + match err { + ElabError::DomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + assert_eq!(func_name, "in/src"); + assert_eq!(element_name, "ab"); + assert_eq!(expected_sort, "in"); + assert_eq!(actual_sort, "T"); + } + other => panic!("expected DomainMismatch error, got: {}", other), + } + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_codomain_type_error() { + // ab is of sort T, but in/src has codomain P + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + in : Sort; + in/src : in -> P; +} + +instance BadNet : PetriNet = { + A : P; + ab : T; + ab_in : in; + ab_in in/src = ab; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + if let ast::Declaration::Instance(i) = &file.declarations[1].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = elaborate_instance_ctx(&mut ctx, i); + assert!(result.is_err(), "expected codomain type error"); + + let err = result.unwrap_err(); + match err { + ElabError::CodomainMismatch { + func_name, + element_name, + expected_sort, + actual_sort, + } => { + assert_eq!(func_name, "in/src"); + assert_eq!(element_name, "ab"); + assert_eq!(expected_sort, "P"); + assert_eq!(actual_sort, "T"); + } + other => panic!("expected CodomainMismatch error, 
got: {}", other), + } + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_elaborate_theory_extends() { + // Simple single-level extends + let input = r#" +theory Base { + X : Sort; + f : X -> X; +} + +theory Child extends Base { + Y : Sort; + g : Y -> Base/X; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate Base + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("Base elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Child (extends Base) + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("Child elaboration failed"); + assert_eq!(elab.theory.name, "Child"); + + // Child should have: Base/X (inherited), Y (own) + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert!( + elab.theory.signature.lookup_sort("Base/X").is_some(), + "should have Base/X" + ); + assert!( + elab.theory.signature.lookup_sort("Y").is_some(), + "should have Y" + ); + + // Functions: Base/f (inherited), g (own) + assert_eq!(elab.theory.signature.functions.len(), 2); + assert!( + elab.theory.signature.lookup_func("Base/f").is_some(), + "should have Base/f" + ); + assert!( + elab.theory.signature.lookup_func("g").is_some(), + "should have g" + ); + + // Check g's domain/codomain are correct + let g_id = elab.theory.signature.lookup_func("g").unwrap(); + let g_sym = &elab.theory.signature.functions[g_id]; + let y_id = elab.theory.signature.lookup_sort("Y").unwrap(); + let base_x_id = elab.theory.signature.lookup_sort("Base/X").unwrap(); + assert_eq!(g_sym.domain, DerivedSort::Base(y_id)); + assert_eq!(g_sym.codomain, DerivedSort::Base(base_x_id)); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_elaborate_transitive_extends() { + // Transitive extends with requalification: + // Grandchild extends Child extends Base + // 
Grandchild should have: Base/X (from grandparent, NOT Child/Base/X), Child/Y, Z + let input = r#" +theory Base { + X : Sort; + f : X -> X; +} + +theory Child extends Base { + Y : Sort; +} + +theory Grandchild extends Child { + Z : Sort; + h : Z -> Base/X; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate Base + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("Base elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Child + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("Child elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Grandchild + if let ast::Declaration::Theory(t) = &file.declarations[2].node { + let elab = elaborate_theory(&mut env, t).expect("Grandchild elaboration failed"); + assert_eq!(elab.theory.name, "Grandchild"); + + // Grandchild should have: Base/X, Child/Y, Z + // NOT: Child/Base/X (that would be wrong requalification) + assert_eq!(elab.theory.signature.sorts.len(), 3); + assert!( + elab.theory.signature.lookup_sort("Base/X").is_some(), + "should have Base/X (preserved from grandparent)" + ); + assert!( + elab.theory.signature.lookup_sort("Child/Y").is_some(), + "should have Child/Y" + ); + assert!( + elab.theory.signature.lookup_sort("Z").is_some(), + "should have Z" + ); + + // Should NOT have these wrong names + assert!( + elab.theory.signature.lookup_sort("Child/Base/X").is_none(), + "should NOT have Child/Base/X" + ); + + // Functions: Base/f (preserved), h (own) + assert_eq!(elab.theory.signature.functions.len(), 2); + assert!( + elab.theory.signature.lookup_func("Base/f").is_some(), + "should have Base/f (preserved)" + ); + assert!( + elab.theory.signature.lookup_func("h").is_some(), + "should have h" + ); + + // Check h's domain/codomain + 
let h_id = elab.theory.signature.lookup_func("h").unwrap(); + let h_sym = &elab.theory.signature.functions[h_id]; + let z_id = elab.theory.signature.lookup_sort("Z").unwrap(); + let base_x_id = elab.theory.signature.lookup_sort("Base/X").unwrap(); + assert_eq!(h_sym.domain, DerivedSort::Base(z_id)); + assert_eq!(h_sym.codomain, DerivedSort::Base(base_x_id)); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_instance_of_extended_theory() { + // Test that instances of extended theories work correctly + let input = r#" +theory Base { + X : Sort; +} + +theory Child extends Base { + Y : Sort; + f : Y -> Base/X; +} + +instance C : Child = { + a : Base/X; + b : Y; + b f = a; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + let mut universe = Universe::new(); + + // Elaborate theories + for decl in &file.declarations[0..2] { + if let ast::Declaration::Theory(t) = &decl.node { + let elab = elaborate_theory(&mut env, t).expect("theory elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + } + + // Elaborate instance + if let ast::Declaration::Instance(inst) = &file.declarations[2].node { + let instances: HashMap = HashMap::new(); + let mut ctx = ElaborationContext { + theories: &env.theories, + instances: &instances, + universe: &mut universe, + siblings: HashMap::new(), + }; + let result = + elaborate_instance_ctx(&mut ctx, inst).expect("instance elaboration failed"); + let structure = result.structure; + + // Should have 2 elements: a and b + assert_eq!(structure.len(), 2); + assert_eq!(structure.carrier_size(0), 1); // Base/X: a + assert_eq!(structure.carrier_size(1), 1); // Y: b + + // Check name mappings + assert!( + result.name_to_slid.contains_key("a"), + "should have element 'a'" + ); + assert!( + result.name_to_slid.contains_key("b"), + "should have element 'b'" + ); + } else { + panic!("expected instance"); + } +} + +#[test] +fn test_nested_parameterized_theories() { + // 
Test deep nesting: C depends on B which depends on A + // theory A { X : Sort; } + // theory (N : A instance) B { Y : Sort; f : Y -> N/X; } + // theory (M : B instance) C { Z : Sort; g : Z -> M/Y; h : Z -> M/N/X; } + // + // C should have sorts: M/N/X (from A via B), M/Y (from B), Z (own) + let input = r#" +theory A { X : Sort; } + +theory (N : A instance) B { + Y : Sort; + f : Y -> N/X; +} + +theory (M : B instance) C { + Z : Sort; + g : Z -> M/Y; + h : Z -> M/N/X; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate A + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("A elaboration failed"); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate B + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("B elaboration failed"); + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert!(elab.theory.signature.lookup_sort("N/X").is_some()); + assert!(elab.theory.signature.lookup_sort("Y").is_some()); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate C + if let ast::Declaration::Theory(t) = &file.declarations[2].node { + let elab = elaborate_theory(&mut env, t).expect("C elaboration failed"); + assert_eq!(elab.theory.name, "C"); + + // C should have: M/N/X, M/Y, Z + assert_eq!(elab.theory.signature.sorts.len(), 3); + assert!( + elab.theory.signature.lookup_sort("M/N/X").is_some(), + "should have M/N/X (from A via B)" + ); + assert!( + elab.theory.signature.lookup_sort("M/Y").is_some(), + "should have M/Y (from B)" + ); + assert!( + elab.theory.signature.lookup_sort("Z").is_some(), + "should have Z (own sort)" + ); + + // Functions: M/f (from B), g, h (own) + assert_eq!(elab.theory.signature.functions.len(), 3); + assert!( + elab.theory.signature.lookup_func("M/f").is_some(), + "should have M/f" + ); + assert!( + 
elab.theory.signature.lookup_func("g").is_some(), + "should have g" + ); + assert!( + elab.theory.signature.lookup_func("h").is_some(), + "should have h" + ); + + // Check h's domain/codomain are correct + let h_id = elab.theory.signature.lookup_func("h").unwrap(); + let h_sym = &elab.theory.signature.functions[h_id]; + let z_id = elab.theory.signature.lookup_sort("Z").unwrap(); + let mnx_id = elab.theory.signature.lookup_sort("M/N/X").unwrap(); + assert_eq!(h_sym.domain, DerivedSort::Base(z_id)); + assert_eq!(h_sym.codomain, DerivedSort::Base(mnx_id)); + } else { + panic!("expected theory"); + } +} + +#[test] +fn test_extends_with_naming_convention_slashes() { + // This test verifies the fix for the naming convention bug where + // function names like "Func/dom" (using "/" as DomainSort/descriptor) + // were incorrectly treated as grandparent-qualified names. + // + // The fix checks if the prefix before "/" is a sort in the parent theory. + // If so, it's a naming convention, not a grandparent qualifier. 
+ let input = r#" +theory Base { + Func : Sort; + Rel : Sort; + Func/dom : Func -> Rel; + Func/cod : Func -> Rel; + Rel/type : Rel -> Func; +} + +theory Child extends Base { + Op : Sort; + Op/func : Op -> Base/Func; +} +"#; + let file = parse(input).expect("parse failed"); + let mut env = Env::new(); + + // Elaborate Base + if let ast::Declaration::Theory(t) = &file.declarations[0].node { + let elab = elaborate_theory(&mut env, t).expect("Base elaboration failed"); + // Base has 2 sorts (Func, Rel) and 3 functions (Func/dom, Func/cod, Rel/type) + assert_eq!(elab.theory.signature.sorts.len(), 2); + assert_eq!(elab.theory.signature.functions.len(), 3); + env.theories.insert(elab.theory.name.clone(), Rc::new(elab)); + } + + // Elaborate Child + if let ast::Declaration::Theory(t) = &file.declarations[1].node { + let elab = elaborate_theory(&mut env, t).expect("Child elaboration failed"); + assert_eq!(elab.theory.name, "Child"); + + // Child should have: Base/Func, Base/Rel, Op + assert_eq!(elab.theory.signature.sorts.len(), 3); + assert!( + elab.theory.signature.lookup_sort("Base/Func").is_some(), + "should have Base/Func" + ); + assert!( + elab.theory.signature.lookup_sort("Base/Rel").is_some(), + "should have Base/Rel" + ); + assert!( + elab.theory.signature.lookup_sort("Op").is_some(), + "should have Op" + ); + + // Functions should be: Base/Func/dom, Base/Func/cod, Base/Rel/type, Op/func + // NOT: Func/dom (which would be wrong - missing Base/ prefix) + assert_eq!(elab.theory.signature.functions.len(), 4); + assert!( + elab.theory.signature.lookup_func("Base/Func/dom").is_some(), + "should have Base/Func/dom (naming convention slash preserved)" + ); + assert!( + elab.theory.signature.lookup_func("Base/Func/cod").is_some(), + "should have Base/Func/cod" + ); + assert!( + elab.theory.signature.lookup_func("Base/Rel/type").is_some(), + "should have Base/Rel/type" + ); + assert!( + elab.theory.signature.lookup_func("Op/func").is_some(), + "should have Op/func" + ); + + 
// Should NOT have these wrong names (without Base/ prefix) + assert!( + elab.theory.signature.lookup_func("Func/dom").is_none(), + "should NOT have Func/dom (missing prefix)" + ); + assert!( + elab.theory.signature.lookup_func("Rel/type").is_none(), + "should NOT have Rel/type (missing prefix)" + ); + + // Verify Base/Func/dom has correct domain/codomain + let func_dom_id = elab.theory.signature.lookup_func("Base/Func/dom").unwrap(); + let func_dom_sym = &elab.theory.signature.functions[func_dom_id]; + let base_func_id = elab.theory.signature.lookup_sort("Base/Func").unwrap(); + let base_rel_id = elab.theory.signature.lookup_sort("Base/Rel").unwrap(); + assert_eq!( + func_dom_sym.domain, + DerivedSort::Base(base_func_id), + "Base/Func/dom domain should be Base/Func" + ); + assert_eq!( + func_dom_sym.codomain, + DerivedSort::Base(base_rel_id), + "Base/Func/dom codomain should be Base/Rel" + ); + + // Verify Op/func has correct domain/codomain + let op_func_id = elab.theory.signature.lookup_func("Op/func").unwrap(); + let op_func_sym = &elab.theory.signature.functions[op_func_id]; + let op_id = elab.theory.signature.lookup_sort("Op").unwrap(); + assert_eq!( + op_func_sym.domain, + DerivedSort::Base(op_id), + "Op/func domain should be Op" + ); + assert_eq!( + op_func_sym.codomain, + DerivedSort::Base(base_func_id), + "Op/func codomain should be Base/Func" + ); + } else { + panic!("expected theory"); + } +} diff --git a/tests/unit_meta.rs b/tests/unit_meta.rs new file mode 100644 index 0000000..52105a2 --- /dev/null +++ b/tests/unit_meta.rs @@ -0,0 +1,265 @@ +//! 
Unit tests for GeologMeta theory and structure conversion + +use geolog::core::{Context, DerivedSort, ElaboratedTheory, Formula, Sequent, Signature, Term, Theory}; +use geolog::meta::{geolog_meta, structure_to_theory, theory_to_structure}; +use geolog::naming::NamingIndex; +use geolog::universe::Universe; + +#[test] +fn test_theory_to_structure() { + // Create a simple theory + let mut sig = Signature::new(); + sig.add_sort("P".to_string()); + sig.add_sort("T".to_string()); + sig.add_function( + "src".to_string(), + DerivedSort::Base(1), // T + DerivedSort::Base(0), // P + ); + + let theory = ElaboratedTheory { + params: vec![], + theory: Theory { + name: "PetriNet".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }; + + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + let structure = theory_to_structure(&theory, &mut universe, &mut naming); + + // Check basic structure properties + assert!(!structure.is_empty()); + + // Check we have elements in the structure + // Should have: 1 Theory, 2 Srt, 1 Func, plus DSort/BaseDS elements + assert!( + structure.len() > 5, + "Expected more than 5 elements, got {}", + structure.len() + ); + + // Verify names were registered in naming index + assert!(naming.lookup_unique("PetriNet").is_some()); + assert!(naming.lookup_unique("P").is_some()); + assert!(naming.lookup_unique("T").is_some()); + assert!(naming.lookup_unique("src").is_some()); +} + +#[test] +fn test_geolog_meta_parses() { + // Just ensure GeologMeta itself can be loaded + let meta = geolog_meta(); + assert_eq!(meta.theory.name, "GeologMeta"); + + // Should have lots of sorts and functions (no Name sort anymore) + assert!( + meta.theory.signature.sorts.len() >= 25, + "Expected many sorts, got {}", + meta.theory.signature.sorts.len() + ); + assert!( + meta.theory.signature.functions.len() >= 40, + "Expected many functions, got {}", + meta.theory.signature.functions.len() + ); + assert!( + 
meta.theory.signature.relations.len() >= 3, + "Expected some relations" + ); +} + +#[test] +fn test_theory_roundtrip() { + // Create a theory with sorts, functions, and a relation + let mut sig = Signature::new(); + let p_id = sig.add_sort("P".to_string()); + let t_id = sig.add_sort("T".to_string()); + sig.add_function( + "src".to_string(), + DerivedSort::Base(t_id), + DerivedSort::Base(p_id), + ); + sig.add_function( + "tgt".to_string(), + DerivedSort::Base(t_id), + DerivedSort::Base(p_id), + ); + // Add a relation with record domain + sig.add_relation( + "enabled".to_string(), + DerivedSort::Product(vec![ + ("place".to_string(), DerivedSort::Base(p_id)), + ("trans".to_string(), DerivedSort::Base(t_id)), + ]), + ); + + let original = ElaboratedTheory { + params: vec![], + theory: Theory { + name: "PetriNet".to_string(), + signature: sig, + axioms: vec![], + axiom_names: vec![], + }, + }; + + // Convert to structure + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + let structure = theory_to_structure(&original, &mut universe, &mut naming); + + // Convert back + let reconstructed = + structure_to_theory(&structure, &universe, &naming).expect("roundtrip should succeed"); + + // Verify basic properties match + assert_eq!(reconstructed.theory.name, "PetriNet"); + assert_eq!(reconstructed.theory.signature.sorts.len(), 2); + assert_eq!(reconstructed.theory.signature.functions.len(), 2); + assert_eq!(reconstructed.theory.signature.relations.len(), 1); + + // Verify sort names + assert!(reconstructed.theory.signature.lookup_sort("P").is_some()); + assert!(reconstructed.theory.signature.lookup_sort("T").is_some()); + + // Verify function names + assert!(reconstructed.theory.signature.lookup_func("src").is_some()); + assert!(reconstructed.theory.signature.lookup_func("tgt").is_some()); + + // Verify relation name + assert!( + reconstructed + .theory + .signature + .lookup_rel("enabled") + .is_some() + ); +} + +#[test] +fn 
test_theory_roundtrip_with_axioms() { + // Create a preorder theory with reflexivity and transitivity axioms + let mut sig = Signature::new(); + let x_id = sig.add_sort("X".to_string()); + let x_sort = DerivedSort::Base(x_id); + + // Add a binary relation: leq : [x: X, y: X] -> Prop + let rel_domain = DerivedSort::Product(vec![ + ("x".to_string(), x_sort.clone()), + ("y".to_string(), x_sort.clone()), + ]); + let rel_id = sig.add_relation("leq".to_string(), rel_domain); + + // Reflexivity axiom: forall x:X. |- leq(x, x) + // Context: [x: X] + // Premise: True + // Conclusion: leq({x: x, y: x}) + let reflexivity = Sequent { + context: Context { + vars: vec![("x".to_string(), x_sort.clone())], + }, + premise: Formula::True, + conclusion: Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ]), + ), + }; + + // Transitivity axiom: forall x,y,z:X. leq(x,y), leq(y,z) |- leq(x,z) + // Context: [x: X, y: X, z: X] + // Premise: leq(x,y) ∧ leq(y,z) + // Conclusion: leq(x,z) + let transitivity = Sequent { + context: Context { + vars: vec![ + ("x".to_string(), x_sort.clone()), + ("y".to_string(), x_sort.clone()), + ("z".to_string(), x_sort.clone()), + ], + }, + premise: Formula::Conj(vec![ + Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("y".to_string(), x_sort.clone())), + ]), + ), + Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("y".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("z".to_string(), x_sort.clone())), + ]), + ), + ]), + conclusion: Formula::Rel( + rel_id, + Term::Record(vec![ + ("x".to_string(), Term::Var("x".to_string(), x_sort.clone())), + ("y".to_string(), Term::Var("z".to_string(), x_sort.clone())), + ]), + ), + }; + + let original = ElaboratedTheory { + params: vec![], + theory: Theory { + name: 
"Preorder".to_string(), + signature: sig, + axioms: vec![reflexivity, transitivity], + axiom_names: vec!["ax/refl".to_string(), "ax/trans".to_string()], + }, + }; + + // Convert to structure + let mut universe = Universe::new(); + let mut naming = NamingIndex::new(); + let structure = theory_to_structure(&original, &mut universe, &mut naming); + + // Convert back + let reconstructed = + structure_to_theory(&structure, &universe, &naming).expect("roundtrip should succeed"); + + // Verify basic properties match + assert_eq!(reconstructed.theory.name, "Preorder"); + assert_eq!(reconstructed.theory.signature.sorts.len(), 1); + assert_eq!(reconstructed.theory.signature.relations.len(), 1); + assert_eq!( + reconstructed.theory.axioms.len(), + 2, + "Expected 2 axioms, got {}", + reconstructed.theory.axioms.len() + ); + + // Verify sort name + assert!(reconstructed.theory.signature.lookup_sort("X").is_some()); + + // Verify relation name + assert!(reconstructed.theory.signature.lookup_rel("leq").is_some()); + + // Verify axiom names round-trip correctly + assert_eq!( + reconstructed.theory.axiom_names.len(), + 2, + "Expected 2 axiom names, got {}", + reconstructed.theory.axiom_names.len() + ); + assert!( + reconstructed.theory.axiom_names.contains(&"ax/refl".to_string()), + "Expected axiom names to contain 'ax/refl', got {:?}", + reconstructed.theory.axiom_names + ); + assert!( + reconstructed.theory.axiom_names.contains(&"ax/trans".to_string()), + "Expected axiom names to contain 'ax/trans', got {:?}", + reconstructed.theory.axiom_names + ); +} diff --git a/tests/unit_parsing.rs b/tests/unit_parsing.rs new file mode 100644 index 0000000..9abee67 --- /dev/null +++ b/tests/unit_parsing.rs @@ -0,0 +1,163 @@ +//! 
Unit tests for lexer and parser + +use chumsky::Parser; +use geolog::ast::Declaration; +use geolog::lexer::{Token, lexer}; +use geolog::parse; + +// ============================================================================ +// Lexer tests +// ============================================================================ + +#[test] +fn test_lex_simple() { + let input = "theory PetriNet { P : Sort; }"; + let result = lexer().parse(input); + assert!(result.is_ok()); + let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect(); + assert_eq!( + tokens, + vec![ + Token::Theory, + Token::Ident("PetriNet".to_string()), + Token::LBrace, + Token::Ident("P".to_string()), + Token::Colon, + Token::Sort, + Token::Semicolon, + Token::RBrace, + ] + ); +} + +#[test] +fn test_lex_arrow_and_turnstile() { + let input = "in -> out |- x = y"; + let result = lexer().parse(input); + assert!(result.is_ok()); + let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect(); + assert_eq!( + tokens, + vec![ + Token::Ident("in".to_string()), + Token::Arrow, + Token::Ident("out".to_string()), + Token::Turnstile, + Token::Ident("x".to_string()), + Token::Eq, + Token::Ident("y".to_string()), + ] + ); +} + +#[test] +fn test_lex_path() { + let input = "N/P W/src/arc"; + let result = lexer().parse(input); + assert!(result.is_ok()); + let tokens: Vec<_> = result.unwrap().into_iter().map(|(t, _)| t).collect(); + assert_eq!( + tokens, + vec![ + Token::Ident("N".to_string()), + Token::Slash, + Token::Ident("P".to_string()), + Token::Ident("W".to_string()), + Token::Slash, + Token::Ident("src".to_string()), + Token::Slash, + Token::Ident("arc".to_string()), + ] + ); +} + +// ============================================================================ +// Parser tests +// ============================================================================ + +#[test] +fn test_parse_simple_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; +} +"#; + let result = 
parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + assert_eq!(file.declarations.len(), 1); +} + +#[test] +fn test_parse_function_decl() { + let input = r#" +theory PetriNet { + P : Sort; + in : Sort; + src : in -> P; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); +} + +#[test] +fn test_parse_parameterized_theory() { + let input = r#" +theory (N : PetriNet instance) Marking { + token : Sort; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + if let Declaration::Theory(t) = &file.declarations[0].node { + assert_eq!(t.params.len(), 1); + assert_eq!(t.params[0].name, "N"); + } else { + panic!("Expected theory declaration"); + } +} + +#[test] +fn test_parse_instance() { + let input = r#" +instance ExampleNet : PetriNet = { + A : P; + B : P; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + if let Declaration::Instance(i) = &file.declarations[0].node { + assert_eq!(i.name, "ExampleNet"); + assert_eq!(i.body.len(), 2); + } else { + panic!("Expected instance declaration"); + } +} + +#[test] +fn test_parse_nested_instance() { + let input = r#" +instance problem0 : ExampleNet ReachabilityProblem = { + initial_marking = { + t : token; + }; + target_marking = { + t : token; + }; +} +"#; + let result = parse(input); + assert!(result.is_ok(), "Parse error: {:?}", result); + let file = result.unwrap(); + if let Declaration::Instance(i) = &file.declarations[0].node { + assert_eq!(i.name, "problem0"); + assert_eq!(i.body.len(), 2); + } else { + panic!("Expected instance declaration"); + } +} diff --git a/tests/unit_pretty.rs b/tests/unit_pretty.rs new file mode 100644 index 0000000..28fcf33 --- /dev/null +++ b/tests/unit_pretty.rs @@ -0,0 +1,36 @@ +//! 
Unit tests for pretty-printing roundtrips + +use geolog::parse; +use geolog::pretty::pretty_print; + +#[test] +fn test_roundtrip_simple_theory() { + let input = r#" +theory PetriNet { + P : Sort; + T : Sort; + src : in -> P; +} +"#; + let parsed = parse(input).expect("parse failed"); + let printed = pretty_print(&parsed); + let reparsed = parse(&printed).expect("reparse failed"); + + // Compare structure (ignoring spans) + assert_eq!(parsed.declarations.len(), reparsed.declarations.len()); +} + +#[test] +fn test_roundtrip_instance() { + let input = r#" +instance ExampleNet : PetriNet = { + A : P; + B : P; +} +"#; + let parsed = parse(input).expect("parse failed"); + let printed = pretty_print(&parsed); + let reparsed = parse(&printed).expect("reparse failed"); + + assert_eq!(parsed.declarations.len(), reparsed.declarations.len()); +} diff --git a/tests/unit_relations.rs b/tests/unit_relations.rs new file mode 100644 index 0000000..2ca3b27 --- /dev/null +++ b/tests/unit_relations.rs @@ -0,0 +1,183 @@ +//! 
Unit tests for relation storage + +use geolog::core::{RelationStorage, Structure, VecRelation}; +use geolog::id::{NumericId, Slid}; +use geolog::universe::Universe; +use geolog::serialize::{load_structure, save_structure}; +use tempfile::tempdir; + +/// Helper to create Slid from integer +fn slid(n: usize) -> Slid { + Slid::from_usize(n) +} + +#[test] +fn test_vec_relation_basic() { + let mut rel = VecRelation::new(2); + + // Insert a tuple + assert!(rel.insert(vec![slid(0), slid(1)])); + assert_eq!(rel.len(), 1); + + // Check containment + assert!(rel.contains(&[slid(0), slid(1)])); + assert!(!rel.contains(&[slid(1), slid(0)])); + assert!(!rel.contains(&[slid(0), slid(0)])); + + // Insert another tuple + assert!(rel.insert(vec![slid(1), slid(0)])); + assert_eq!(rel.len(), 2); + + // Duplicate insert returns false + assert!(!rel.insert(vec![slid(0), slid(1)])); + assert_eq!(rel.len(), 2); +} + +#[test] +fn test_vec_relation_remove() { + let mut rel = VecRelation::new(2); + + rel.insert(vec![slid(0), slid(1)]); + rel.insert(vec![slid(1), slid(2)]); + assert_eq!(rel.len(), 2); + + // Remove existing tuple + assert!(rel.remove(&[slid(0), slid(1)])); + assert_eq!(rel.len(), 1); + assert!(!rel.contains(&[slid(0), slid(1)])); + assert!(rel.contains(&[slid(1), slid(2)])); + + // Remove non-existent tuple + assert!(!rel.remove(&[slid(0), slid(1)])); + assert_eq!(rel.len(), 1); + + // Re-insert removed tuple (should reuse tuple ID) + assert!(rel.insert(vec![slid(0), slid(1)])); + assert_eq!(rel.len(), 2); + assert!(rel.contains(&[slid(0), slid(1)])); +} + +#[test] +fn test_vec_relation_iter() { + let mut rel = VecRelation::new(2); + + rel.insert(vec![slid(0), slid(1)]); + rel.insert(vec![slid(1), slid(2)]); + rel.insert(vec![slid(2), slid(3)]); + + let tuples: Vec<_> = rel.iter().collect(); + assert_eq!(tuples.len(), 3); + + // Remove middle tuple + rel.remove(&[slid(1), slid(2)]); + + let tuples: Vec<_> = rel.iter().collect(); + assert_eq!(tuples.len(), 2); +} + +#[test] 
+fn test_structure_relations() { + let mut universe = Universe::new(); + let mut structure = Structure::new(2); + + // Add elements to two sorts + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (x, _) = structure.add_element(&mut universe, 1); + let (y, _) = structure.add_element(&mut universe, 1); + + // Initialize a binary relation (arity 2) + structure.init_relations(&[2]); + + // Assert some tuples + assert!(structure.assert_relation(0, vec![a, x])); + assert!(structure.assert_relation(0, vec![b, y])); + assert_eq!(structure.get_relation(0).len(), 2); + + // Query + assert!(structure.query_relation(0, &[a, x])); + assert!(!structure.query_relation(0, &[a, y])); + + // Retract + assert!(structure.retract_relation(0, &[a, x])); + assert!(!structure.query_relation(0, &[a, x])); +} + +#[test] +fn test_relation_serialization_roundtrip() { + let mut universe = Universe::new(); + let mut structure = Structure::new(2); + + // Add elements + let (a, _) = structure.add_element(&mut universe, 0); + let (b, _) = structure.add_element(&mut universe, 0); + let (x, _) = structure.add_element(&mut universe, 1); + + // Initialize relation and add tuples + structure.init_relations(&[2]); + structure.assert_relation(0, vec![a, x]); + structure.assert_relation(0, vec![b, x]); + + // Serialize and deserialize via StructureData + let data = geolog::serialize::StructureData::from_structure(&structure); + let restored = data.to_structure(); + + // Check relation was preserved + assert_eq!(restored.num_relations(), 1); + assert_eq!(restored.get_relation(0).len(), 2); + assert!(restored.query_relation(0, &[a, x])); + assert!(restored.query_relation(0, &[b, x])); + assert!(!restored.query_relation(0, &[a, b])); +} + +#[test] +fn test_relation_file_roundtrip() { + let mut universe = Universe::new(); + let mut structure = Structure::new(2); + + // Add elements + let (a, _) = structure.add_element(&mut universe, 0); + let 
(b, _) = structure.add_element(&mut universe, 1); + + // Initialize relation and add tuples + structure.init_relations(&[2]); + structure.assert_relation(0, vec![a, b]); + + // Save to file + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + save_structure(&structure, &path).expect("save should succeed"); + + // Load from file + let loaded = load_structure(&path).expect("load should succeed"); + + // Check relation was preserved + assert_eq!(loaded.num_relations(), 1); + assert!(loaded.query_relation(0, &[a, b])); +} + +#[test] +fn test_unary_relation() { + let mut rel = VecRelation::new(1); + + rel.insert(vec![slid(42)]); + rel.insert(vec![slid(100)]); + + assert!(rel.contains(&[slid(42)])); + assert!(rel.contains(&[slid(100)])); + assert!(!rel.contains(&[slid(0)])); + assert_eq!(rel.len(), 2); +} + +#[test] +fn test_ternary_relation() { + let mut rel = VecRelation::new(3); + + rel.insert(vec![slid(1), slid(2), slid(3)]); + rel.insert(vec![slid(4), slid(5), slid(6)]); + + assert!(rel.contains(&[slid(1), slid(2), slid(3)])); + assert!(rel.contains(&[slid(4), slid(5), slid(6)])); + assert!(!rel.contains(&[slid(1), slid(2), slid(4)])); + assert_eq!(rel.len(), 2); +} diff --git a/tests/unit_version.rs b/tests/unit_version.rs new file mode 100644 index 0000000..79b7bfd --- /dev/null +++ b/tests/unit_version.rs @@ -0,0 +1,133 @@ +//! 
Unit tests for version control (commits, checkout, patches) + +use geolog::core::Structure; +use geolog::naming::NamingIndex; +use geolog::version::VersionedState; +use std::fs; +use std::path::PathBuf; +use tempfile::tempdir; + +fn temp_dir() -> PathBuf { + let dir = tempdir().unwrap(); + dir.keep() +} + +#[test] +fn test_new_versioned_state() { + let dir = temp_dir(); + let state = VersionedState::new(&dir); + assert!(state.head.is_none()); + assert_eq!(state.num_commits(), 0); + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn test_commit_and_checkout() { + let dir = temp_dir(); + let mut state = VersionedState::new(&dir); + let mut naming = NamingIndex::new(); + + // Create a structure using the state's universe + let mut s1 = Structure::new(2); + let (_, luid1) = s1.add_element(&mut state.universe, 0); + let (_, luid2) = s1.add_element(&mut state.universe, 1); + + // Register names + let uuid1 = state.universe.get(luid1).unwrap(); + let uuid2 = state.universe.get(luid2).unwrap(); + naming.insert(uuid1, vec!["foo".to_string()]); + naming.insert(uuid2, vec!["bar".to_string()]); + + // Commit it + let commit1 = state.commit(&s1, &naming).expect("commit should succeed"); + assert_eq!(state.num_commits(), 1); + assert_eq!(state.head, Some(commit1)); + + // Checkout and verify + let s1_checkout = state.checkout(commit1).expect("checkout should succeed"); + assert_eq!(s1_checkout.len(), 2); + + // Clean up + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn test_multiple_commits() { + let dir = temp_dir(); + let mut state = VersionedState::new(&dir); + let mut naming = NamingIndex::new(); + + // First commit + let mut s1 = Structure::new(2); + let (_, foo_luid) = s1.add_element(&mut state.universe, 0); + let foo_uuid = state.universe.get(foo_luid).unwrap(); + naming.insert(foo_uuid, vec!["foo".to_string()]); + let commit1 = state.commit(&s1, &naming).expect("commit 1"); + + // Second commit with more elements (preserving "foo" via its Luid) + let mut s2 = 
Structure::new(2); + s2.add_element_with_luid(foo_luid, 0); + let (_, bar_luid) = s2.add_element(&mut state.universe, 1); + let (_, baz_luid) = s2.add_element(&mut state.universe, 0); + + // Register names for new elements + let bar_uuid = state.universe.get(bar_luid).unwrap(); + let baz_uuid = state.universe.get(baz_luid).unwrap(); + naming.insert(bar_uuid, vec!["bar".to_string()]); + naming.insert(baz_uuid, vec!["baz".to_string()]); + + let commit2 = state.commit(&s2, &naming).expect("commit 2"); + + assert_eq!(state.num_commits(), 2); + + // Checkout first commit + let s1_checkout = state.checkout(commit1).expect("checkout commit1"); + assert_eq!(s1_checkout.len(), 1); + + // Checkout second commit + let s2_checkout = state.checkout(commit2).expect("checkout commit2"); + assert_eq!(s2_checkout.len(), 3); + + // List commits + let commits = state.list_commits(); + assert_eq!(commits.len(), 2); + assert_eq!(commits[0], commit1); + assert_eq!(commits[1], commit2); + + // Clean up + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn test_save_and_load_patches() { + let dir = temp_dir(); + + // Create state and commit + let commit_uuid; + { + let mut state = VersionedState::new(&dir); + let mut naming = NamingIndex::new(); + + let mut s = Structure::new(2); + let (_, foo_luid) = s.add_element(&mut state.universe, 0); + let foo_uuid = state.universe.get(foo_luid).unwrap(); + naming.insert(foo_uuid, vec!["foo".to_string()]); + + commit_uuid = state.commit(&s, &naming).expect("commit"); + } + + // Create new state and load + { + let mut state = VersionedState::new(&dir); + state.load_patches().expect("load patches"); + + assert_eq!(state.num_commits(), 1); + assert_eq!(state.head, Some(commit_uuid)); + + let s = state.checkout(commit_uuid).expect("checkout"); + assert_eq!(s.len(), 1); + } + + // Clean up + let _ = fs::remove_dir_all(&dir); +} diff --git a/tests/unit_workspace.rs b/tests/unit_workspace.rs new file mode 100644 index 0000000..ffaebba --- /dev/null +++ 
b/tests/unit_workspace.rs @@ -0,0 +1,68 @@ +//! Unit tests for structure serialization +//! +//! Tests for save/load functionality in the serialize module. + +use geolog::core::Structure; +use geolog::elaborate::InstanceEntry; +use geolog::id::{NumericId, Slid}; +use geolog::serialize::{load_structure, save_structure, StructureData}; +use geolog::universe::Universe; +use tempfile::tempdir; + +#[test] +fn test_structure_roundtrip() { + let mut universe = Universe::new(); + + let mut structure = Structure::new(2); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 1); + + let data = StructureData::from_structure(&structure); + let restored = data.to_structure(); + + assert_eq!(restored.len(), 3); + assert_eq!(restored.num_sorts(), 2); +} + +#[test] +fn test_save_load_structure() { + let mut universe = Universe::new(); + + let mut structure = Structure::new(2); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 1); + + let dir = tempdir().unwrap(); + let path = dir.path().join("test.structure"); + save_structure(&structure, &path).expect("save should succeed"); + + let loaded = load_structure(&path).expect("load should succeed"); + + assert_eq!(loaded.len(), 2); + assert_eq!(loaded.num_sorts(), 2); +} + +#[test] +fn test_instance_entry_element_management() { + let mut universe = Universe::new(); + + // Create a simple structure + let mut structure = Structure::new(1); + structure.add_element(&mut universe, 0); + structure.add_element(&mut universe, 0); + + // Create instance entry + let mut entry = InstanceEntry::new(structure, "TestTheory".to_string(), "TestTheory".to_string()); + entry.register_element("a".to_string(), Slid::from_usize(0)); + entry.register_element("b".to_string(), Slid::from_usize(1)); + + // Verify element lookup works + assert_eq!(entry.get_element("a"), Some(Slid::from_usize(0))); + assert_eq!(entry.get_element("b"), 
Some(Slid::from_usize(1))); + assert_eq!(entry.get_element("c"), None); + + // Verify reverse lookup works + assert_eq!(entry.get_name(Slid::from_usize(0)), Some("a")); + assert_eq!(entry.get_name(Slid::from_usize(1)), Some("b")); +} diff --git a/theories/GeologMeta.geolog b/theories/GeologMeta.geolog new file mode 100644 index 0000000..92fa9ba --- /dev/null +++ b/theories/GeologMeta.geolog @@ -0,0 +1,400 @@ +// GeologMeta: A homoiconic representation of geolog theories +// +// An instance of GeologMeta IS a collection of geolog theories, complete with +// signatures, axioms, and well-formedness constraints. +// +// Key design principles: +// - All elements identified by UUID; human-readable names in separate NamingIndex +// - Child pointers go from parent to children (no products in domains) +// - Binding uses upward pointers from variable to binder +// - Transitive closure (ancestor) via Datalog-style axioms +// - Srt/theory, Func/theory enables multi-theory instances and theory parameters +// +// Naming convention: DomainSort/descriptor +// - Embeddings: VarT/term, EqF/formula (target sort) +// - Parent pointers: Srt/theory, Field/prod (container sort) +// - Projections: EqF/lhs, ProjT/field (field reference) + +theory GeologMeta { + + // ============================================================ + // THEORIES + // ============================================================ + + Theory : Sort; + + // Theory parameters: (N : PetriNet) means N is a Param + Param : Sort; + Param/theory : Param -> Theory; // which theory has this parameter + Param/type : Param -> Theory; // must instantiate this theory + + // ============================================================ + // SORTS (renamed to Srt to avoid keyword conflict) + // ============================================================ + + Srt : Sort; + Srt/theory : Srt -> Theory; + + // ============================================================ + // DERIVED SORTS (Base | Product) + // 
============================================================ + + DSort : Sort; + + // Base case: wraps a Sort + BaseDS : Sort; + BaseDS/dsort : BaseDS -> DSort; + BaseDS/srt : BaseDS -> Srt; + + // Product case: [x: A, y: B, ...] + ProdDS : Sort; + ProdDS/dsort : ProdDS -> DSort; + + // Product fields (recursive: field type is DSort) + Field : Sort; + Field/prod : Field -> ProdDS; + Field/type : Field -> DSort; + + // ============================================================ + // FUNCTION SYMBOLS + // ============================================================ + + Func : Sort; + Func/theory : Func -> Theory; + Func/dom : Func -> DSort; + Func/cod : Func -> DSort; + + // ============================================================ + // RELATION SYMBOLS (predicates, no codomain) + // ============================================================ + + Rel : Sort; + Rel/theory : Rel -> Theory; + Rel/dom : Rel -> DSort; + + // ============================================================ + // BINDERS (for variable scoping) + // ============================================================ + // Variables point UP to their binder. Binders are introduced by + // Exists quantifiers or context variables. + + Binder : Sort; + Binder/type : Binder -> DSort; + + // ============================================================ + // TERMS + // ============================================================ + + Term : Sort; + + // Variable reference (points to binder) + VarT : Sort; + VarT/term : VarT -> Term; + VarT/binder : VarT -> Binder; // UPWARD pointer to introducing binder + + // Function application (unary - argument may be a record) + AppT : Sort; + AppT/term : AppT -> Term; + AppT/func : AppT -> Func; + AppT/arg : AppT -> Term; + + // Record construction [x = t1, y = t2, ...] 
+ RecordT : Sort; + RecordT/term : RecordT -> Term; + + RecEntry : Sort; + RecEntry/record : RecEntry -> RecordT; + RecEntry/val : RecEntry -> Term; + RecEntry/field : RecEntry -> Field; // which field this entry is for + + // Projection t.field + ProjT : Sort; + ProjT/term : ProjT -> Term; + ProjT/base : ProjT -> Term; + ProjT/field : ProjT -> Field; // which field to project + + // ============================================================ + // FORMULAS + // ============================================================ + + Formula : Sort; + + // Relation application `t R` + RelF : Sort; + RelF/formula : RelF -> Formula; + RelF/arg : RelF -> Term; + RelF/rel : RelF -> Rel; + + // Truth + TrueF : Sort; + TrueF/formula : TrueF -> Formula; + + // Falsity + FalseF : Sort; + FalseF/formula : FalseF -> Formula; + + // Equality t1 = t2 + EqF : Sort; + EqF/formula : EqF -> Formula; + EqF/lhs : EqF -> Term; + EqF/rhs : EqF -> Term; + + // Conjunction (n-ary via arms) + ConjF : Sort; + ConjF/formula : ConjF -> Formula; + + ConjArm : Sort; + ConjArm/conj : ConjArm -> ConjF; + ConjArm/child : ConjArm -> Formula; + + // Disjunction (n-ary via arms) + DisjF : Sort; + DisjF/formula : DisjF -> Formula; + + DisjArm : Sort; + DisjArm/disj : DisjArm -> DisjF; + DisjArm/child : DisjArm -> Formula; + + // Existential quantification + ExistsF : Sort; + ExistsF/formula : ExistsF -> Formula; + ExistsF/binder : ExistsF -> Binder; // introduces this binder + ExistsF/body : ExistsF -> Formula; + + // ============================================================ + // SEQUENTS (axioms) + // ============================================================ + + Sequent : Sort; + Sequent/theory : Sequent -> Theory; + Sequent/premise : Sequent -> Formula; + Sequent/conclusion : Sequent -> Formula; + + // Context variables (universal quantification at sequent level) + CtxVar : Sort; + CtxVar/sequent : CtxVar -> Sequent; + CtxVar/binder : CtxVar -> Binder; // introduces this binder + + // 
============================================================ + // NODE UNIVERSE (for ancestry/scoping) + // ============================================================ + // Unified sort for tracking parent-child in formula trees + + Node : Sort; + Term/node : Term -> Node; + Formula/node : Formula -> Node; + + // ============================================================ + // CHILD RELATION + // ============================================================ + // child(p, c) means c is an immediate child of p in the AST + + child : [parent: Node, child: Node] -> Prop; + + // ============================================================ + // ANCESTOR RELATION (transitive closure of child) + // ============================================================ + + ancestor : [anc: Node, desc: Node] -> Prop; + + // Datalog-style transitive closure axioms + ax/anc/base : forall p : Node, c : Node. + [parent: p, child: c] child |- [anc: p, desc: c] ancestor; + + ax/anc/step : forall a : Node, p : Node, c : Node. + [anc: a, desc: p] ancestor, [parent: p, child: c] child |- [anc: a, desc: c] ancestor; + + // ============================================================ + // CHILD AXIOMS (populate child from structure) + // ============================================================ + + // EqF children + ax/child/eq/lhs : forall e : EqF, t : Term. + e EqF/lhs = t |- [parent: e EqF/formula Formula/node, child: t Term/node] child; + ax/child/eq/rhs : forall e : EqF, t : Term. + e EqF/rhs = t |- [parent: e EqF/formula Formula/node, child: t Term/node] child; + + // ExistsF body + ax/child/exists : forall e : ExistsF, f : Formula. + e ExistsF/body = f |- [parent: e ExistsF/formula Formula/node, child: f Formula/node] child; + + // ConjF arms + ax/child/conj : forall a : ConjArm, c : ConjF, f : Formula. 
+ a ConjArm/conj = c, a ConjArm/child = f |- + [parent: c ConjF/formula Formula/node, child: f Formula/node] child; + + // DisjF arms + ax/child/disj : forall a : DisjArm, d : DisjF, f : Formula. + a DisjArm/disj = d, a DisjArm/child = f |- + [parent: d DisjF/formula Formula/node, child: f Formula/node] child; + + // RelF argument + ax/child/rel : forall r : RelF, t : Term. + r RelF/arg = t |- [parent: r RelF/formula Formula/node, child: t Term/node] child; + + // AppT argument + ax/child/app : forall a : AppT, t : Term. + a AppT/arg = t |- [parent: a AppT/term Term/node, child: t Term/node] child; + + // ProjT base + ax/child/proj : forall p : ProjT, t : Term. + p ProjT/base = t |- [parent: p ProjT/term Term/node, child: t Term/node] child; + + // RecEntry value + ax/child/rec : forall e : RecEntry, r : RecordT, t : Term. + e RecEntry/record = r, e RecEntry/val = t |- + [parent: r RecordT/term Term/node, child: t Term/node] child; + + // ============================================================ + // IN-SEQUENT RELATION (for context variable scoping) + // ============================================================ + + in_seq : [node: Node, seq: Sequent] -> Prop; + + ax/in_seq/premise : forall s : Sequent, f : Formula. + s Sequent/premise = f |- [node: f Formula/node, seq: s] in_seq; + + ax/in_seq/conclusion : forall s : Sequent, f : Formula. + s Sequent/conclusion = f |- [node: f Formula/node, seq: s] in_seq; + + ax/in_seq/desc : forall n : Node, m : Node, s : Sequent. + [node: n, seq: s] in_seq, [anc: n, desc: m] ancestor |- [node: m, seq: s] in_seq; + + // ============================================================ + // BINDING WELL-FORMEDNESS CONSTRAINTS + // ============================================================ + // These axioms ensure variables point to valid binders. + // An instance satisfies these iff scoping is correct. + + // Exists-bound: binder's exists must be an ancestor of the var + ax/wf/exists : forall v : VarT, b : Binder, e : ExistsF. 
+ v VarT/binder = b, e ExistsF/binder = b |- + [anc: e ExistsF/formula Formula/node, desc: v VarT/term Term/node] ancestor; + + // Context-bound: var must be in the same sequent as the ctx var + ax/wf/ctx : forall v : VarT, b : Binder, cv : CtxVar, s : Sequent. + v VarT/binder = b, cv CtxVar/binder = b, cv CtxVar/sequent = s |- + [node: v VarT/term Term/node, seq: s] in_seq; + + // ============================================================ + // COMMITS (version control checkpoints) + // ============================================================ + // Commits form a DAG. Each commit represents a point-in-time + // snapshot of all name bindings. + + Commit : Sort; + Commit/parent : Commit -> Commit; // previous commit (optional for initial) + // Note: For merge commits, we'd need a relation for multiple parents + + // ============================================================ + // NAME BINDINGS (mutable pointers via append-only log) + // ============================================================ + // A NameBinding records that, as of a given commit, a name + // points to a specific theory or instance version. + // + // Names are strings stored in NamingIndex (by UUID). + // The "current" binding for a name is the most recent one + // reachable from HEAD commit. + + NameBinding : Sort; + NameBinding/commit : NameBinding -> Commit; // when this binding was made + + // What the name points to (exactly one of these is defined): + NameBinding/theory : NameBinding -> Theory; + NameBinding/instance : NameBinding -> Instance; + + // ============================================================ + // INSTANCES (immutable, patch-based versioning) + // ============================================================ + // An Instance is an immutable snapshot. "Modifying" an instance + // creates a new Instance with parent pointer and delta. + // + // To materialize: chase parent chain, union additions, apply retractions. 
+ + Instance : Sort; + Instance/parent : Instance -> Instance; // base version (optional for v0) + Instance/theory : Instance -> Theory; // which theory this instantiates + + // ============================================================ + // INSTANCE ELEMENTS (delta: additions) + // ============================================================ + // Elements added in a specific instance version. + // The actual UUID is tracked via the element's Luid in Universe. + + Elem : Sort; + Elem/instance : Elem -> Instance; // which version introduced this + Elem/sort : Elem -> Srt; // which sort of the theory + + // ============================================================ + // ELEMENT RETRACTIONS (delta: tombstones) + // ============================================================ + // Marks an element as retracted in a specific version. + // The element still exists in the log, but is filtered from materialized view. + + ElemRetract : Sort; + ElemRetract/instance : ElemRetract -> Instance; // which version retracted + ElemRetract/elem : ElemRetract -> Elem; // what was retracted + + // ============================================================ + // FUNCTION VALUES (delta: additions) + // ============================================================ + // Records a function value: func(arg) = result + + FuncVal : Sort; + FuncVal/instance : FuncVal -> Instance; // which version defined this + FuncVal/func : FuncVal -> Func; // which function + FuncVal/arg : FuncVal -> Elem; // domain element (or product elem) + FuncVal/result : FuncVal -> Elem; // codomain element + + // NOTE: No FuncValRetract sort - function values are IMMUTABLE. + // To "change" a function value, retract the domain element and create a new one. + // This ensures the Monotonic Submodel Property (see incremental_index_design.md). 
+ + // ============================================================ + // RELATION TUPLES (delta: additions) + // ============================================================ + // Records a relation tuple: rel(args...) holds + // + // All relations use product-domain encoding uniformly (even unary). + // Each tuple has RelTupleArg entries for each position in the domain. + + RelTuple : Sort; + RelTuple/instance : RelTuple -> Instance; // which version asserted this + RelTuple/rel : RelTuple -> Rel; // which relation + + // Relation tuple argument components (one per domain field) + RelTupleArg : Sort; + RelTupleArg/tuple : RelTupleArg -> RelTuple; // which tuple this belongs to + RelTupleArg/elem : RelTupleArg -> Elem; // element value for this position + RelTupleArg/position : RelTupleArg -> Field; // which field of the domain product + + // NOTE: No RelTupleRetract sort - relation tuples are IMMUTABLE. + // Relations are boolean-valued functions: R(a,b) is defined at element creation time. + // To "change" a relation value, retract the involved elements and create new ones. + // This ensures the Monotonic Submodel Property (see incremental_index_design.md). + + // ============================================================ + // THEORY VERSIONING (same pattern as instances) + // ============================================================ + // Theories are also immutable and patch-based. + // Theory/parent allows incremental theory extension. 
+ + Theory/parent : Theory -> Theory; // base version (optional for v0) + + // Theory element retractions (for removing sorts/funcs/rels) + SrtRetract : Sort; + SrtRetract/theory : SrtRetract -> Theory; + SrtRetract/srt : SrtRetract -> Srt; + + FuncRetract : Sort; + FuncRetract/theory : FuncRetract -> Theory; + FuncRetract/func : FuncRetract -> Func; + + RelRetract : Sort; + RelRetract/theory : RelRetract -> Theory; + RelRetract/rel : RelRetract -> Rel; + + SequentRetract : Sort; + SequentRetract/theory : SequentRetract -> Theory; + SequentRetract/sequent : SequentRetract -> Sequent; +} diff --git a/theories/RelAlgIR.geolog b/theories/RelAlgIR.geolog new file mode 100644 index 0000000..c54e7b2 --- /dev/null +++ b/theories/RelAlgIR.geolog @@ -0,0 +1,592 @@ +// RelAlgIR: String Diagram IR for Relational Algebra +// +// Query plans are instances of this theory. The string diagram structure: +// - Wire elements are edges (carrying typed data streams) +// - Op elements are boxes (transforming data) +// - Composition is implicit via wire sharing (same Wire as output of one Op and input of another) +// - Cycles are allowed; well-formedness axioms ensure they contain delays +// +// See loose_thoughts/2026-01-19_19:45_relalg_ir_design.md for full design. +// +// This theory extends GeologMeta to get Srt, Func, Elem, etc. +// References use qualified names: GeologMeta/Srt, GeologMeta/Func, etc. + +theory RelAlgIR extends GeologMeta { + + // ============================================================ + // SCHEMAS (types of data on wires) + // ============================================================ + // Schemas describe the "shape" of tuples flowing on a wire. + // They mirror DSort but are specific to the relational algebra context. 
+ + Schema : Sort; + + // Unit schema: empty tuple (for sources with no input) + UnitSchema : Sort; + UnitSchema/schema : UnitSchema -> Schema; + + // Base schema: single column of a given sort + BaseSchema : Sort; + BaseSchema/schema : BaseSchema -> Schema; + BaseSchema/srt : BaseSchema -> GeologMeta/Srt; + + // Product schema: S ⊗ T (concatenation of columns) + ProdSchema : Sort; + ProdSchema/schema : ProdSchema -> Schema; + ProdSchema/left : ProdSchema -> Schema; + ProdSchema/right : ProdSchema -> Schema; + + // ============================================================ + // WIRES (edges in the string diagram) + // ============================================================ + // Wires are first-class citizens. Each wire carries a stream of + // tuples with a given schema. Composition is encoded by the same + // Wire appearing as output of one Op and input of another. + + Wire : Sort; + Wire/schema : Wire -> Schema; + + // ============================================================ + // OPERATIONS (boxes in the string diagram) + // ============================================================ + + Op : Sort; + + // ------------------------------------------------------------ + // Sources (no input wires) + // ------------------------------------------------------------ + + // Scan: emit all elements of a sort + // () → BaseSchema(srt) + ScanOp : Sort; + ScanOp/op : ScanOp -> Op; + ScanOp/srt : ScanOp -> GeologMeta/Srt; + ScanOp/out : ScanOp -> Wire; + + // Constant: emit a single known element + // () → BaseSchema(elem's sort) + ConstOp : Sort; + ConstOp/op : ConstOp -> Op; + ConstOp/elem : ConstOp -> GeologMeta/Elem; + ConstOp/out : ConstOp -> Wire; + + // Empty: emit nothing (identity for union) + // () → S + EmptyOp : Sort; + EmptyOp/op : EmptyOp -> Op; + EmptyOp/out : EmptyOp -> Wire; + + // ------------------------------------------------------------ + // Unary operations (one input wire, one output wire) + // 
------------------------------------------------------------ + + // Filter: keep tuples satisfying a predicate + // S → S + FilterOp : Sort; + FilterOp/op : FilterOp -> Op; + FilterOp/in : FilterOp -> Wire; + FilterOp/out : FilterOp -> Wire; + FilterOp/pred : FilterOp -> Pred; + + // Project: select and reorder columns + // S → T + ProjectOp : Sort; + ProjectOp/op : ProjectOp -> Op; + ProjectOp/in : ProjectOp -> Wire; + ProjectOp/out : ProjectOp -> Wire; + ProjectOp/mapping : ProjectOp -> ProjMapping; + + // Distinct: deduplicate tuples (collapse multiplicities to 0/1) + // S → S + DistinctOp : Sort; + DistinctOp/op : DistinctOp -> Op; + DistinctOp/in : DistinctOp -> Wire; + DistinctOp/out : DistinctOp -> Wire; + + // Negate: flip multiplicities (for computing differences) + // S → S + NegateOp : Sort; + NegateOp/op : NegateOp -> Op; + NegateOp/in : NegateOp -> Wire; + NegateOp/out : NegateOp -> Wire; + + // Apply function: add a column by applying a function + // S → S ⊗ BaseSchema(cod) + ApplyOp : Sort; + ApplyOp/op : ApplyOp -> Op; + ApplyOp/in : ApplyOp -> Wire; + ApplyOp/out : ApplyOp -> Wire; + ApplyOp/func : ApplyOp -> GeologMeta/Func; + ApplyOp/arg_col : ApplyOp -> ColRef; + + // ------------------------------------------------------------ + // Binary operations (two input wires, one output wire) + // ------------------------------------------------------------ + + // Join: combine tuples from two sources where condition holds + // S × T → S ⊗ T (filtered) + JoinOp : Sort; + JoinOp/op : JoinOp -> Op; + JoinOp/left_in : JoinOp -> Wire; + JoinOp/right_in : JoinOp -> Wire; + JoinOp/out : JoinOp -> Wire; + JoinOp/cond : JoinOp -> JoinCond; + + // Union: combine tuples from two sources (Z-set addition) + // S × S → S + UnionOp : Sort; + UnionOp/op : UnionOp -> Op; + UnionOp/left_in : UnionOp -> Wire; + UnionOp/right_in : UnionOp -> Wire; + UnionOp/out : UnionOp -> Wire; + + // ------------------------------------------------------------ + // DBSP Temporal 
Operators + // ------------------------------------------------------------ + // These operate on streams over discrete time. + // They are essential for incremental computation and feedback loops. + + // Delay: z⁻¹, output at time t is input at time t-1 + // S → S + // IMPORTANT: Delays break instantaneous cycles, making feedback well-founded. + DelayOp : Sort; + DelayOp/op : DelayOp -> Op; + DelayOp/in : DelayOp -> Wire; + DelayOp/out : DelayOp -> Wire; + + // Differentiate: δ = 1 - z⁻¹, compute changes since last timestep + // S → S (output is the delta/diff of input) + DiffOp : Sort; + DiffOp/op : DiffOp -> Op; + DiffOp/in : DiffOp -> Wire; + DiffOp/out : DiffOp -> Wire; + + // Integrate: ∫ = Σ, accumulate all inputs over time + // S → S (output is running sum of all inputs) + // NOTE: Has implicit delay semantics, also breaks instantaneous cycles. + IntegrateOp : Sort; + IntegrateOp/op : IntegrateOp -> Op; + IntegrateOp/in : IntegrateOp -> Wire; + IntegrateOp/out : IntegrateOp -> Wire; + + // ============================================================ + // PREDICATES (for filter conditions) + // ============================================================ + + Pred : Sort; + + // True: always satisfied + TruePred : Sort; + TruePred/pred : TruePred -> Pred; + + // False: never satisfied + FalsePred : Sort; + FalsePred/pred : FalsePred -> Pred; + + // Column equality: col_i = col_j + ColEqPred : Sort; + ColEqPred/pred : ColEqPred -> Pred; + ColEqPred/left : ColEqPred -> ColRef; + ColEqPred/right : ColEqPred -> ColRef; + + // Constant equality: col = constant element + ConstEqPred : Sort; + ConstEqPred/pred : ConstEqPred -> Pred; + ConstEqPred/col : ConstEqPred -> ColRef; + ConstEqPred/val : ConstEqPred -> GeologMeta/Elem; + + // Function result equality: f(col_arg) = col_result + FuncEqPred : Sort; + FuncEqPred/pred : FuncEqPred -> Pred; + FuncEqPred/func : FuncEqPred -> GeologMeta/Func; + FuncEqPred/arg : FuncEqPred -> ColRef; + FuncEqPred/result : FuncEqPred 
-> ColRef; + + // Function result equals constant: f(col_arg) = expected_elem + FuncConstEqPred : Sort; + FuncConstEqPred/pred : FuncConstEqPred -> Pred; + FuncConstEqPred/func : FuncConstEqPred -> GeologMeta/Func; + FuncConstEqPred/arg : FuncConstEqPred -> ColRef; + FuncConstEqPred/expected : FuncConstEqPred -> GeologMeta/Elem; + + // Conjunction: p ∧ q + AndPred : Sort; + AndPred/pred : AndPred -> Pred; + AndPred/left : AndPred -> Pred; + AndPred/right : AndPred -> Pred; + + // Disjunction: p ∨ q + OrPred : Sort; + OrPred/pred : OrPred -> Pred; + OrPred/left : OrPred -> Pred; + OrPred/right : OrPred -> Pred; + + // ============================================================ + // JOIN CONDITIONS + // ============================================================ + + JoinCond : Sort; + + // Equijoin: left.col_i = right.col_j + EquiJoinCond : Sort; + EquiJoinCond/cond : EquiJoinCond -> JoinCond; + EquiJoinCond/left_col : EquiJoinCond -> ColRef; + EquiJoinCond/right_col : EquiJoinCond -> ColRef; + + // Cross join: cartesian product (no condition) + CrossJoinCond : Sort; + CrossJoinCond/cond : CrossJoinCond -> JoinCond; + + // General predicate join + PredJoinCond : Sort; + PredJoinCond/cond : PredJoinCond -> JoinCond; + PredJoinCond/pred : PredJoinCond -> Pred; + + // ============================================================ + // COLUMN REFERENCES + // ============================================================ + // References to specific columns within a schema. + // Used in predicates and projections. 
+ + ColRef : Sort; + ColRef/wire : ColRef -> Wire; // which wire's schema we're referencing + ColRef/path : ColRef -> ColPath; // path into the schema + + // Column path: navigate into nested product schemas + ColPath : Sort; + + // Here: we're at the target + HerePath : Sort; + HerePath/path : HerePath -> ColPath; + + // Left: descend into left of product + LeftPath : Sort; + LeftPath/path : LeftPath -> ColPath; + LeftPath/rest : LeftPath -> ColPath; + + // Right: descend into right of product + RightPath : Sort; + RightPath/path : RightPath -> ColPath; + RightPath/rest : RightPath -> ColPath; + + // ============================================================ + // PROJECTION MAPPINGS + // ============================================================ + // Specifies how to construct output columns from input columns. + + ProjMapping : Sort; + + // Projection entries (which input columns become which output columns) + ProjEntry : Sort; + ProjEntry/mapping : ProjEntry -> ProjMapping; + ProjEntry/source : ProjEntry -> ColRef; + ProjEntry/target_path : ProjEntry -> ColPath; + + // ============================================================ + // REACHABILITY RELATIONS (for cycle analysis) + // ============================================================ + + // w1 reaches w2 via some path through operations + reaches : [from: Wire, to: Wire] -> Prop; + + // Reachability through each operation type + ax/reaches/scan : forall s : ScanOp, w : Wire. + s ScanOp/out = w |- [from: w, to: w] reaches; // trivial self-reach for source + + ax/reaches/filter : forall f : FilterOp, w1 : Wire, w2 : Wire. + f FilterOp/in = w1, f FilterOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/project : forall p : ProjectOp, w1 : Wire, w2 : Wire. + p ProjectOp/in = w1, p ProjectOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/distinct : forall d : DistinctOp, w1 : Wire, w2 : Wire. 
+ d DistinctOp/in = w1, d DistinctOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/negate : forall n : NegateOp, w1 : Wire, w2 : Wire. + n NegateOp/in = w1, n NegateOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/apply : forall a : ApplyOp, w1 : Wire, w2 : Wire. + a ApplyOp/in = w1, a ApplyOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/join/left : forall j : JoinOp, w1 : Wire, w2 : Wire. + j JoinOp/left_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/join/right : forall j : JoinOp, w1 : Wire, w2 : Wire. + j JoinOp/right_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/union/left : forall u : UnionOp, w1 : Wire, w2 : Wire. + u UnionOp/left_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/union/right : forall u : UnionOp, w1 : Wire, w2 : Wire. + u UnionOp/right_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/delay : forall d : DelayOp, w1 : Wire, w2 : Wire. + d DelayOp/in = w1, d DelayOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/diff : forall d : DiffOp, w1 : Wire, w2 : Wire. + d DiffOp/in = w1, d DiffOp/out = w2 |- [from: w1, to: w2] reaches; + + ax/reaches/integrate : forall i : IntegrateOp, w1 : Wire, w2 : Wire. + i IntegrateOp/in = w1, i IntegrateOp/out = w2 |- [from: w1, to: w2] reaches; + + // Transitive closure + ax/reaches/trans : forall w1 : Wire, w2 : Wire, w3 : Wire. + [from: w1, to: w2] reaches, [from: w2, to: w3] reaches |- + [from: w1, to: w3] reaches; + + // ============================================================ + // INSTANTANEOUS REACHABILITY (paths without delay) + // ============================================================ + // This relation tracks paths that do NOT go through DelayOp or IntegrateOp. + // Used to detect "bad" feedback loops that would require instantaneous computation. 
+ + reaches_instant : [from: Wire, to: Wire] -> Prop; + + // Same axioms as reaches, EXCEPT for DelayOp and IntegrateOp + ax/reaches_instant/filter : forall f : FilterOp, w1 : Wire, w2 : Wire. + f FilterOp/in = w1, f FilterOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/project : forall p : ProjectOp, w1 : Wire, w2 : Wire. + p ProjectOp/in = w1, p ProjectOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/distinct : forall d : DistinctOp, w1 : Wire, w2 : Wire. + d DistinctOp/in = w1, d DistinctOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/negate : forall n : NegateOp, w1 : Wire, w2 : Wire. + n NegateOp/in = w1, n NegateOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/apply : forall a : ApplyOp, w1 : Wire, w2 : Wire. + a ApplyOp/in = w1, a ApplyOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/join/left : forall j : JoinOp, w1 : Wire, w2 : Wire. + j JoinOp/left_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/join/right : forall j : JoinOp, w1 : Wire, w2 : Wire. + j JoinOp/right_in = w1, j JoinOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/union/left : forall u : UnionOp, w1 : Wire, w2 : Wire. + u UnionOp/left_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + ax/reaches_instant/union/right : forall u : UnionOp, w1 : Wire, w2 : Wire. + u UnionOp/right_in = w1, u UnionOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + // NOTE: No axioms for DelayOp or IntegrateOp! + // They break instantaneous reachability. + + // DiffOp is instantaneous (it uses delay internally but outputs immediately) + ax/reaches_instant/diff : forall d : DiffOp, w1 : Wire, w2 : Wire. + d DiffOp/in = w1, d DiffOp/out = w2 |- [from: w1, to: w2] reaches_instant; + + // Transitive closure + ax/reaches_instant/trans : forall w1 : Wire, w2 : Wire, w3 : Wire. 
+    [from: w1, to: w2] reaches_instant, [from: w2, to: w3] reaches_instant |-
+        [from: w1, to: w3] reaches_instant;
+
+    // ============================================================
+    // WELL-FORMEDNESS: NO INSTANTANEOUS CYCLES
+    // ============================================================
+    // Every cycle must contain at least one DelayOp or IntegrateOp.
+    // This ensures feedback loops are computable via iteration.
+
+    // A wire that reaches itself through only instantaneous edges makes the
+    // theory inconsistent (conclusion false), ruling such circuits out.
+    ax/wf/no_instant_cycle : forall w : Wire.
+        [from: w, to: w] reaches_instant |- false;
+
+    // ============================================================
+    // WELL-FORMEDNESS: SCHEMA CONSISTENCY
+    // ============================================================
+    // Operations must connect wires with compatible schemas.
+
+    // Filter preserves schema
+    ax/wf/filter_schema : forall f : FilterOp, w1 : Wire, w2 : Wire.
+        f FilterOp/in = w1, f FilterOp/out = w2 |-
+            w1 Wire/schema = w2 Wire/schema;
+
+    // Distinct preserves schema
+    ax/wf/distinct_schema : forall d : DistinctOp, w1 : Wire, w2 : Wire.
+        d DistinctOp/in = w1, d DistinctOp/out = w2 |-
+            w1 Wire/schema = w2 Wire/schema;
+
+    // Negate preserves schema
+    ax/wf/negate_schema : forall n : NegateOp, w1 : Wire, w2 : Wire.
+        n NegateOp/in = w1, n NegateOp/out = w2 |-
+            w1 Wire/schema = w2 Wire/schema;
+
+    // Delay preserves schema
+    ax/wf/delay_schema : forall d : DelayOp, w1 : Wire, w2 : Wire.
+        d DelayOp/in = w1, d DelayOp/out = w2 |-
+            w1 Wire/schema = w2 Wire/schema;
+
+    // Diff preserves schema
+    ax/wf/diff_schema : forall d : DiffOp, w1 : Wire, w2 : Wire.
+        d DiffOp/in = w1, d DiffOp/out = w2 |-
+            w1 Wire/schema = w2 Wire/schema;
+
+    // Integrate preserves schema
+    ax/wf/integrate_schema : forall i : IntegrateOp, w1 : Wire, w2 : Wire.
+        i IntegrateOp/in = w1, i IntegrateOp/out = w2 |-
+            w1 Wire/schema = w2 Wire/schema;
+
+    // Union requires same schema on both inputs.
+    // The two axioms below jointly force left-input, right-input, and output
+    // schemas to coincide (each is equated to the output's schema).
+    ax/wf/union_schema_left : forall u : UnionOp, wl : Wire, wr : Wire, wo : Wire.
+        u UnionOp/left_in = wl, u UnionOp/right_in = wr, u UnionOp/out = wo |-
+            wl Wire/schema = wo Wire/schema;
+
+    ax/wf/union_schema_right : forall u : UnionOp, wl : Wire, wr : Wire, wo : Wire.
+        u UnionOp/left_in = wl, u UnionOp/right_in = wr, u UnionOp/out = wo |-
+            wr Wire/schema = wo Wire/schema;
+
+    // Scan output schema must be BaseSchema of the scanned sort
+    // (This requires existential in conclusion, which geometric logic supports)
+    ax/wf/scan_schema : forall s : ScanOp, srt : GeologMeta/Srt, w : Wire.
+        s ScanOp/srt = srt, s ScanOp/out = w |-
+            exists bs : BaseSchema. bs BaseSchema/srt = srt, w Wire/schema = bs BaseSchema/schema;
+
+    // Join output schema is product of input schemas
+    ax/wf/join_schema : forall j : JoinOp, wl : Wire, wr : Wire, wo : Wire.
+        j JoinOp/left_in = wl, j JoinOp/right_in = wr, j JoinOp/out = wo |-
+            exists ps : ProdSchema.
+                ps ProdSchema/left = wl Wire/schema,
+                ps ProdSchema/right = wr Wire/schema,
+                wo Wire/schema = ps ProdSchema/schema;
+
+    // ============================================================
+    // ALGEBRAIC LAWS (for query optimization)
+    // ============================================================
+    // These axioms express equivalences between query plans.
+    // An optimizer uses these to transform plans into more efficient forms.
+    //
+    // Notation: We describe semantic equivalence between operators.
+    // In practice, equivalence means the output wire produces the same Z-set.
+    //
+    // These are stated as properties (Prop-valued relations) rather than
+    // equational axioms, since geolog's geometric logic doesn't have
+    // built-in equality on terms. The optimizer interprets these as rewrite rules.
+
+    // Wire equivalence: two wires produce the same Z-set
+    equiv : [a: Wire, b: Wire] -> Prop;
+
+    // Reflexivity
+    ax/equiv/refl : forall w : Wire.
+        |- [a: w, b: w] equiv;
+
+    // Symmetry
+    ax/equiv/sym : forall w1 : Wire, w2 : Wire.
+        [a: w1, b: w2] equiv |- [a: w2, b: w1] equiv;
+
+    // Transitivity
+    ax/equiv/trans : forall w1 : Wire, w2 : Wire, w3 : Wire.
+        [a: w1, b: w2] equiv, [a: w2, b: w3] equiv |- [a: w1, b: w3] equiv;
+
+    // ------------------------------------------------------------
+    // Filter Laws
+    // ------------------------------------------------------------
+
+    // Filter(True, x) ≡ x
+    ax/filter_true : forall f : FilterOp, t : TruePred, wi : Wire, wo : Wire.
+        f FilterOp/pred = t TruePred/pred, f FilterOp/in = wi, f FilterOp/out = wo |-
+            [a: wo, b: wi] equiv;
+
+    // Filter(False, x) ≡ Empty
+    // (Every tuple is filtered out, result is empty)
+    // This would need EmptyOp with matching schema; omitted for simplicity.
+
+    // Filter-Filter Fusion: Filter(p, Filter(q, x)) ≡ Filter(p ∧ q, x)
+    // Expressed as: If f2.in = f1.out, then there exists a fused filter.
+    // Note the fused node f3's output wire is left fresh; it is pinned only
+    // through the equiv conclusion with wo.
+    ax/filter_fusion : forall f1 : FilterOp, f2 : FilterOp,
+                       p1 : Pred, p2 : Pred,
+                       wi : Wire, wm : Wire, wo : Wire.
+        f1 FilterOp/in = wi, f1 FilterOp/out = wm, f1 FilterOp/pred = p1,
+        f2 FilterOp/in = wm, f2 FilterOp/out = wo, f2 FilterOp/pred = p2 |-
+            exists f3 : FilterOp, pa : AndPred.
+                pa AndPred/left = p1, pa AndPred/right = p2,
+                f3 FilterOp/in = wi, f3 FilterOp/pred = pa AndPred/pred,
+                [a: wo, b: f3 FilterOp/out] equiv;
+
+    // ------------------------------------------------------------
+    // Distinct Laws
+    // ------------------------------------------------------------
+
+    // Distinct is idempotent: Distinct(Distinct(x)) ≡ Distinct(x)
+    // (the outer output wo is equivalent to the inner output wm)
+    ax/distinct_idem : forall d1 : DistinctOp, d2 : DistinctOp,
+                       wi : Wire, wm : Wire, wo : Wire.
+        d1 DistinctOp/in = wi, d1 DistinctOp/out = wm,
+        d2 DistinctOp/in = wm, d2 DistinctOp/out = wo |-
+            [a: wo, b: wm] equiv;
+
+    // ------------------------------------------------------------
+    // Union Laws
+    // ------------------------------------------------------------
+
+    // Union is commutative: Union(x, y) ≡ Union(y, x)
+    // Conclusion materializes the swapped union node existentially.
+    ax/union_comm : forall u1 : UnionOp, wl : Wire, wr : Wire, wo : Wire.
+        u1 UnionOp/left_in = wl, u1 UnionOp/right_in = wr, u1 UnionOp/out = wo |-
+            exists u2 : UnionOp.
+                u2 UnionOp/left_in = wr, u2 UnionOp/right_in = wl,
+                [a: wo, b: u2 UnionOp/out] equiv;
+
+    // Union is associative: Union(x, Union(y, z)) ≡ Union(Union(x, y), z)
+    ax/union_assoc : forall u1 : UnionOp, u2 : UnionOp,
+                     wa : Wire, wb : Wire, wc : Wire, wyz : Wire, wo : Wire.
+        u2 UnionOp/left_in = wb, u2 UnionOp/right_in = wc, u2 UnionOp/out = wyz,
+        u1 UnionOp/left_in = wa, u1 UnionOp/right_in = wyz, u1 UnionOp/out = wo |-
+            exists u3 : UnionOp, u4 : UnionOp, wab : Wire.
+                u3 UnionOp/left_in = wa, u3 UnionOp/right_in = wb, u3 UnionOp/out = wab,
+                u4 UnionOp/left_in = wab, u4 UnionOp/right_in = wc,
+                [a: wo, b: u4 UnionOp/out] equiv;
+
+    // Union with Empty: Union(x, Empty) ≡ x
+    ax/union_empty_right : forall u : UnionOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+        e EmptyOp/out = we, u UnionOp/left_in = wi, u UnionOp/right_in = we, u UnionOp/out = wo |-
+            [a: wo, b: wi] equiv;
+
+    ax/union_empty_left : forall u : UnionOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+        e EmptyOp/out = we, u UnionOp/left_in = we, u UnionOp/right_in = wi, u UnionOp/out = wo |-
+            [a: wo, b: wi] equiv;
+
+    // ------------------------------------------------------------
+    // Negate Laws
+    // ------------------------------------------------------------
+
+    // Double negation: Negate(Negate(x)) ≡ x
+    ax/negate_involution : forall n1 : NegateOp, n2 : NegateOp,
+                           wi : Wire, wm : Wire, wo : Wire.
+        n1 NegateOp/in = wi, n1 NegateOp/out = wm,
+        n2 NegateOp/in = wm, n2 NegateOp/out = wo |-
+            [a: wo, b: wi] equiv;
+
+    // ------------------------------------------------------------
+    // Join Laws
+    // ------------------------------------------------------------
+
+    // Cross join is commutative (up to column reordering)
+    // Note: The output schemas differ, so this needs a projection to swap columns.
+    // Omitted for now as it requires more complex schema manipulation.
+
+    // Join with Empty: Join(x, Empty) ≡ Empty
+    // NOTE(review): the witnessed e2's output schema is unconstrained here;
+    // confirm whether a schema side-condition (matching wo) should be added.
+    ax/join_empty_right : forall j : JoinOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+        e EmptyOp/out = we, j JoinOp/left_in = wi, j JoinOp/right_in = we, j JoinOp/out = wo |-
+            exists e2 : EmptyOp. [a: wo, b: e2 EmptyOp/out] equiv;
+
+    ax/join_empty_left : forall j : JoinOp, e : EmptyOp, wi : Wire, we : Wire, wo : Wire.
+        e EmptyOp/out = we, j JoinOp/left_in = we, j JoinOp/right_in = wi, j JoinOp/out = wo |-
+            exists e2 : EmptyOp. [a: wo, b: e2 EmptyOp/out] equiv;
+
+    // ------------------------------------------------------------
+    // DBSP Laws
+    // ------------------------------------------------------------
+
+    // Differentiation is inverse of integration (for streams of changes)
+    // Diff(Integrate(x)) ≡ x (for Δ-streams)
+    // Integrate(Diff(x)) ≡ x - x₀ (up to initial value)
+    // These are more subtle and depend on stream semantics; omitted for now.
+
+    // Delay respects Union: z⁻¹(x ∪ y) ≡ z⁻¹(x) ∪ z⁻¹(y)
+    // (linearity of delay; the witnessed dl/dr/u2 nodes are pinned only
+    // through their input wires and the final equiv conclusion)
+    ax/delay_union : forall u : UnionOp, d : DelayOp,
+                     wl : Wire, wr : Wire, wu : Wire, wo : Wire.
+        u UnionOp/left_in = wl, u UnionOp/right_in = wr, u UnionOp/out = wu,
+        d DelayOp/in = wu, d DelayOp/out = wo |-
+            exists dl : DelayOp, dr : DelayOp, u2 : UnionOp.
+                dl DelayOp/in = wl, dr DelayOp/in = wr,
+                u2 UnionOp/left_in = dl DelayOp/out, u2 UnionOp/right_in = dr DelayOp/out,
+                [a: wo, b: u2 UnionOp/out] equiv;
+}