2026-06-04 11:51:25 +02:00
|
|
|
|
//! Wire format shared by every byte-oriented backend in this crate.
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! The encoding is hand-rolled (no `serde`, no `bincode`) so that the
|
|
|
|
|
|
//! generated bytes are stable and inspectable. It is **not** versioned: adding
|
|
|
|
|
|
//! a new [`Value`] variant invalidates previously-stored data. That is fine
|
|
|
|
|
|
//! for a playground; production code would prepend a format byte.
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! ## Row Format
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! `[count: u32 LE] [val × count]`
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! ## Value Format
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! `[tag: u8] [payload]`
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! | Tag | Variant | Payload |
|
|
|
|
|
|
//! |--------|---------------|--------------------------------------|
|
|
|
|
|
|
//! | `0x00` | `Value::Int` | `i64 LE` (8 bytes) |
|
|
|
|
|
|
//! | `0x01` | `Value::Str` | `[len: u32 LE] [bytes]` |
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! ## Row Key Format
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! Synthetic row IDs are `u64` encoded big-endian so lexicographic key order
|
|
|
|
|
|
//! matches insertion order. Backends with named sub-stores per relation can
|
|
|
|
|
|
//! use this directly as the key.
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! ## Metadata Format
|
|
|
|
|
|
//!
|
|
|
|
|
|
//! Per-relation metadata is `[arity: u32 LE] [next_id: u64 LE]` = 12 bytes.
|
|
|
|
|
|
|
2026-06-04 12:16:30 +02:00
|
|
|
|
use crate::value::Value;
|
2026-06-04 11:51:25 +02:00
|
|
|
|
|
|
|
|
|
|
/// Errors raised by [`decode_row`] and [`decode_meta`].
///
/// The type is `Clone` and comparable (`PartialEq`/`Eq`) so callers and tests
/// can store errors and check them with `==` / `assert_eq!` instead of having
/// to pattern-match every time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodecError {
    /// The byte slice ended before the expected number of fields was read.
    UnexpectedEof,
    /// A value tag byte was unrecognized.
    UnknownTag(u8),
    /// A length field declared more bytes than the slice contains.
    ///
    /// `declared` is the length read from the input; `available` is the number
    /// of bytes that were actually left after the length field.
    LengthOverrun { declared: usize, available: usize },
    /// A UTF-8 string payload could not be decoded.
    InvalidUtf8,
}
|
|
|
|
|
|
|
|
|
|
|
|
impl std::fmt::Display for CodecError {
|
|
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
|
|
match self {
|
|
|
|
|
|
Self::UnexpectedEof => write!(f, "unexpected end of bytes"),
|
|
|
|
|
|
Self::UnknownTag(t) => write!(f, "unknown value tag: 0x{t:02x}"),
|
|
|
|
|
|
Self::LengthOverrun {
|
|
|
|
|
|
declared,
|
|
|
|
|
|
available,
|
|
|
|
|
|
} => write!(
|
|
|
|
|
|
f,
|
|
|
|
|
|
"declared length {declared} exceeds available {available} bytes"
|
|
|
|
|
|
),
|
|
|
|
|
|
Self::InvalidUtf8 => write!(f, "invalid UTF-8 in string payload"),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Empty impl: `CodecError` relies on the trait's provided methods, with
// `Debug` and `Display` supplying the required supertraits.
impl std::error::Error for CodecError {}
|
|
|
|
|
|
|
|
|
|
|
|
/// Encode a row of [`Value`]s to bytes.
|
|
|
|
|
|
#[must_use]
|
|
|
|
|
|
pub fn encode_row(row: &[Value]) -> Vec<u8> {
|
|
|
|
|
|
let mut out = Vec::with_capacity(4 + row.len() * 9);
|
|
|
|
|
|
out.extend_from_slice(&u32::try_from(row.len()).unwrap_or(u32::MAX).to_le_bytes());
|
|
|
|
|
|
for value in row {
|
|
|
|
|
|
match value {
|
|
|
|
|
|
Value::Int(i) => {
|
|
|
|
|
|
out.push(0x00);
|
|
|
|
|
|
out.extend_from_slice(&i.to_le_bytes());
|
|
|
|
|
|
}
|
|
|
|
|
|
Value::Str(s) => {
|
|
|
|
|
|
out.push(0x01);
|
|
|
|
|
|
let bytes = s.as_bytes();
|
|
|
|
|
|
out.extend_from_slice(
|
|
|
|
|
|
&u32::try_from(bytes.len()).unwrap_or(u32::MAX).to_le_bytes(),
|
|
|
|
|
|
);
|
|
|
|
|
|
out.extend_from_slice(bytes);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
out
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Decode a row of [`Value`]s from bytes.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// # Errors
|
|
|
|
|
|
/// Returns [`CodecError`] if the byte slice is malformed.
|
|
|
|
|
|
pub fn decode_row(mut bytes: &[u8]) -> Result<Vec<Value>, CodecError> {
|
|
|
|
|
|
let count = read_u32(&mut bytes)? as usize;
|
|
|
|
|
|
let mut row = Vec::with_capacity(count);
|
|
|
|
|
|
for _ in 0..count {
|
|
|
|
|
|
row.push(read_value(&mut bytes)?);
|
|
|
|
|
|
}
|
|
|
|
|
|
Ok(row)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn read_value(bytes: &mut &[u8]) -> Result<Value, CodecError> {
|
|
|
|
|
|
let tag = read_u8(bytes)?;
|
|
|
|
|
|
match tag {
|
|
|
|
|
|
0x00 => {
|
|
|
|
|
|
let i = read_i64(bytes)?;
|
|
|
|
|
|
Ok(Value::Int(i))
|
|
|
|
|
|
}
|
|
|
|
|
|
0x01 => {
|
|
|
|
|
|
let len = read_u32(bytes)? as usize;
|
|
|
|
|
|
if bytes.len() < len {
|
|
|
|
|
|
return Err(CodecError::LengthOverrun {
|
|
|
|
|
|
declared: len,
|
|
|
|
|
|
available: bytes.len(),
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
let (head, tail) = bytes.split_at(len);
|
|
|
|
|
|
*bytes = tail;
|
|
|
|
|
|
let s = std::str::from_utf8(head)
|
|
|
|
|
|
.map_err(|_| CodecError::InvalidUtf8)?
|
|
|
|
|
|
.to_string();
|
|
|
|
|
|
Ok(Value::Str(s))
|
|
|
|
|
|
}
|
|
|
|
|
|
other => Err(CodecError::UnknownTag(other)),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn read_u8(bytes: &mut &[u8]) -> Result<u8, CodecError> {
|
|
|
|
|
|
let (head, tail) = bytes.split_first().ok_or(CodecError::UnexpectedEof)?;
|
|
|
|
|
|
*bytes = tail;
|
|
|
|
|
|
Ok(*head)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn read_u32(bytes: &mut &[u8]) -> Result<u32, CodecError> {
|
|
|
|
|
|
if bytes.len() < 4 {
|
|
|
|
|
|
return Err(CodecError::UnexpectedEof);
|
|
|
|
|
|
}
|
|
|
|
|
|
let (head, tail) = bytes.split_at(4);
|
|
|
|
|
|
*bytes = tail;
|
|
|
|
|
|
let mut buf = [0u8; 4];
|
|
|
|
|
|
buf.copy_from_slice(head);
|
|
|
|
|
|
Ok(u32::from_le_bytes(buf))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn read_u64(bytes: &mut &[u8]) -> Result<u64, CodecError> {
|
|
|
|
|
|
if bytes.len() < 8 {
|
|
|
|
|
|
return Err(CodecError::UnexpectedEof);
|
|
|
|
|
|
}
|
|
|
|
|
|
let (head, tail) = bytes.split_at(8);
|
|
|
|
|
|
*bytes = tail;
|
|
|
|
|
|
let mut buf = [0u8; 8];
|
|
|
|
|
|
buf.copy_from_slice(head);
|
|
|
|
|
|
Ok(u64::from_le_bytes(buf))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn read_i64(bytes: &mut &[u8]) -> Result<i64, CodecError> {
|
|
|
|
|
|
if bytes.len() < 8 {
|
|
|
|
|
|
return Err(CodecError::UnexpectedEof);
|
|
|
|
|
|
}
|
|
|
|
|
|
let (head, tail) = bytes.split_at(8);
|
|
|
|
|
|
*bytes = tail;
|
|
|
|
|
|
let mut buf = [0u8; 8];
|
|
|
|
|
|
buf.copy_from_slice(head);
|
|
|
|
|
|
Ok(i64::from_le_bytes(buf))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Build the storage key for a row from its synthetic `u64` ID.
///
/// The ID is laid out big-endian, so comparing two keys byte-by-byte gives
/// the same ordering as comparing the IDs numerically.
#[must_use]
pub fn row_key(id: u64) -> [u8; 8] {
    u64::to_be_bytes(id)
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Pack per-relation metadata into a fixed 12-byte record.
///
/// Layout: bytes `0..4` hold `arity` and bytes `4..12` hold `next_id`, both
/// little-endian.
#[must_use]
pub fn encode_meta(arity: u32, next_id: u64) -> [u8; 12] {
    let mut record = [0u8; 12];
    {
        // Split the record into its two fields and fill each one in place.
        let (arity_slot, next_id_slot) = record.split_at_mut(4);
        arity_slot.copy_from_slice(&arity.to_le_bytes());
        next_id_slot.copy_from_slice(&next_id.to_le_bytes());
    }
    record
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Decode per-relation metadata.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// # Errors
|
|
|
|
|
|
/// Returns [`CodecError::UnexpectedEof`] if the slice is shorter than 12 bytes.
|
|
|
|
|
|
pub fn decode_meta(mut bytes: &[u8]) -> Result<(u32, u64), CodecError> {
|
|
|
|
|
|
let arity = read_u32(&mut bytes)?;
|
|
|
|
|
|
let next_id = read_u64(&mut bytes)?;
|
|
|
|
|
|
Ok((arity, next_id))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    //! Round-trip and malformed-input tests for the row, key and metadata codecs.

    use super::*;

    /// Shorthand for an integer value.
    fn i(x: i64) -> Value {
        Value::Int(x)
    }

    /// Shorthand for a string value.
    fn s(x: &str) -> Value {
        Value::Str(x.to_string())
    }

    #[test]
    fn encode_decode_int_only_row() -> Result<(), CodecError> {
        let row = vec![i(1), i(-2), i(i64::MAX)];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_decode_mixed_row() -> Result<(), CodecError> {
        let row = vec![s("Alice"), i(42), s("a longer string with spaces")];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_decode_non_ascii_and_empty_strings() -> Result<(), CodecError> {
        // Lengths are byte lengths, so multi-byte characters must survive.
        let row = vec![s("héllo wörld ✓"), s(""), i(i64::MIN)];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_decode_empty_row() -> Result<(), CodecError> {
        let bytes = encode_row(&[]);
        let decoded = decode_row(&bytes)?;
        assert!(decoded.is_empty());
        Ok(())
    }

    #[test]
    fn decode_unknown_tag_fails() {
        let bytes = vec![1, 0, 0, 0, 0xFF];
        assert!(matches!(
            decode_row(&bytes),
            Err(CodecError::UnknownTag(0xFF))
        ));
    }

    #[test]
    fn decode_truncated_fails() {
        // One integer field announced, but only one of its eight bytes present.
        let bytes = vec![1, 0, 0, 0, 0x00, 0x01];
        assert!(matches!(decode_row(&bytes), Err(CodecError::UnexpectedEof)));
    }

    #[test]
    fn decode_empty_input_fails() {
        // Not even the 4-byte field count is present.
        assert!(matches!(decode_row(&[]), Err(CodecError::UnexpectedEof)));
    }

    #[test]
    fn decode_length_overrun_fails() {
        // One string field declaring 5 payload bytes, but only 1 follows.
        let bytes = vec![1, 0, 0, 0, 0x01, 5, 0, 0, 0, b'a'];
        assert!(matches!(
            decode_row(&bytes),
            Err(CodecError::LengthOverrun {
                declared: 5,
                available: 1
            })
        ));
    }

    #[test]
    fn decode_invalid_utf8_fails() {
        // One string field whose single payload byte is not valid UTF-8.
        let bytes = vec![1, 0, 0, 0, 0x01, 1, 0, 0, 0, 0xFF];
        assert!(matches!(decode_row(&bytes), Err(CodecError::InvalidUtf8)));
    }

    #[test]
    fn row_key_preserves_order() {
        assert!(row_key(1) < row_key(2));
        assert!(row_key(255) < row_key(256));
        assert!(row_key(u64::MAX - 1) < row_key(u64::MAX));
    }

    #[test]
    fn meta_roundtrip() -> Result<(), CodecError> {
        let encoded = encode_meta(3, 12345);
        let (arity, next_id) = decode_meta(&encoded)?;
        assert_eq!(arity, 3);
        assert_eq!(next_id, 12345);
        Ok(())
    }

    #[test]
    fn decode_meta_truncated_fails() {
        // Eleven bytes: the arity can be read, the 8-byte next_id cannot.
        let encoded = encode_meta(3, 12345);
        assert!(matches!(
            decode_meta(&encoded[..11]),
            Err(CodecError::UnexpectedEof)
        ));
    }
}
|