//! Wire format shared by every byte-oriented backend in this crate. //! //! The encoding is hand-rolled (no `serde`, no `bincode`) so that the //! generated bytes are stable and inspectable. It is **not** versioned: adding //! a new [`Value`] variant invalidates previously-stored data. That is fine //! for a playground; production code would prepend a format byte. //! //! ## Row Format //! //! `[count: u32 LE] [val × count]` //! //! ## Value Format //! //! `[tag: u8] [payload]` //! //! | Tag | Variant | Payload | //! |--------|---------------|--------------------------------------| //! | `0x00` | `Value::Int` | `i64 LE` (8 bytes) | //! | `0x01` | `Value::Str` | `[len: u32 LE] [bytes]` | //! //! ## Row Key Format //! //! Synthetic row IDs are `u64` encoded big-endian so lexicographic key order //! matches insertion order. Backends with named sub-stores per relation can //! use this directly as the key. //! //! ## Metadata Format //! //! Per-relation metadata is `[arity: u32 LE] [next_id: u64 LE]` = 12 bytes. use crate::value::Value; /// Errors raised by [`decode_row`] and [`decode_meta`]. #[derive(Debug)] pub enum CodecError { /// The byte slice ended before the expected number of fields was read. UnexpectedEof, /// A value tag byte was unrecognized. UnknownTag(u8), /// A length field declared more bytes than the slice contains. LengthOverrun { declared: usize, available: usize }, /// A UTF-8 string payload could not be decoded. InvalidUtf8, } impl std::fmt::Display for CodecError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::UnexpectedEof => write!(f, "unexpected end of bytes"), Self::UnknownTag(t) => write!(f, "unknown value tag: 0x{t:02x}"), Self::LengthOverrun { declared, available, } => write!( f, "declared length {declared} exceeds available {available} bytes" ), Self::InvalidUtf8 => write!(f, "invalid UTF-8 in string payload"), } } } impl std::error::Error for CodecError {} /// Encode a row of [`Value`]s to bytes. #[must_use] pub fn encode_row(row: &[Value]) -> Vec { let mut out = Vec::with_capacity(4 + row.len() * 9); out.extend_from_slice(&u32::try_from(row.len()).unwrap_or(u32::MAX).to_le_bytes()); for value in row { match value { Value::Int(i) => { out.push(0x00); out.extend_from_slice(&i.to_le_bytes()); } Value::Str(s) => { out.push(0x01); let bytes = s.as_bytes(); out.extend_from_slice( &u32::try_from(bytes.len()).unwrap_or(u32::MAX).to_le_bytes(), ); out.extend_from_slice(bytes); } } } out } /// Decode a row of [`Value`]s from bytes. /// /// # Errors /// Returns [`CodecError`] if the byte slice is malformed. pub fn decode_row(mut bytes: &[u8]) -> Result, CodecError> { let count = read_u32(&mut bytes)? as usize; let mut row = Vec::with_capacity(count); for _ in 0..count { row.push(read_value(&mut bytes)?); } Ok(row) } fn read_value(bytes: &mut &[u8]) -> Result { let tag = read_u8(bytes)?; match tag { 0x00 => { let i = read_i64(bytes)?; Ok(Value::Int(i)) } 0x01 => { let len = read_u32(bytes)? as usize; if bytes.len() < len { return Err(CodecError::LengthOverrun { declared: len, available: bytes.len(), }); } let (head, tail) = bytes.split_at(len); *bytes = tail; let s = std::str::from_utf8(head) .map_err(|_| CodecError::InvalidUtf8)? .to_string(); Ok(Value::Str(s)) } other => Err(CodecError::UnknownTag(other)), } } fn read_u8(bytes: &mut &[u8]) -> Result { let (head, tail) = bytes.split_first().ok_or(CodecError::UnexpectedEof)?; *bytes = tail; Ok(*head) } fn read_u32(bytes: &mut &[u8]) -> Result { if bytes.len() < 4 { return Err(CodecError::UnexpectedEof); } let (head, tail) = bytes.split_at(4); *bytes = tail; let mut buf = [0u8; 4]; buf.copy_from_slice(head); Ok(u32::from_le_bytes(buf)) } fn read_u64(bytes: &mut &[u8]) -> Result { if bytes.len() < 8 { return Err(CodecError::UnexpectedEof); } let (head, tail) = bytes.split_at(8); *bytes = tail; let mut buf = [0u8; 8]; buf.copy_from_slice(head); Ok(u64::from_le_bytes(buf)) } fn read_i64(bytes: &mut &[u8]) -> Result { if bytes.len() < 8 { return Err(CodecError::UnexpectedEof); } let (head, tail) = bytes.split_at(8); *bytes = tail; let mut buf = [0u8; 8]; buf.copy_from_slice(head); Ok(i64::from_le_bytes(buf)) } /// Encode a row key from a synthetic u64 ID. /// /// Big-endian so lexicographic key order matches insertion order. #[must_use] pub fn row_key(id: u64) -> [u8; 8] { id.to_be_bytes() } /// Encode per-relation metadata: arity and next row ID. #[must_use] pub fn encode_meta(arity: u32, next_id: u64) -> [u8; 12] { let mut out = [0u8; 12]; out[0..4].copy_from_slice(&arity.to_le_bytes()); out[4..12].copy_from_slice(&next_id.to_le_bytes()); out } /// Decode per-relation metadata. /// /// # Errors /// Returns [`CodecError::UnexpectedEof`] if the slice is shorter than 12 bytes. pub fn decode_meta(mut bytes: &[u8]) -> Result<(u32, u64), CodecError> { let arity = read_u32(&mut bytes)?; let next_id = read_u64(&mut bytes)?; Ok((arity, next_id)) } #[cfg(test)] mod tests { use super::*; fn i(x: i64) -> Value { Value::Int(x) } fn s(x: &str) -> Value { Value::Str(x.to_string()) } #[test] fn encode_decode_int_only_row() -> Result<(), CodecError> { let row = vec![i(1), i(-2), i(i64::MAX)]; let bytes = encode_row(&row); let decoded = decode_row(&bytes)?; assert_eq!(decoded, row); Ok(()) } #[test] fn encode_decode_mixed_row() -> Result<(), CodecError> { let row = vec![s("Alice"), i(42), s("a longer string with spaces")]; let bytes = encode_row(&row); let decoded = decode_row(&bytes)?; assert_eq!(decoded, row); Ok(()) } #[test] fn encode_decode_empty_row() -> Result<(), CodecError> { let bytes = encode_row(&[]); let decoded = decode_row(&bytes)?; assert!(decoded.is_empty()); Ok(()) } #[test] fn decode_unknown_tag_fails() { let bytes = vec![1, 0, 0, 0, 0xFF]; assert!(matches!( decode_row(&bytes), Err(CodecError::UnknownTag(0xFF)) )); } #[test] fn decode_truncated_fails() { let bytes = vec![1, 0, 0, 0, 0x00, 0x01]; assert!(matches!(decode_row(&bytes), Err(CodecError::UnexpectedEof))); } #[test] fn row_key_preserves_order() { assert!(row_key(1) < row_key(2)); assert!(row_key(255) < row_key(256)); assert!(row_key(u64::MAX - 1) < row_key(u64::MAX)); } #[test] fn meta_roundtrip() -> Result<(), CodecError> { let encoded = encode_meta(3, 12345); let (arity, next_id) = decode_meta(&encoded)?; assert_eq!(arity, 3); assert_eq!(next_id, 12345); Ok(()) } }