2026-06-04 15:35:38 +02:00

285 lines
8.4 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Wire format shared by every byte-oriented backend in this crate.
//!
//! The encoding is hand-rolled (no `serde`, no `bincode`) so that the
//! generated bytes are stable and inspectable. It is **not** versioned: adding
//! a new [`Value`] variant invalidates previously-stored data. That is fine
//! for a playground; production code would prepend a format byte.
//!
//! ## Row Format
//!
//! `[count: u32 LE] [val × count]`
//!
//! ## Value Format
//!
//! `[tag: u8] [payload]`
//!
//! | Tag | Variant | Payload |
//! |--------|---------------|--------------------------------------|
//! | `0x00` | `Value::Int` | `i64 LE` (8 bytes) |
//! | `0x01` | `Value::Str` | `[len: u32 LE] [UTF-8 bytes]` |
//! | `0x02` | `Value::Id` | `[len: u32 LE] [bytes]` |
//!
//! ## Row Key Format
//!
//! Synthetic row IDs are `u64` encoded big-endian so lexicographic key order
//! matches insertion order. Backends with named sub-stores per relation can
//! use this directly as the key.
//!
//! ## Metadata Format
//!
//! Per-relation metadata is `[arity: u32 LE] [next_id: u64 LE]` = 12 bytes.
use crate::id::RowId;
use crate::value::Value;
/// Errors raised by [`decode_row`] and [`decode_meta`].
///
/// Every variant carries plain integers at most, so the type derives `Clone`,
/// `PartialEq` and `Eq`: callers and tests can compare a decoding result
/// against an expected error with `assert_eq!` instead of pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodecError {
    /// The byte slice ended before the expected number of fields was read.
    UnexpectedEof,
    /// A value tag byte was unrecognized; carries the offending tag.
    UnknownTag(u8),
    /// A length field declared more bytes than the slice contains.
    LengthOverrun {
        /// Payload length announced by the length prefix.
        declared: usize,
        /// Bytes actually left in the input after the length prefix.
        available: usize,
    },
    /// A UTF-8 string payload could not be decoded.
    InvalidUtf8,
}
/// One-line, human-readable rendering of each decoding failure.
impl std::fmt::Display for CodecError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // All payload fields are `Copy`, so the variants can be matched by value.
        match *self {
            Self::UnexpectedEof => f.write_str("unexpected end of bytes"),
            Self::UnknownTag(tag) => write!(f, "unknown value tag: 0x{tag:02x}"),
            Self::LengthOverrun {
                declared,
                available,
            } => write!(
                f,
                "declared length {declared} exceeds available {available} bytes"
            ),
            Self::InvalidUtf8 => f.write_str("invalid UTF-8 in string payload"),
        }
    }
}
// Empty impl: no trait method is overridden, so `CodecError` uses the
// `std::error::Error` defaults and exposes no underlying source error.
impl std::error::Error for CodecError {}
/// Serialize a row of [`Value`]s into the row wire format.
///
/// The output is a little-endian `u32` value count followed by each value as
/// a one-byte tag plus payload: `0x00` and an 8-byte little-endian `i64` for
/// `Value::Int`; `0x01` (`Value::Str`) or `0x02` (`Value::Id`) and a
/// little-endian `u32` byte length followed by the raw bytes.
///
/// A count or length that does not fit in a `u32` is written as `u32::MAX`
/// rather than rejected.
#[must_use]
pub fn encode_row(row: &[Value]) -> Vec<u8> {
    /// Append `len` as a little-endian `u32`, saturating at `u32::MAX`.
    fn push_len(buf: &mut Vec<u8>, len: usize) {
        let clamped = u32::try_from(len).unwrap_or(u32::MAX);
        buf.extend_from_slice(&clamped.to_le_bytes());
    }

    /// Append a tag byte followed by a length-prefixed payload.
    fn push_tagged(buf: &mut Vec<u8>, tag: u8, payload: &[u8]) {
        buf.push(tag);
        push_len(buf, payload.len());
        buf.extend_from_slice(payload);
    }

    // Room for the count plus a 9-byte `Int` encoding per value; longer
    // payloads simply make the buffer grow.
    let mut buf = Vec::with_capacity(4 + row.len() * 9);
    push_len(&mut buf, row.len());
    for value in row {
        match value {
            Value::Int(n) => {
                buf.push(0x00);
                buf.extend_from_slice(&n.to_le_bytes());
            }
            Value::Str(text) => push_tagged(&mut buf, 0x01, text.as_bytes()),
            Value::Id(id) => push_tagged(&mut buf, 0x02, id.as_bytes()),
        }
    }
    buf
}
/// Decode a row of [`Value`]s from bytes.
///
/// The input must start with a little-endian `u32` value count followed by
/// that many encoded values. Bytes left over after the last value are ignored.
///
/// The count is read from the input and is therefore not trusted for sizing
/// the output: the up-front reservation is capped by the number of bytes that
/// remain, so a corrupt or hostile count produces a [`CodecError`] instead of
/// an enormous allocation attempt.
///
/// # Errors
/// Returns [`CodecError`] if the byte slice is malformed.
pub fn decode_row(mut bytes: &[u8]) -> Result<Vec<Value>, CodecError> {
    let count = read_u32(&mut bytes)? as usize;
    // Every encoded value occupies at least its one-byte tag, so a well-formed
    // row cannot contain more values than there are bytes left to read.
    let mut row = Vec::with_capacity(count.min(bytes.len()));
    for _ in 0..count {
        row.push(read_value(&mut bytes)?);
    }
    Ok(row)
}
/// Decode one tagged value from the front of `bytes`, advancing the cursor
/// past everything that was consumed.
///
/// Tag `0x00` is followed by an `i64`; tags `0x01` and `0x02` are followed by
/// a `u32` length and that many payload bytes (UTF-8 text for `0x01`, raw
/// bytes handed to `RowId::new` for `0x02`). Any other tag is rejected with
/// [`CodecError::UnknownTag`].
fn read_value(bytes: &mut &[u8]) -> Result<Value, CodecError> {
    /// Consume a `u32`-length-prefixed payload and return it, still borrowed
    /// from the input buffer.
    fn take_payload<'a>(cursor: &mut &'a [u8]) -> Result<&'a [u8], CodecError> {
        let declared = read_u32(cursor)? as usize;
        let rest: &'a [u8] = *cursor;
        if declared > rest.len() {
            return Err(CodecError::LengthOverrun {
                declared,
                available: rest.len(),
            });
        }
        let (payload, tail) = rest.split_at(declared);
        *cursor = tail;
        Ok(payload)
    }

    match read_u8(bytes)? {
        0x00 => read_i64(bytes).map(Value::Int),
        0x01 => {
            let payload = take_payload(bytes)?;
            let text = std::str::from_utf8(payload).map_err(|_| CodecError::InvalidUtf8)?;
            Ok(Value::Str(text.to_string()))
        }
        0x02 => {
            let payload = take_payload(bytes)?;
            Ok(Value::Id(RowId::new(payload)))
        }
        other => Err(CodecError::UnknownTag(other)),
    }
}
/// Pop a single byte off the front of `bytes`.
///
/// Fails with [`CodecError::UnexpectedEof`] when the input is empty, in which
/// case the cursor is left untouched.
fn read_u8(bytes: &mut &[u8]) -> Result<u8, CodecError> {
    let input: &[u8] = *bytes;
    match input {
        [first, rest @ ..] => {
            *bytes = rest;
            Ok(*first)
        }
        [] => Err(CodecError::UnexpectedEof),
    }
}
fn read_u32(bytes: &mut &[u8]) -> Result<u32, CodecError> {
if bytes.len() < 4 {
return Err(CodecError::UnexpectedEof);
}
let (head, tail) = bytes.split_at(4);
*bytes = tail;
let mut buf = [0u8; 4];
buf.copy_from_slice(head);
Ok(u32::from_le_bytes(buf))
}
fn read_u64(bytes: &mut &[u8]) -> Result<u64, CodecError> {
if bytes.len() < 8 {
return Err(CodecError::UnexpectedEof);
}
let (head, tail) = bytes.split_at(8);
*bytes = tail;
let mut buf = [0u8; 8];
buf.copy_from_slice(head);
Ok(u64::from_le_bytes(buf))
}
fn read_i64(bytes: &mut &[u8]) -> Result<i64, CodecError> {
if bytes.len() < 8 {
return Err(CodecError::UnexpectedEof);
}
let (head, tail) = bytes.split_at(8);
*bytes = tail;
let mut buf = [0u8; 8];
buf.copy_from_slice(head);
Ok(i64::from_le_bytes(buf))
}
/// Build the storage key for a synthetic `u64` row ID.
///
/// The ID is laid out most-significant byte first, so comparing two keys
/// byte-by-byte orders them exactly as comparing the IDs numerically would.
#[must_use]
pub fn row_key(id: u64) -> [u8; 8] {
    u64::to_be_bytes(id)
}
/// Pack per-relation metadata into its fixed 12-byte form.
///
/// Bytes `0..4` hold `arity` and bytes `4..12` hold `next_id`, both
/// little-endian.
#[must_use]
pub fn encode_meta(arity: u32, next_id: u64) -> [u8; 12] {
    let mut buf = [0u8; 12];
    {
        let (arity_part, next_id_part) = buf.split_at_mut(4);
        arity_part.copy_from_slice(&arity.to_le_bytes());
        next_id_part.copy_from_slice(&next_id.to_le_bytes());
    }
    buf
}
/// Unpack per-relation metadata into `(arity, next_id)`.
///
/// Reads a little-endian `u32` followed by a little-endian `u64` from the
/// front of `bytes`; anything after those 12 bytes is ignored.
///
/// # Errors
/// Returns [`CodecError::UnexpectedEof`] if the slice is shorter than 12 bytes.
pub fn decode_meta(mut bytes: &[u8]) -> Result<(u32, u64), CodecError> {
    let cursor = &mut bytes;
    // Tuple fields evaluate left to right, so `arity` is consumed first.
    Ok((read_u32(cursor)?, read_u64(cursor)?))
}
#[cfg(test)]
mod tests {
    // Round-trip checks for rows and metadata, plus one test per decoding
    // failure that can be triggered from raw bytes.
    use super::*;

    /// Shorthand for a `Value::Int`.
    fn i(x: i64) -> Value {
        Value::Int(x)
    }

    /// Shorthand for a `Value::Str`.
    fn s(x: &str) -> Value {
        Value::Str(x.to_string())
    }

    #[test]
    fn encode_decode_int_only_row() -> Result<(), CodecError> {
        let row = vec![i(1), i(-2), i(i64::MAX)];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_decode_mixed_row() -> Result<(), CodecError> {
        let row = vec![s("Alice"), i(42), s("a longer string with spaces")];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_decode_empty_row() -> Result<(), CodecError> {
        let bytes = encode_row(&[]);
        let decoded = decode_row(&bytes)?;
        assert!(decoded.is_empty());
        Ok(())
    }

    #[test]
    fn decode_unknown_tag_fails() {
        // Count of 1, then a tag byte no variant uses.
        let bytes = vec![1, 0, 0, 0, 0xFF];
        assert!(matches!(
            decode_row(&bytes),
            Err(CodecError::UnknownTag(0xFF))
        ));
    }

    #[test]
    fn decode_truncated_fails() {
        // Count of 1, an `Int` tag, but only one of the eight payload bytes.
        let bytes = vec![1, 0, 0, 0, 0x00, 0x01];
        assert!(matches!(decode_row(&bytes), Err(CodecError::UnexpectedEof)));
    }

    #[test]
    fn decode_missing_value_fails() {
        // Count of 1 with no value bytes at all.
        let bytes = vec![1, 0, 0, 0];
        assert!(matches!(decode_row(&bytes), Err(CodecError::UnexpectedEof)));
    }

    #[test]
    fn decode_length_overrun_fails() {
        // A `Str` announcing five payload bytes when only one is present.
        let bytes = vec![1, 0, 0, 0, 0x01, 5, 0, 0, 0, b'a'];
        assert!(matches!(
            decode_row(&bytes),
            Err(CodecError::LengthOverrun {
                declared: 5,
                available: 1
            })
        ));
    }

    #[test]
    fn decode_invalid_utf8_fails() {
        // A one-byte `Str` payload that is not valid UTF-8.
        let bytes = vec![1, 0, 0, 0, 0x01, 1, 0, 0, 0, 0xFF];
        assert!(matches!(decode_row(&bytes), Err(CodecError::InvalidUtf8)));
    }

    #[test]
    fn row_key_preserves_order() {
        assert!(row_key(1) < row_key(2));
        assert!(row_key(255) < row_key(256));
        assert!(row_key(u64::MAX - 1) < row_key(u64::MAX));
    }

    #[test]
    fn meta_roundtrip() -> Result<(), CodecError> {
        let encoded = encode_meta(3, 12345);
        let (arity, next_id) = decode_meta(&encoded)?;
        assert_eq!(arity, 3);
        assert_eq!(next_id, 12345);
        Ok(())
    }

    #[test]
    fn decode_meta_short_fails() {
        // One byte short of the fixed 12-byte metadata record.
        assert!(matches!(
            decode_meta(&[0u8; 11]),
            Err(CodecError::UnexpectedEof)
        ));
    }
}