263 lines
7.6 KiB
Rust
Raw Normal View History

//! Wire format shared by every byte-oriented backend in this crate.
//!
//! The encoding is hand-rolled (no `serde`, no `bincode`) so that the
//! generated bytes are stable and inspectable. It is **not** versioned: adding
//! a new [`Value`] variant invalidates previously-stored data. That is fine
//! for a playground; production code would prepend a format byte.
//!
//! ## Row Format
//!
//! `[count: u32 LE] [val × count]`
//!
//! ## Value Format
//!
//! `[tag: u8] [payload]`
//!
//! | Tag | Variant | Payload |
//! |--------|---------------|--------------------------------------|
//! | `0x00` | `Value::Int` | `i64 LE` (8 bytes) |
//! | `0x01` | `Value::Str` | `[len: u32 LE] [bytes]` |
//!
//! ## Row Key Format
//!
//! Synthetic row IDs are `u64` encoded big-endian so lexicographic key order
//! matches insertion order. Backends with named sub-stores per relation can
//! use this directly as the key.
//!
//! ## Metadata Format
//!
//! Per-relation metadata is `[arity: u32 LE] [next_id: u64 LE]` = 12 bytes.
use query_ops::value::Value;
/// Errors raised by [`decode_row`] and [`decode_meta`].
///
/// Every variant carries only plain integers, so the type is cheap to clone
/// and can be compared with `==` / `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodecError {
    /// The byte slice ended before the expected number of fields was read.
    UnexpectedEof,
    /// A value tag byte was unrecognized.
    UnknownTag(u8),
    /// A length field declared more bytes than the slice contains.
    LengthOverrun {
        /// Byte count announced by the length prefix.
        declared: usize,
        /// Bytes that were actually left in the input after the prefix.
        available: usize,
    },
    /// A UTF-8 string payload could not be decoded.
    InvalidUtf8,
}
/// User-facing, single-line description of each decode failure.
impl std::fmt::Display for CodecError {
    /// Writes the message for this error into `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            // Fixed messages need no formatting machinery.
            Self::UnexpectedEof => f.write_str("unexpected end of bytes"),
            Self::InvalidUtf8 => f.write_str("invalid UTF-8 in string payload"),
            // The tag is shown as a two-digit, zero-padded hex byte.
            Self::UnknownTag(tag) => write!(f, "unknown value tag: 0x{tag:02x}"),
            Self::LengthOverrun {
                declared,
                available,
            } => write!(
                f,
                "declared length {declared} exceeds available {available} bytes"
            ),
        }
    }
}
// No overrides needed: `CodecError` wraps no underlying error, so the trait's
// default methods are used as-is.
impl std::error::Error for CodecError {}
/// Serialize a row of [`Value`]s into the row wire format.
///
/// The output is a `u32` little-endian value count followed by each value as
/// a tag byte plus payload: `0x00` and an `i64` (little-endian) for integers,
/// `0x01`, a `u32` little-endian byte length and the raw UTF-8 bytes for
/// strings.
///
/// Length prefixes saturate: a row or string longer than `u32::MAX` is
/// written with a prefix of `u32::MAX` while all of its data is still emitted.
#[must_use]
pub fn encode_row(row: &[Value]) -> Vec<u8> {
    const TAG_INT: u8 = 0x00;
    const TAG_STR: u8 = 0x01;

    /// Little-endian `u32` length prefix, clamped to `u32::MAX`.
    fn len_prefix(len: usize) -> [u8; 4] {
        u32::try_from(len).unwrap_or(u32::MAX).to_le_bytes()
    }

    // Count prefix plus nine bytes per value (exact for an all-integer row).
    let mut buf = Vec::with_capacity(4 + row.len() * 9);
    buf.extend_from_slice(&len_prefix(row.len()));

    for field in row {
        match field {
            Value::Int(number) => {
                buf.push(TAG_INT);
                buf.extend_from_slice(&number.to_le_bytes());
            }
            Value::Str(text) => {
                buf.push(TAG_STR);
                buf.extend_from_slice(&len_prefix(text.len()));
                buf.extend_from_slice(text.as_bytes());
            }
        }
    }

    buf
}
/// Decode a row of [`Value`]s from bytes.
///
/// The input must start with a `u32` little-endian value count followed by
/// that many encoded values. Bytes after the last value are ignored.
///
/// The count is read from the input and therefore not trusted for sizing the
/// output: the initial allocation is capped by how many values the remaining
/// bytes could possibly contain, so a short input announcing a huge count
/// fails with an error instead of requesting gigabytes of memory.
///
/// # Errors
/// Returns [`CodecError`] if the byte slice is malformed.
pub fn decode_row(mut bytes: &[u8]) -> Result<Vec<Value>, CodecError> {
    // Smallest encoded value: one tag byte plus the four-byte length prefix of
    // an empty string. Only used as an allocation hint; the vector still grows
    // on demand if a row somehow holds more values than this predicts.
    const MIN_VALUE_LEN: usize = 5;

    let count = read_u32(&mut bytes)? as usize;
    let mut row = Vec::with_capacity(count.min(bytes.len() / MIN_VALUE_LEN));
    for _ in 0..count {
        row.push(read_value(&mut bytes)?);
    }
    Ok(row)
}
/// Reads one tagged value from the front of `bytes`, advancing the cursor.
///
/// Tag `0x00` is followed by an `i64`; tag `0x01` by a `u32` byte length and
/// that many bytes of UTF-8. Any other tag is rejected.
///
/// Bytes already consumed stay consumed when an error is returned: the tag
/// (and, for strings, the length prefix) on every failure, and the payload as
/// well when it turns out not to be valid UTF-8.
fn read_value(bytes: &mut &[u8]) -> Result<Value, CodecError> {
    match read_u8(bytes)? {
        0x00 => read_i64(bytes).map(Value::Int),
        0x01 => {
            let declared = read_u32(bytes)? as usize;
            let available = bytes.len();
            if declared > available {
                return Err(CodecError::LengthOverrun {
                    declared,
                    available,
                });
            }

            // Step over the payload before validating it.
            let (payload, rest) = bytes.split_at(declared);
            *bytes = rest;

            match std::str::from_utf8(payload) {
                Ok(text) => Ok(Value::Str(text.to_string())),
                Err(_) => Err(CodecError::InvalidUtf8),
            }
        }
        unknown => Err(CodecError::UnknownTag(unknown)),
    }
}
/// Pops a single byte off the front of `bytes`.
///
/// Yields [`CodecError::UnexpectedEof`] when the cursor is already empty.
fn read_u8(bytes: &mut &[u8]) -> Result<u8, CodecError> {
    match bytes.split_first() {
        Some((&byte, rest)) => {
            *bytes = rest;
            Ok(byte)
        }
        None => Err(CodecError::UnexpectedEof),
    }
}
/// Pops a little-endian `u32` off the front of `bytes`.
///
/// With fewer than four bytes left it yields [`CodecError::UnexpectedEof`]
/// and leaves the cursor where it was.
fn read_u32(bytes: &mut &[u8]) -> Result<u32, CodecError> {
    let input: &[u8] = *bytes;
    match input {
        [b0, b1, b2, b3, rest @ ..] => {
            *bytes = rest;
            Ok(u32::from_le_bytes([*b0, *b1, *b2, *b3]))
        }
        _ => Err(CodecError::UnexpectedEof),
    }
}
/// Pops a little-endian `u64` off the front of `bytes`.
///
/// With fewer than eight bytes left it yields [`CodecError::UnexpectedEof`]
/// and leaves the cursor where it was.
fn read_u64(bytes: &mut &[u8]) -> Result<u64, CodecError> {
    const WIDTH: usize = std::mem::size_of::<u64>();

    let input: &[u8] = *bytes;
    let raw = input.get(..WIDTH).ok_or(CodecError::UnexpectedEof)?;

    let mut le = [0u8; WIDTH];
    le.copy_from_slice(raw);

    *bytes = &input[WIDTH..];
    Ok(u64::from_le_bytes(le))
}
fn read_i64(bytes: &mut &[u8]) -> Result<i64, CodecError> {
if bytes.len() < 8 {
return Err(CodecError::UnexpectedEof);
}
let (head, tail) = bytes.split_at(8);
*bytes = tail;
let mut buf = [0u8; 8];
buf.copy_from_slice(head);
Ok(i64::from_le_bytes(buf))
}
/// Turn a synthetic `u64` row ID into its 8-byte storage key.
///
/// The bytes are big-endian, so comparing two keys byte by byte gives the
/// same result as comparing the IDs numerically.
#[must_use]
pub fn row_key(id: u64) -> [u8; 8] {
    u64::to_be_bytes(id)
}
/// Pack per-relation metadata into its fixed 12-byte form.
///
/// Bytes `0..4` hold `arity` and bytes `4..12` hold `next_id`, both
/// little-endian.
#[must_use]
pub fn encode_meta(arity: u32, next_id: u64) -> [u8; 12] {
    let mut meta = [0u8; 12];
    {
        // Split once so each field is copied into its own fixed-size slot.
        let (arity_slot, next_id_slot) = meta.split_at_mut(4);
        arity_slot.copy_from_slice(&arity.to_le_bytes());
        next_id_slot.copy_from_slice(&next_id.to_le_bytes());
    }
    meta
}
/// Unpack per-relation metadata into `(arity, next_id)`.
///
/// Reads a little-endian `u32` followed by a little-endian `u64` from the
/// front of `bytes`; anything after those 12 bytes is ignored.
///
/// # Errors
/// Returns [`CodecError::UnexpectedEof`] if the slice is shorter than 12 bytes.
pub fn decode_meta(mut bytes: &[u8]) -> Result<(u32, u64), CodecError> {
    let cursor = &mut bytes;
    let arity = read_u32(cursor)?;
    read_u64(cursor).map(|next_id| (arity, next_id))
}
// Unit tests for the wire format: round-trips, exact byte layout, and every
// decode error variant.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an integer cell.
    fn i(x: i64) -> Value {
        Value::Int(x)
    }

    /// Shorthand for a string cell.
    fn s(x: &str) -> Value {
        Value::Str(x.to_string())
    }

    #[test]
    fn encode_decode_int_only_row() -> Result<(), CodecError> {
        let row = vec![i(1), i(-2), i(i64::MAX)];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_decode_mixed_row() -> Result<(), CodecError> {
        let row = vec![s("Alice"), i(42), s("a longer string with spaces")];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_decode_empty_row() -> Result<(), CodecError> {
        let bytes = encode_row(&[]);
        let decoded = decode_row(&bytes)?;
        assert!(decoded.is_empty());
        Ok(())
    }

    #[test]
    fn encode_decode_empty_and_non_ascii_strings() -> Result<(), CodecError> {
        let row = vec![s(""), s("héllo wörld"), i(i64::MIN)];
        let bytes = encode_row(&row);
        let decoded = decode_row(&bytes)?;
        assert_eq!(decoded, row);
        Ok(())
    }

    #[test]
    fn encode_row_matches_documented_layout() {
        // [count: u32 LE] then, per value, [tag] [payload].
        assert_eq!(
            encode_row(&[s("hi"), i(1)]),
            vec![2, 0, 0, 0, 0x01, 2, 0, 0, 0, b'h', b'i', 0x00, 1, 0, 0, 0, 0, 0, 0, 0]
        );
    }

    #[test]
    fn decode_unknown_tag_fails() {
        let bytes = vec![1, 0, 0, 0, 0xFF];
        assert!(matches!(
            decode_row(&bytes),
            Err(CodecError::UnknownTag(0xFF))
        ));
    }

    #[test]
    fn decode_truncated_fails() {
        // One integer value announced, but only one of its eight bytes present.
        let bytes = vec![1, 0, 0, 0, 0x00, 0x01];
        assert!(matches!(decode_row(&bytes), Err(CodecError::UnexpectedEof)));
    }

    #[test]
    fn decode_missing_count_fails() {
        assert!(matches!(
            decode_row(&[0, 0]),
            Err(CodecError::UnexpectedEof)
        ));
    }

    #[test]
    fn decode_length_overrun_fails() {
        // One string value declaring five payload bytes, but only one follows.
        let bytes = vec![1, 0, 0, 0, 0x01, 5, 0, 0, 0, b'a'];
        assert!(matches!(
            decode_row(&bytes),
            Err(CodecError::LengthOverrun {
                declared: 5,
                available: 1
            })
        ));
    }

    #[test]
    fn decode_invalid_utf8_fails() {
        // 0xFF can never appear in well-formed UTF-8.
        let bytes = vec![1, 0, 0, 0, 0x01, 1, 0, 0, 0, 0xFF];
        assert!(matches!(decode_row(&bytes), Err(CodecError::InvalidUtf8)));
    }

    #[test]
    fn row_key_preserves_order() {
        assert!(row_key(1) < row_key(2));
        assert!(row_key(255) < row_key(256));
        assert!(row_key(u64::MAX - 1) < row_key(u64::MAX));
    }

    #[test]
    fn meta_roundtrip() -> Result<(), CodecError> {
        let encoded = encode_meta(3, 12345);
        let (arity, next_id) = decode_meta(&encoded)?;
        assert_eq!(arity, 3);
        assert_eq!(next_id, 12345);
        Ok(())
    }

    #[test]
    fn meta_truncated_fails() {
        let encoded = encode_meta(3, 12345);
        assert!(matches!(
            decode_meta(&encoded[..11]),
            Err(CodecError::UnexpectedEof)
        ));
        assert!(matches!(decode_meta(&[]), Err(CodecError::UnexpectedEof)));
    }
}