13
mini-lsm/Cargo.toml
Normal file
13
mini-lsm/Cargo.toml
Normal file
@@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "mini-lsm"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
homepage = { workspace = true }
|
||||
keywords = { workspace = true }
|
||||
license = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
bytes = "1"
|
||||
132
mini-lsm/src/block.rs
Normal file
132
mini-lsm/src/block.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
mod builder;
|
||||
mod iterator;
|
||||
|
||||
use bytes::{Buf, BufMut, Bytes};
|
||||
|
||||
pub use builder::BlockBuilder;
|
||||
pub use iterator::BlockIterator;
|
||||
|
||||
/// Size in bytes of a `u16`, the integer width used for every length, offset and count field
/// in the encoded block format.
pub const SIZEOF_U16: usize = std::mem::size_of::<u16>();
|
||||
|
||||
/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted
/// key-value pairs.
///
/// The fields are private: private items are visible to this module and to its child modules
/// (`builder`, `iterator`), which is exactly the visibility the redundant `pub(self)` spelled
/// out before.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Block {
    /// Concatenated entries. Each entry is laid out as
    /// `key_len (u16) | key | value_len (u16) | value`, with lengths in big-endian order.
    data: Vec<u8>,
    /// Byte offset into `data` at which each entry starts, in entry order.
    offsets: Vec<u16>,
}
|
||||
|
||||
impl Block {
|
||||
pub fn encode(&self) -> Bytes {
|
||||
let mut buf = self.data.clone();
|
||||
let offsets_len = self.offsets.len();
|
||||
for offset in &self.offsets {
|
||||
buf.put_u16(*offset);
|
||||
}
|
||||
buf.put_u16(offsets_len as u16);
|
||||
buf.into()
|
||||
}
|
||||
|
||||
pub fn decode(data: &[u8]) -> Self {
|
||||
let entry_offsets_len = (&data[data.len() - SIZEOF_U16..]).get_u16() as usize;
|
||||
let data_end = data.len() - SIZEOF_U16 - entry_offsets_len * SIZEOF_U16;
|
||||
let offsets_raw = &data[data_end..data.len() - SIZEOF_U16];
|
||||
let offsets = offsets_raw
|
||||
.chunks(SIZEOF_U16)
|
||||
.map(|mut x| x.get_u16())
|
||||
.collect();
|
||||
let data = data[0..data_end].to_vec();
|
||||
Self { data, offsets }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::{builder::BlockBuilder, iterator::BlockIterator, *};

    /// Number of entries placed in the block returned by `generate_block`.
    fn num_of_keys() -> usize {
        100
    }

    /// Key of the `idx`-th generated entry; zero-padded so keys sort in index order.
    fn key_of(idx: usize) -> Vec<u8> {
        format!("key_{:03}", idx).into_bytes()
    }

    /// Value of the `idx`-th generated entry.
    fn value_of(idx: usize) -> Vec<u8> {
        format!("value_{:010}", idx).into_bytes()
    }

    /// Wraps a byte slice in `Bytes` so assertion messages print it readably.
    fn as_bytes(x: &[u8]) -> Bytes {
        Bytes::copy_from_slice(x)
    }

    /// Builds a block holding `num_of_keys()` entries, asserting that every entry fits.
    fn generate_block() -> Block {
        let mut builder = BlockBuilder::new(10000);
        for idx in 0..num_of_keys() {
            let (key, value) = (key_of(idx), value_of(idx));
            assert!(builder.add(&key, &value));
        }
        builder.build()
    }

    #[test]
    fn test_block_build_single_key() {
        let mut builder = BlockBuilder::new(16);
        assert!(builder.add(b"233", b"233333"));
        builder.build();
    }

    #[test]
    fn test_block_build_full() {
        let mut builder = BlockBuilder::new(16);
        // The first entry is always accepted; the second one exceeds the 16-byte target.
        assert!(builder.add(b"11", b"11"));
        assert!(!builder.add(b"22", b"22"));
        builder.build();
    }

    #[test]
    fn test_block_build_all() {
        generate_block();
    }

    #[test]
    fn test_block_encode() {
        let block = generate_block();
        block.encode();
    }

    #[test]
    fn test_block_decode() {
        let original = generate_block();
        let decoded = Block::decode(&original.encode());
        assert_eq!(original.offsets, decoded.offsets);
        assert_eq!(original.data, decoded.data);
    }

    #[test]
    fn test_block_iterator() {
        let mut iter = BlockIterator::create_and_seek_to_first(Arc::new(generate_block()));
        for i in 0..num_of_keys() {
            let expected_key = key_of(i);
            let expected_value = value_of(i);
            assert_eq!(
                iter.key(),
                expected_key,
                "expected key: {:?}, actual key: {:?}",
                as_bytes(&expected_key),
                as_bytes(iter.key())
            );
            assert_eq!(
                iter.value(),
                expected_value,
                "expected value: {:?}, actual value: {:?}",
                as_bytes(&expected_value),
                as_bytes(iter.value())
            );
            iter.next();
        }
    }
}
|
||||
57
mini-lsm/src/block/builder.rs
Normal file
57
mini-lsm/src/block/builder.rs
Normal file
@@ -0,0 +1,57 @@
|
||||
use bytes::BufMut;
|
||||
|
||||
use super::{Block, SIZEOF_U16};
|
||||
|
||||
/// Incrementally assembles a [`Block`] from key-value pairs.
pub struct BlockBuilder {
    /// Byte offset into `data` at which each added entry starts.
    offsets: Vec<u16>,
    /// Serialized entries, appended in the order they were added.
    data: Vec<u8>,
    /// Encoded size, in bytes, past which further entries are rejected.
    target_size: usize,
}
|
||||
|
||||
impl BlockBuilder {
|
||||
/// Creates a new block builder
|
||||
pub fn new(target_size: usize) -> Self {
|
||||
Self {
|
||||
offsets: Vec::new(),
|
||||
data: Vec::new(),
|
||||
target_size,
|
||||
}
|
||||
}
|
||||
|
||||
fn estimated_size(&self) -> usize {
|
||||
self.offsets.len() * SIZEOF_U16 + self.data.len() + SIZEOF_U16
|
||||
}
|
||||
|
||||
/// Adds a key-value pair to the block
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
||||
assert!(!key.is_empty(), "key must not be empty");
|
||||
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.target_size
|
||||
&& !self.is_empty()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
self.offsets.push(self.data.len() as u16);
|
||||
self.data.put_u16(key.len() as u16);
|
||||
self.data.put(key);
|
||||
self.data.put_u16(value.len() as u16);
|
||||
self.data.put(value);
|
||||
true
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.offsets.is_empty()
|
||||
}
|
||||
|
||||
/// Builds a block
|
||||
pub fn build(self) -> Block {
|
||||
if self.is_empty() {
|
||||
panic!("block should not be empty");
|
||||
}
|
||||
Block {
|
||||
data: self.data,
|
||||
offsets: self.offsets,
|
||||
}
|
||||
}
|
||||
}
|
||||
110
mini-lsm/src/block/iterator.rs
Normal file
110
mini-lsm/src/block/iterator.rs
Normal file
@@ -0,0 +1,110 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Buf;
|
||||
|
||||
use super::Block;
|
||||
|
||||
pub struct BlockIterator {
|
||||
block: Arc<Block>,
|
||||
key: Vec<u8>,
|
||||
value: Vec<u8>,
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
impl BlockIterator {
|
||||
fn new(block: Arc<Block>) -> Self {
|
||||
Self {
|
||||
block,
|
||||
key: Vec::new(),
|
||||
value: Vec::new(),
|
||||
idx: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_and_seek_to_first(block: Arc<Block>) -> Self {
|
||||
let mut iter = Self::new(block);
|
||||
iter.seek_to_first();
|
||||
iter
|
||||
}
|
||||
|
||||
pub fn create_and_seek_to_key(block: Arc<Block>, key: &[u8]) -> Self {
|
||||
let mut iter = Self::new(block);
|
||||
iter.seek_to_key(key);
|
||||
iter
|
||||
}
|
||||
|
||||
pub fn key(&self) -> &[u8] {
|
||||
debug_assert!(!self.key.is_empty(), "invalid iterator");
|
||||
&self.key
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &[u8] {
|
||||
debug_assert!(!self.key.is_empty(), "invalid iterator");
|
||||
&self.value
|
||||
}
|
||||
|
||||
pub fn is_valid(&self) -> bool {
|
||||
!self.key.is_empty()
|
||||
}
|
||||
|
||||
pub fn seek_to_first(&mut self) {
|
||||
self.seek_to(0);
|
||||
}
|
||||
|
||||
pub fn seek_to_last(&mut self) {
|
||||
self.seek_to(self.block.offsets.len() - 1);
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.block.offsets.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.block.offsets.is_empty()
|
||||
}
|
||||
|
||||
pub fn seek_to(&mut self, idx: usize) {
|
||||
if idx >= self.block.offsets.len() {
|
||||
self.key.clear();
|
||||
self.value.clear();
|
||||
return;
|
||||
}
|
||||
let offset = self.block.offsets[idx] as usize;
|
||||
self.seek_to_offset(offset);
|
||||
self.idx = idx;
|
||||
}
|
||||
|
||||
pub fn next(&mut self) {
|
||||
self.idx += 1;
|
||||
self.seek_to(self.idx);
|
||||
}
|
||||
|
||||
fn seek_to_offset(&mut self, offset: usize) {
|
||||
let mut entry = &self.block.data[offset..];
|
||||
let key_len = entry.get_u16() as usize;
|
||||
let key = entry[..key_len].to_vec();
|
||||
entry.advance(key_len);
|
||||
self.key.clear();
|
||||
self.key.extend(key);
|
||||
let value_len = entry.get_u16() as usize;
|
||||
let value = entry[..value_len].to_vec();
|
||||
entry.advance(value_len);
|
||||
self.value.clear();
|
||||
self.value.extend(value);
|
||||
}
|
||||
|
||||
pub fn seek_to_key(&mut self, key: &[u8]) {
|
||||
let mut low = 0;
|
||||
let mut high = self.block.offsets.len();
|
||||
while low < high {
|
||||
let mid = low + (high - low) / 2;
|
||||
self.seek_to(mid);
|
||||
assert!(self.is_valid());
|
||||
match self.key().cmp(key) {
|
||||
std::cmp::Ordering::Less => low = mid + 1,
|
||||
std::cmp::Ordering::Greater => high = mid,
|
||||
std::cmp::Ordering::Equal => return,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1
mini-lsm/src/lib.rs
Normal file
1
mini-lsm/src/lib.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod block;
|
||||
Reference in New Issue
Block a user