From 72cee6ac133fde2571b93d03d9ac9ef8161f5e35 Mon Sep 17 00:00:00 2001 From: Alex Chi Date: Fri, 23 Dec 2022 21:14:11 -0500 Subject: [PATCH] feat(code): add sst iterator / day 2 Signed-off-by: Alex Chi --- Cargo.lock | 1 + README.md | 7 ++ mini-lsm-starter/src/lib.rs | 1 + mini-lsm-starter/src/table.rs | 1 + mini-lsm-starter/src/table/builder.rs | 0 mini-lsm-starter/src/table/iterator.rs | 0 mini-lsm-starter/src/table/tests.rs | 0 mini-lsm/Cargo.toml | 1 + mini-lsm/src/block.rs | 4 +- mini-lsm/src/block/builder.rs | 9 +- mini-lsm/src/block/iterator.rs | 5 +- mini-lsm/src/block/tests.rs | 69 +++++++++---- mini-lsm/src/lib.rs | 1 + mini-lsm/src/table.rs | 113 +++++++++++++++++++++ mini-lsm/src/table/builder.rs | 75 ++++++++++++++ mini-lsm/src/table/iterator.rs | 91 +++++++++++++++++ mini-lsm/src/table/tests.rs | 134 +++++++++++++++++++++++++ 17 files changed, 483 insertions(+), 29 deletions(-) create mode 100644 mini-lsm-starter/src/table.rs create mode 100644 mini-lsm-starter/src/table/builder.rs create mode 100644 mini-lsm-starter/src/table/iterator.rs create mode 100644 mini-lsm-starter/src/table/tests.rs create mode 100644 mini-lsm/src/table.rs create mode 100644 mini-lsm/src/table/builder.rs create mode 100644 mini-lsm/src/table/iterator.rs create mode 100644 mini-lsm/src/table/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 4876ce4..b4d1676 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,6 +175,7 @@ checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" name = "mini-lsm" version = "0.1.0" dependencies = [ + "anyhow", "bytes", ] diff --git a/README.md b/README.md index 87cf70b..4431461 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,14 @@ # LSM in a Week +[![CI (main)](https://github.com/skyzh/mini-lsm/actions/workflows/main.yml/badge.svg)](https://github.com/skyzh/mini-lsm/actions/workflows/main.yml) + Build a simple key-value storage engine in a week! +## Tutorial + +The tutorial is available at [https://skyzh.github.io/mini-lsm](https://skyzh.github.io/mini-lsm). You can use the provided starter +code to kick off your project, and follow the tutorial to implement the LSM tree. + ## Development ``` diff --git a/mini-lsm-starter/src/lib.rs b/mini-lsm-starter/src/lib.rs index a863eaa..cd9e46c 100644 --- a/mini-lsm-starter/src/lib.rs +++ b/mini-lsm-starter/src/lib.rs @@ -1 +1,2 @@ pub mod block; +pub mod table; diff --git a/mini-lsm-starter/src/table.rs b/mini-lsm-starter/src/table.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/mini-lsm-starter/src/table.rs @@ -0,0 +1 @@ + diff --git a/mini-lsm-starter/src/table/builder.rs b/mini-lsm-starter/src/table/builder.rs new file mode 100644 index 0000000..e69de29 diff --git a/mini-lsm-starter/src/table/iterator.rs b/mini-lsm-starter/src/table/iterator.rs new file mode 100644 index 0000000..e69de29 diff --git a/mini-lsm-starter/src/table/tests.rs b/mini-lsm-starter/src/table/tests.rs new file mode 100644 index 0000000..e69de29 diff --git a/mini-lsm/Cargo.toml b/mini-lsm/Cargo.toml index 82a6f48..dbe801e 100644 --- a/mini-lsm/Cargo.toml +++ b/mini-lsm/Cargo.toml @@ -11,4 +11,5 @@ description = "A tutorial for building an LSM tree storage engine in a week." # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = "1" bytes = "1" diff --git a/mini-lsm/src/block.rs b/mini-lsm/src/block.rs index b472932..97e072a 100644 --- a/mini-lsm/src/block.rs +++ b/mini-lsm/src/block.rs @@ -10,8 +10,8 @@ pub const SIZEOF_U16: usize = std::mem::size_of::(); /// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted key-value pairs. pub struct Block { - pub(self) data: Vec, - pub(self) offsets: Vec, + data: Vec, + offsets: Vec, } impl Block { diff --git a/mini-lsm/src/block/builder.rs b/mini-lsm/src/block/builder.rs index 2ab036b..3f5b166 100644 --- a/mini-lsm/src/block/builder.rs +++ b/mini-lsm/src/block/builder.rs @@ -6,16 +6,16 @@ use super::{Block, SIZEOF_U16}; pub struct BlockBuilder { offsets: Vec, data: Vec, - target_size: usize, + block_size: usize, } impl BlockBuilder { /// Creates a new block builder - pub fn new(target_size: usize) -> Self { + pub fn new(block_size: usize) -> Self { Self { offsets: Vec::new(), data: Vec::new(), - target_size, + block_size, } } @@ -27,7 +27,8 @@ impl BlockBuilder { #[must_use] pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { assert!(!key.is_empty(), "key must not be empty"); - if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.target_size + assert!(!value.is_empty(), "value must not be empty"); + if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.block_size && !self.is_empty() { return false; diff --git a/mini-lsm/src/block/iterator.rs b/mini-lsm/src/block/iterator.rs index 62645d2..87d76dd 100644 --- a/mini-lsm/src/block/iterator.rs +++ b/mini-lsm/src/block/iterator.rs @@ -51,10 +51,6 @@ impl BlockIterator { self.seek_to(0); } - pub fn seek_to_last(&mut self) { - self.seek_to(self.block.offsets.len() - 1); - } - pub fn len(&self) -> usize { self.block.offsets.len() } @@ -106,5 +102,6 @@ impl BlockIterator { std::cmp::Ordering::Equal => return, } } + self.seek_to(low); } } diff --git a/mini-lsm/src/block/tests.rs b/mini-lsm/src/block/tests.rs index 51edfeb..ae52135 100644 --- a/mini-lsm/src/block/tests.rs +++ b/mini-lsm/src/block/tests.rs @@ -18,7 +18,7 @@ fn test_block_build_full() { } fn key_of(idx: usize) -> Vec { - format!("key_{:03}", idx).into_bytes() + format!("key_{:03}", idx * 5).into_bytes() } fn value_of(idx: usize) -> Vec { @@ -67,23 +67,54 @@ fn as_bytes(x: &[u8]) -> Bytes { fn test_block_iterator() { let block = Arc::new(generate_block()); let mut iter = BlockIterator::create_and_seek_to_first(block); - for i in 0..num_of_keys() { - let key = iter.key(); - let value = iter.value(); - assert_eq!( - key, - key_of(i), - "expected key: {:?}, actual key: {:?}", - as_bytes(&key_of(i)), - as_bytes(key) - ); - assert_eq!( - value, - value_of(i), - "expected value: {:?}, actual value: {:?}", - as_bytes(&value_of(i)), - as_bytes(value) - ); - iter.next(); + for _ in 0..5 { + for i in 0..num_of_keys() { + let key = iter.key(); + let value = iter.value(); + assert_eq!( + key, + key_of(i), + "expected key: {:?}, actual key: {:?}", + as_bytes(&key_of(i)), + as_bytes(key) + ); + assert_eq!( + value, + value_of(i), + "expected value: {:?}, actual value: {:?}", + as_bytes(&value_of(i)), + as_bytes(value) + ); + iter.next(); + } + iter.seek_to_first(); + } +} + +#[test] +fn test_block_seek_key() { + let block = Arc::new(generate_block()); + let mut iter = BlockIterator::create_and_seek_to_key(block, &key_of(0)); + for offset in 1..=5 { + for i in 0..num_of_keys() { + let key = iter.key(); + let value = iter.value(); + assert_eq!( + key, + key_of(i), + "expected key: {:?}, actual key: {:?}", + as_bytes(&key_of(i)), + as_bytes(key) + ); + assert_eq!( + value, + value_of(i), + "expected value: {:?}, actual value: {:?}", + as_bytes(&value_of(i)), + as_bytes(value) + ); + iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes()); + } + iter.seek_to_key(b"k"); } } diff --git a/mini-lsm/src/lib.rs b/mini-lsm/src/lib.rs index a863eaa..cd9e46c 100644 --- a/mini-lsm/src/lib.rs +++ b/mini-lsm/src/lib.rs @@ -1 +1,2 @@ pub mod block; +pub mod table; diff --git a/mini-lsm/src/table.rs b/mini-lsm/src/table.rs new file mode 100644 index 0000000..0a73851 --- /dev/null +++ b/mini-lsm/src/table.rs @@ -0,0 +1,113 @@ +mod builder; +mod iterator; + +use std::{path::Path, sync::Arc}; + +pub use builder::SsTableBuilder; +use bytes::{Buf, BufMut, Bytes}; +pub use iterator::SsTableIterator; + +use crate::block::Block; +use anyhow::Result; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct BlockMeta { + pub offset: usize, + pub first_key: Bytes, +} + +impl BlockMeta { + pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec) { + let mut estimated_size = 0; + for meta in block_meta { + estimated_size += std::mem::size_of::(); + estimated_size += std::mem::size_of::(); + estimated_size += meta.first_key.len(); + } + buf.reserve(estimated_size); + let original_len = buf.len(); + for meta in block_meta { + buf.put_u32(meta.offset as u32); + buf.put_u16(meta.first_key.len() as u16); + buf.put_slice(&meta.first_key); + } + assert_eq!(estimated_size, buf.len() - original_len); + } + + pub fn decode_block_meta(mut buf: impl Buf) -> Vec { + let mut block_meta = Vec::new(); + while buf.has_remaining() { + let offset = buf.get_u32() as usize; + let first_key_len = buf.get_u16() as usize; + let first_key = buf.copy_to_bytes(first_key_len); + block_meta.push(BlockMeta { offset, first_key }); + } + block_meta + } +} + +pub struct FileObject(Bytes); + +impl FileObject { + pub fn read(&self, offset: u64, len: u64) -> Result> { + Ok(self.0[offset as usize..(offset + len) as usize].to_vec()) + } + + pub fn size(&self) -> u64 { + self.0.len() as u64 + } + + pub fn create(_path: &Path, data: Vec) -> Result { + Ok(FileObject(data.into())) + } + + pub fn open(_path: &Path) -> Result { + unimplemented!() + } +} + +pub struct SsTable { + file: FileObject, + block_metas: Vec, + block_meta_offset: usize, +} + +impl SsTable { + pub fn open(file: FileObject) -> Result { + let len = file.size(); + let raw_meta_offset = file.read(len - 4, 4)?; + let block_meta_offset = (&raw_meta_offset[..]).get_u32() as u64; + let raw_meta = file.read(block_meta_offset, len - 4 - block_meta_offset)?; + Ok(Self { + file, + block_metas: BlockMeta::decode_block_meta(&raw_meta[..]), + block_meta_offset: block_meta_offset as usize, + }) + } + + fn read_block(&self, block_idx: usize) -> Result> { + let offset = self.block_metas[block_idx].offset; + let offset_end = self + .block_metas + .get(block_idx + 1) + .map(|x| x.offset) + .unwrap_or(self.block_meta_offset); + let block_data = self + .file + .read(offset as u64, (offset_end - offset) as u64)?; + Ok(Arc::new(Block::decode(&block_data[..]))) + } + + fn find_block_idx(&self, key: &[u8]) -> usize { + self.block_metas + .partition_point(|meta| meta.first_key <= key) + .saturating_sub(1) + } + + fn num_of_blocks(&self) -> usize { + self.block_metas.len() + } +} + +#[cfg(test)] +mod tests; diff --git a/mini-lsm/src/table/builder.rs b/mini-lsm/src/table/builder.rs new file mode 100644 index 0000000..db71360 --- /dev/null +++ b/mini-lsm/src/table/builder.rs @@ -0,0 +1,75 @@ +use anyhow::Result; +use bytes::BufMut; +use std::path::Path; + +use super::{BlockMeta, FileObject, SsTable}; +use crate::block::BlockBuilder; + +pub struct SsTableBuilder { + builder: BlockBuilder, + first_key: Vec, + data: Vec, + pub(super) meta: Vec, + target_size: usize, + block_size: usize, +} + +impl SsTableBuilder { + pub fn new(target_size: usize, block_size: usize) -> Self { + Self { + data: Vec::new(), + meta: Vec::new(), + first_key: Vec::new(), + target_size, + block_size, + builder: BlockBuilder::new(block_size), + } + } + + #[must_use] + pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { + if self.data.len() > self.target_size { + return false; + } + + if self.first_key.is_empty() { + self.first_key = key.to_vec(); + } + + if self.builder.add(key, value) { + return true; + } + // create a new block builder and append block data + self.finish_block(); + + // add the key-value pair to the next block + assert!(self.builder.add(key, value)); + self.first_key = key.to_vec(); + + true + } + + fn finish_block(&mut self) { + let builder = std::mem::replace(&mut self.builder, BlockBuilder::new(self.block_size)); + let encoded_block = builder.build().encode(); + self.meta.push(BlockMeta { + offset: self.data.len(), + first_key: std::mem::take(&mut self.first_key).into(), + }); + self.data.extend(encoded_block); + } + + pub fn build(mut self, path: impl AsRef) -> Result { + self.finish_block(); + let mut buf = self.data; + let meta_offset = buf.len(); + BlockMeta::encode_block_meta(&self.meta, &mut buf); + buf.put_u32(meta_offset as u32); + let file = FileObject::create(path.as_ref(), buf)?; + Ok(SsTable { + file, + block_metas: self.meta, + block_meta_offset: meta_offset, + }) + } +} diff --git a/mini-lsm/src/table/iterator.rs b/mini-lsm/src/table/iterator.rs new file mode 100644 index 0000000..d8b3cc6 --- /dev/null +++ b/mini-lsm/src/table/iterator.rs @@ -0,0 +1,91 @@ +use anyhow::Result; +use std::sync::Arc; + +use super::SsTable; +use crate::block::BlockIterator; + +pub struct SsTableIterator { + table: Arc, + blk_iter: BlockIterator, + blk_idx: usize, +} + +impl SsTableIterator { + fn seek_to_first_inner(table: &Arc) -> Result<(usize, BlockIterator)> { + Ok(( + 0, + BlockIterator::create_and_seek_to_first(table.read_block(0)?), + )) + } + + pub fn create_and_seek_to_first(table: Arc) -> Result { + let (blk_idx, blk_iter) = Self::seek_to_first_inner(&table)?; + let iter = Self { + blk_iter, + table, + blk_idx, + }; + Ok(iter) + } + + pub fn seek_to_first(&mut self) -> Result<()> { + let (blk_idx, blk_iter) = Self::seek_to_first_inner(&self.table)?; + self.blk_idx = blk_idx; + self.blk_iter = blk_iter; + Ok(()) + } + + fn seek_to_key_inner(table: &Arc, key: &[u8]) -> Result<(usize, BlockIterator)> { + let mut blk_idx = table.find_block_idx(key); + let mut blk_iter = BlockIterator::create_and_seek_to_key(table.read_block(blk_idx)?, key); + if !blk_iter.is_valid() { + blk_idx += 1; + if blk_idx < table.num_of_blocks() { + blk_iter = BlockIterator::create_and_seek_to_first(table.read_block(blk_idx)?); + } + } + Ok((blk_idx, blk_iter)) + } + + pub fn create_and_seek_to_key(table: Arc, key: &[u8]) -> Result { + let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?; + let iter = Self { + blk_iter, + table, + blk_idx, + }; + Ok(iter) + } + + pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> { + let (blk_idx, blk_iter) = Self::seek_to_key_inner(&self.table, key)?; + self.blk_iter = blk_iter; + self.blk_idx = blk_idx; + Ok(()) + } + + pub fn key(&self) -> &[u8] { + self.blk_iter.key() + } + + pub fn value(&self) -> &[u8] { + self.blk_iter.value() + } + + pub fn is_valid(&self) -> bool { + self.blk_iter.is_valid() + } + + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Result<()> { + self.blk_iter.next(); + if !self.blk_iter.is_valid() { + self.blk_idx += 1; + if self.blk_idx < self.table.num_of_blocks() { + self.blk_iter = + BlockIterator::create_and_seek_to_first(self.table.read_block(self.blk_idx)?); + } + } + Ok(()) + } +} diff --git a/mini-lsm/src/table/tests.rs b/mini-lsm/src/table/tests.rs new file mode 100644 index 0000000..7c7dc67 --- /dev/null +++ b/mini-lsm/src/table/tests.rs @@ -0,0 +1,134 @@ +use std::sync::Arc; + +use bytes::Bytes; + +use crate::table::SsTableBuilder; + +use super::{SsTable, SsTableIterator}; + +#[test] +fn test_sst_build_single_key() { + let mut builder = SsTableBuilder::new(16, 16); + assert!(builder.add(b"233", b"233333")); + builder.build("").unwrap(); +} + +#[test] +fn test_sst_build_two_blocks() { + let mut builder = SsTableBuilder::new(1024, 16); + assert!(builder.add(b"11", b"11")); + assert!(builder.add(b"22", b"22")); + assert!(builder.add(b"33", b"11")); + assert!(builder.add(b"44", b"22")); + assert!(builder.add(b"55", b"11")); + assert!(builder.add(b"66", b"22")); + assert!(builder.meta.len() >= 2); + builder.build("").unwrap(); +} + +#[test] +fn test_sst_build_full() { + let mut builder = SsTableBuilder::new(32, 16); + assert!(builder.add(b"11", b"11")); + assert!(builder.add(b"22", b"22")); + assert!(builder.add(b"33", b"11")); + assert!(builder.add(b"44", b"22")); + assert!(!builder.add(b"55", b"11")); + builder.build("").unwrap(); +} + +fn key_of(idx: usize) -> Vec { + format!("key_{:03}", idx * 5).into_bytes() +} + +fn value_of(idx: usize) -> Vec { + format!("value_{:010}", idx).into_bytes() +} + +fn num_of_keys() -> usize { + 100 +} + +fn generate_sst() -> SsTable { + let mut builder = SsTableBuilder::new(65536, 128); + for idx in 0..num_of_keys() { + let key = key_of(idx); + let value = value_of(idx); + assert!(builder.add(&key[..], &value[..])); + } + builder.build("").unwrap() +} + +#[test] +fn test_sst_build_all() { + generate_sst(); +} + +#[test] +fn test_sst_decode() { + let sst = generate_sst(); + let meta = sst.block_metas.clone(); + let new_sst = SsTable::open(sst.file).unwrap(); + assert_eq!(new_sst.block_metas, meta); +} + +fn as_bytes(x: &[u8]) -> Bytes { + Bytes::copy_from_slice(x) +} + +#[test] +fn test_sst_iterator() { + let sst = Arc::new(generate_sst()); + let mut iter = SsTableIterator::create_and_seek_to_first(sst).unwrap(); + for _ in 0..5 { + for i in 0..num_of_keys() { + let key = iter.key(); + let value = iter.value(); + assert_eq!( + key, + key_of(i), + "expected key: {:?}, actual key: {:?}", + as_bytes(&key_of(i)), + as_bytes(key) + ); + assert_eq!( + value, + value_of(i), + "expected value: {:?}, actual value: {:?}", + as_bytes(&value_of(i)), + as_bytes(value) + ); + iter.next().unwrap(); + } + iter.seek_to_first().unwrap(); + } +} + +#[test] +fn test_sst_seek_key() { + let sst = Arc::new(generate_sst()); + let mut iter = SsTableIterator::create_and_seek_to_key(sst, &key_of(0)).unwrap(); + for offset in 1..=5 { + for i in 0..num_of_keys() { + let key = iter.key(); + let value = iter.value(); + assert_eq!( + key, + key_of(i), + "expected key: {:?}, actual key: {:?}", + as_bytes(&key_of(i)), + as_bytes(key) + ); + assert_eq!( + value, + value_of(i), + "expected value: {:?}, actual value: {:?}", + as_bytes(&value_of(i)), + as_bytes(value) + ); + iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes()) + .unwrap(); + } + iter.seek_to_key(b"k").unwrap(); + } +}