From f88394a6868f29e714a3445beb9da93eece433de Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Sun, 21 Jan 2024 13:55:49 +0800 Subject: [PATCH] add week 1 day 3 blocks Signed-off-by: Alex Chi Z --- README.md | 2 +- mini-lsm-book/src/week1-02-merge-iterator.md | 6 +- mini-lsm-book/src/week1-03-block.md | 75 +++++++++- mini-lsm-starter/src/block.rs | 18 +-- mini-lsm-starter/src/block/iterator.rs | 6 +- mini-lsm-starter/src/block/tests.rs | 1 - mini-lsm-starter/src/table.rs | 3 - mini-lsm-starter/src/table/tests.rs | 1 - mini-lsm/src/tests/day3.rs | 136 +++++++++++++++++++ 9 files changed, 220 insertions(+), 28 deletions(-) delete mode 100644 mini-lsm-starter/src/block/tests.rs delete mode 100644 mini-lsm-starter/src/table/tests.rs create mode 100644 mini-lsm/src/tests/day3.rs diff --git a/README.md b/README.md index c254a71..ad43b94 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ We are working on a new version of the mini-lsm tutorial that is split into 3 we | -------------- | ----------------------------------------------- | -------- | ------------ | ------- | | 1.1 | Memtables | ✅ | ✅ | ✅ | | 1.2 | Merge Iterators | ✅ | ✅ | ✅ | -| 1.3 | Block Format | ✅ | 🚧 | 🚧 | +| 1.3 | Block Format | ✅ | ✅ | ✅ | | 1.4 | Table Format | ✅ | 🚧 | 🚧 | | 1.5 | Storage Engine - Read Path | ✅ | 🚧 | 🚧 | | 1.6 | Storage Engine - Write Path | ✅ | 🚧 | 🚧 | diff --git a/mini-lsm-book/src/week1-02-merge-iterator.md b/mini-lsm-book/src/week1-02-merge-iterator.md index f39973c..428e49d 100644 --- a/mini-lsm-book/src/week1-02-merge-iterator.md +++ b/mini-lsm-book/src/week1-02-merge-iterator.md @@ -78,7 +78,7 @@ The constructor of the merge iterator takes a vector of iterators. We assume the One common pitfall is on error handling. 
For example, -```rust +```rust,no_run let Some(mut inner_iter) = self.iters.peek_mut() { inner_iter.next()?; // <- will cause problem } @@ -116,7 +116,7 @@ In this task, you will need to modify: src/iterators/lsm_storage.rs ``` -We are finally there -- with all iterators you have implemented, you can finally implement the `scan` interface of the LSM engine. +We are finally there -- with all iterators you have implemented, you can finally implement the `scan` interface of the LSM engine. You can simply construct an LSM iterator with the memtable iterators (remember to put the latest memtable at the front of the merge iterator), and your storage engine will be able to handle the scan request. ## Test Your Understanding @@ -133,7 +133,7 @@ We are finally there -- with all iterators you have implemented, you can finally We do not provide reference answers to the questions, and feel free to discuss about them in the Discord community. -## Bonus Task +## Bonus Tasks * **Foreground Iterator.** In this tutorial we assumed that all operations are short, so that we can hold reference to mem-table in the iterator. If an iterator is held by users for a long time, the whole mem-table (which might be 256MB) will stay in the memory even if it has been flushed to disk. To solve this, we can provide a `ForegroundIterator` / `LongIterator` to our user. The iterator will periodically create new underlying storage iterator so as to allow garbage collection of the resources. diff --git a/mini-lsm-book/src/week1-03-block.md b/mini-lsm-book/src/week1-03-block.md index 8ff0c4e..3e81d01 100644 --- a/mini-lsm-book/src/week1-03-block.md +++ b/mini-lsm-book/src/week1-03-block.md @@ -9,8 +9,81 @@ In this chapter, you will: ## Task 1: Block Builder +You have already implemented all in-memory structures for an LSM storage engine in the previous two chapters. Now it's time to build the on-disk structures. The basic unit of the on-disk structure is blocks. 
Blocks are usually of 4-KB size (the size may vary depending on the storage medium), which is equivalent to the page size in the operating system and the page size on an SSD. A block stores ordered key-value pairs. An SST is composed of multiple blocks. When the number of memtables exceeds the system limit, the storage engine will flush a memtable as an SST. In this chapter, you will implement the encoding and decoding of a block. + +In this task, you will need to modify: + +``` +src/block/builder.rs +src/block.rs +``` + +The block encoding format in our tutorial is as follows: + +```plaintext +---------------------------------------------------------------------------------------------------- +| Data Section | Offset Section | Extra | +---------------------------------------------------------------------------------------------------- +| Entry #1 | Entry #2 | ... | Entry #N | Offset #1 | Offset #2 | ... | Offset #N | num_of_elements | +---------------------------------------------------------------------------------------------------- +``` + +Each entry is a key-value pair. + +```plaintext +----------------------------------------------------------------------- +| Entry #1 | ... | +----------------------------------------------------------------------- +| key_len (2B) | key (keylen) | value_len (2B) | value (varlen) | ... | +----------------------------------------------------------------------- +``` + +Key length and value length are each stored in 2 bytes (internally as `u16`), which means that a key or a value can be at most 65535 bytes long. + +We assume that keys will never be empty, and values can be empty. An empty value means that the corresponding key has been deleted in the view of other parts of the system. For the `BlockBuilder` and `BlockIterator`, we just treat the empty value as-is. + +At the end of each block, we will store the offsets of each entry and the total number of entries. 
For example, suppose +the first entry is at the 0th position of the block, and the second entry is at the 12th position of the block. + +``` +------------------------------- +|offset|offset|num_of_elements| +------------------------------- +| 0 | 12 | 2 | +------------------------------- +``` + +The footer of the block will be as above. Each of the numbers is stored as a `u16`. + +The block has a size limit, which is `target_size`. Unless the first key-value pair exceeds the target block size, you should ensure that the encoded block size is always less than or equal to `target_size`. (In the provided code, the `target_size` here is essentially the `block_size`.) + +The `BlockBuilder` will produce the data part and unencoded entry offsets when `build` is called. The information will be stored in the `Block` structure. As key-value entries are stored in raw format and offsets are stored in a separate vector, this reduces unnecessary memory allocations and processing overhead when decoding data — all you need to do is copy the raw block data to the `data` vector and decode the entry offsets every 2 bytes, *instead of* creating something like `Vec<(Vec<u8>, Vec<u8>)>` to store all the key-value pairs in one block in memory. This compact memory layout is very efficient. + +In `Block::encode` and `Block::decode`, you will need to encode/decode the block in the format as indicated above. + ## Task 2: Block Iterator +In this task, you will need to modify: + +``` +src/block/iterator.rs +``` + +Now that we have an encoded block, we will need to implement the `StorageIterator` interface, so that the user can look up/scan keys in the block. + +`BlockIterator` can be created with an `Arc<Block>`. If `create_and_seek_to_first` is called, it will be positioned at the first key in the block. If `create_and_seek_to_key` is called, the iterator will be positioned at the first key that is `>=` the provided key. For example, suppose the keys `1, 3, 5` are in a block: 
+ +```rust,no_run +let mut iter = BlockIterator::create_and_seek_to_key(block, b"2"); +assert_eq!(iter.key(), b"3"); +``` + +The above `seek 2` will position the iterator at the first key that is `>=` `2`, which in this case is `3`. + +The iterator should copy the `key` from the block and store it inside the iterator (we will add key compression in the future, which will make this copy necessary). For the value, you should only store its begin/end offsets in the iterator without copying the value itself. + +When `next` is called, the iterator will move to the next position. If we reach the end of the block, we can set `key` to empty and return `false` from `is_valid`, so that the caller can switch to another block if possible. + ## Test Your Understanding * What is the time complexity of seeking a key in the block? @@ -27,6 +100,6 @@ We do not provide reference answers to the questions, and feel free to discuss a ## Bonus Tasks -* **Backward Iterators.** +* **Backward Iterators.** You may implement `prev` for your `BlockIterator` so that you will be able to iterate over the key-value pairs in reverse order. You may also implement backward variants of the merge iterator and the SST iterator (in the next chapter) so that your storage engine can do a reverse scan. {{#include copyright.md}} diff --git a/mini-lsm-starter/src/block.rs b/mini-lsm-starter/src/block.rs index d87513f..41e3df2 100644 --- a/mini-lsm-starter/src/block.rs +++ b/mini-lsm-starter/src/block.rs @@ -5,22 +5,13 @@ mod builder; mod iterator; pub use builder::BlockBuilder; -/// You may want to check `bytes::BufMut` out when manipulating continuous chunks of memory use bytes::Bytes; pub use iterator::BlockIterator; -/// A block is the smallest unit of read and caching in LSM tree. -/// It is a collection of sorted key-value pairs. 
-/// The `actual` storage format is as below (After `Block::encode`): -/// -/// ---------------------------------------------------------------------------------------------------- -/// | Data Section | Offset Section | Extra | -/// ---------------------------------------------------------------------------------------------------- -/// | Entry #1 | Entry #2 | ... | Entry #N | Offset #1 | Offset #2 | ... | Offset #N | num_of_elements | -/// ---------------------------------------------------------------------------------------------------- +/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted key-value pairs. pub struct Block { - data: Vec, - offsets: Vec, + pub(crate) data: Vec, + pub(crate) offsets: Vec, } impl Block { @@ -35,6 +26,3 @@ impl Block { unimplemented!() } } - -#[cfg(test)] -mod tests; diff --git a/mini-lsm-starter/src/block/iterator.rs b/mini-lsm-starter/src/block/iterator.rs index 728dbc8..4e69678 100644 --- a/mini-lsm-starter/src/block/iterator.rs +++ b/mini-lsm-starter/src/block/iterator.rs @@ -11,8 +11,8 @@ pub struct BlockIterator { block: Arc, /// The current key, empty represents the iterator is invalid key: Vec, - /// The corresponding value, can be empty - value: Vec, + /// the value range from the block + value_range: (usize, usize), /// Current index of the key-value pair, should be in range of [0, num_of_elements) idx: usize, } @@ -22,7 +22,7 @@ impl BlockIterator { Self { block, key: Vec::new(), - value: Vec::new(), + value_range: (0, 0), idx: 0, } } diff --git a/mini-lsm-starter/src/block/tests.rs b/mini-lsm-starter/src/block/tests.rs deleted file mode 100644 index a6eb840..0000000 --- a/mini-lsm-starter/src/block/tests.rs +++ /dev/null @@ -1 +0,0 @@ -//! Please copy `mini-lsm/src/block/tests.rs` here so that you can run tests. 
diff --git a/mini-lsm-starter/src/table.rs b/mini-lsm-starter/src/table.rs index 8173a87..39d0b0b 100644 --- a/mini-lsm-starter/src/table.rs +++ b/mini-lsm-starter/src/table.rs @@ -159,6 +159,3 @@ impl SsTable { self.id } } - -#[cfg(test)] -mod tests; diff --git a/mini-lsm-starter/src/table/tests.rs b/mini-lsm-starter/src/table/tests.rs deleted file mode 100644 index cc38899..0000000 --- a/mini-lsm-starter/src/table/tests.rs +++ /dev/null @@ -1 +0,0 @@ -//! Please copy `mini-lsm/src/table/tests.rs` here so that you can run tests. diff --git a/mini-lsm/src/tests/day3.rs b/mini-lsm/src/tests/day3.rs new file mode 100644 index 0000000..850f969 --- /dev/null +++ b/mini-lsm/src/tests/day3.rs @@ -0,0 +1,136 @@ +use std::sync::Arc; + +use bytes::Bytes; + +use crate::block::{Block, BlockBuilder, BlockIterator}; + +#[test] +fn test_block_build_single_key() { + let mut builder = BlockBuilder::new(16); + assert!(builder.add(b"233", b"233333")); + builder.build(); +} + +#[test] +fn test_block_build_full() { + let mut builder = BlockBuilder::new(16); + assert!(builder.add(b"11", b"11")); + assert!(!builder.add(b"22", b"22")); + builder.build(); +} + +#[test] +fn test_block_build_large_1() { + let mut builder = BlockBuilder::new(16); + assert!(builder.add(b"11", &b"1".repeat(100))); + builder.build(); +} + +#[test] +fn test_block_build_large_2() { + let mut builder = BlockBuilder::new(16); + assert!(builder.add(b"11", b"1")); + assert!(!builder.add(b"11", &b"1".repeat(100))); +} + +fn key_of(idx: usize) -> Vec { + format!("key_{:03}", idx * 5).into_bytes() +} + +fn value_of(idx: usize) -> Vec { + format!("value_{:010}", idx).into_bytes() +} + +fn num_of_keys() -> usize { + 100 +} + +fn generate_block() -> Block { + let mut builder = BlockBuilder::new(10000); + for idx in 0..num_of_keys() { + let key = key_of(idx); + let value = value_of(idx); + assert!(builder.add(&key[..], &value[..])); + } + builder.build() +} + +#[test] +fn test_block_build_all() { + generate_block(); +} + 
+#[test] +fn test_block_encode() { + let block = generate_block(); + block.encode(); +} + +#[test] +fn test_block_decode() { + let block = generate_block(); + let encoded = block.encode(); + let decoded_block = Block::decode(&encoded); + assert_eq!(block.offsets, decoded_block.offsets); + assert_eq!(block.data, decoded_block.data); +} + +fn as_bytes(x: &[u8]) -> Bytes { + Bytes::copy_from_slice(x) +} + +#[test] +fn test_block_iterator() { + let block = Arc::new(generate_block()); + let mut iter = BlockIterator::create_and_seek_to_first(block); + for _ in 0..5 { + for i in 0..num_of_keys() { + let key = iter.key(); + let value = iter.value(); + assert_eq!( + key, + key_of(i), + "expected key: {:?}, actual key: {:?}", + as_bytes(&key_of(i)), + as_bytes(key) + ); + assert_eq!( + value, + value_of(i), + "expected value: {:?}, actual value: {:?}", + as_bytes(&value_of(i)), + as_bytes(value) + ); + iter.next(); + } + iter.seek_to_first(); + } +} + +#[test] +fn test_block_seek_key() { + let block = Arc::new(generate_block()); + let mut iter = BlockIterator::create_and_seek_to_key(block, &key_of(0)); + for offset in 1..=5 { + for i in 0..num_of_keys() { + let key = iter.key(); + let value = iter.value(); + assert_eq!( + key, + key_of(i), + "expected key: {:?}, actual key: {:?}", + as_bytes(&key_of(i)), + as_bytes(key) + ); + assert_eq!( + value, + value_of(i), + "expected value: {:?}, actual value: {:?}", + as_bytes(&value_of(i)), + as_bytes(value) + ); + iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes()); + } + iter.seek_to_key(b"k"); + } +}