From d109882d89718b75afb0cb39c04473d5302d7a75 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Wed, 10 Jan 2024 14:25:23 +0800 Subject: [PATCH] add compaction skeleton (#16) * add compaction skeleton Signed-off-by: Alex Chi * remove tombstone when compact to bottom-most level Signed-off-by: Alex Chi * new plan Signed-off-by: Alex Chi Z --------- Signed-off-by: Alex Chi Signed-off-by: Alex Chi Z --- .cargo/config.toml | 1 + README.md | 35 ++++++++++++------ mini-lsm-book/src/SUMMARY.md | 7 +++- mini-lsm/src/compact.rs | 72 ++++++++++++++++++++++++++++++++++++ mini-lsm/src/lib.rs | 1 + mini-lsm/src/lsm_storage.rs | 23 +++++++----- rustfmt.toml | 12 ++++++ 7 files changed, 128 insertions(+), 23 deletions(-) create mode 100644 mini-lsm/src/compact.rs create mode 100644 rustfmt.toml diff --git a/.cargo/config.toml b/.cargo/config.toml index 776c9c9..f43e1c4 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,3 +1,4 @@ [alias] xtask = "run --package mini-lsm-xtask --" x = "run --package mini-lsm-xtask --" +test = "nextest run" diff --git a/README.md b/README.md index 1810c8a..747b232 100644 --- a/README.md +++ b/README.md @@ -22,16 +22,29 @@ To do this, use `cargo x sync`. ## Progress -The tutorial has 8 parts (which can be finished in 7 days): +We are working on a new version of the mini-lsm tutorial that is split into 3 weeks. -* Day 1: Block encoding. SSTs are composed of multiple data blocks. We will implement the block encoding. -* Day 2: SST encoding. -* Day 3: MemTable and Merge Iterators. -* Day 4: Block cache and Engine. To reduce disk I/O and maximize performance, we will use moka-rs to build a block cache - for the LSM tree. In this day we will get a functional (but not persistent) key-value engine with `get`, `put`, `scan`, - `delete` API. -* Day 5: Compaction. Now it's time to maintain a leveled structure for SSTs. -* Day 6: Recovery. We will implement WAL and manifest so that the engine can recover after restart. -* Day 7: Bloom filter and key compression. They are widely-used optimizations in LSM tree structures. +* Week 1: Storage Format + Engine Skeleton +* Week 2: Compaction and Persistence +* Week 3: Week 3 -- Multi-Version Concurrency Control -We have reference solution up to day 4 and tutorial up to day 4 for now. +| Week + Chapter | Topic | Solution | Starter Code | Writeup | +| ---- | ------------------ | --------------- | ----------------- | --------- | +| 1.1 | Block Format | ✅ | ✅ | ✅ | +| 1.2 | Table Format | ✅ | ✅ | ✅ | | +| 1.3 | Memtables | ✅ | ✅ | ✅ | | +| 1.4 | Merge Iterators | ✅ | ✅ | ✅ | +| 1.5 | Storage Engine - Read Path | ✅ | ✅ | ✅ | +| 1.6 | Storage Engine - Write Path | ✅ | ✅ | ✅ | +| 2.1 | Compaction Framework | ✅ | 🚧 | 🚧 | +| 2.2 | Compaction Strategy | 🚧 | | | +| 2.3 | Write-Ahead Log | | | | +| 2.4 | Manifest | | | | +| 2.5 | Bloom Filter | | | | +| 2.6 | Key Compression | | | | +| 3.1 | Timestamp Encoding | | | | +| 3.2 | Prefix Bloom Filter | | | | +| 3.3 | Snapshot Read | | | | +| 3.4 | Watermark | | | | +| 3.5 | Garbage Collection | | | | +| 3.6 | Serializable Snapshot Isolation | | | | \ No newline at end of file diff --git a/mini-lsm-book/src/SUMMARY.md b/mini-lsm-book/src/SUMMARY.md index 6e8190f..049345e 100644 --- a/mini-lsm-book/src/SUMMARY.md +++ b/mini-lsm-book/src/SUMMARY.md @@ -5,18 +5,21 @@ --- -# Tutorial +# Checkpoint 1 - [Store key-value pairs in little blocks](./01-block.md) - [And make them into an SST](./02-sst.md) - [Now it's time to merge everything](./03-memtable.md) - [The engine is on fire](./04-engine.md) +# Checkpoint 2 + +- [Let's do something in the background](./05-compaction.md) + --- # WIP Chapters -- [Let's do something in the background](./05-compaction.md) - [Be careful when the system crashes](./06-recovery.md) - [A good bloom filter makes life easier](./07-bloom-filter.md) - [Save some space, hopefully](./08-key-compression.md) diff --git a/mini-lsm/src/compact.rs b/mini-lsm/src/compact.rs new file mode 100644 index 0000000..eaaa78f --- /dev/null +++ b/mini-lsm/src/compact.rs @@ -0,0 +1,72 @@ +use std::sync::Arc; + +use anyhow::Result; + +use crate::{ + iterators::{merge_iterator::MergeIterator, StorageIterator}, + lsm_storage::LsmStorage, + table::{SsTable, SsTableBuilder, SsTableIterator}, +}; + +struct CompactOptions { + block_size: usize, + target_sst_size: usize, + compact_to_bottom_level: bool, +} + +impl LsmStorage { + #[allow(dead_code)] + fn compact( + &self, + tables: Vec>, + options: CompactOptions, + ) -> Result>> { + let mut iters = Vec::new(); + iters.reserve(tables.len()); + for table in tables.iter() { + iters.push(Box::new(SsTableIterator::create_and_seek_to_first( + table.clone(), + )?)); + } + let mut iter = MergeIterator::create(iters); + + let mut builder = None; + let mut new_sst = vec![]; + + while iter.is_valid() { + if builder.is_none() { + builder = Some(SsTableBuilder::new(options.block_size)); + } + let builder_inner = builder.as_mut().unwrap(); + if options.compact_to_bottom_level { + if !iter.value().is_empty() { + builder_inner.add(iter.key(), iter.value()); + } + } else { + builder_inner.add(iter.key(), iter.value()); + } + iter.next()?; + + if builder_inner.estimated_size() >= options.target_sst_size { + let sst_id = self.next_sst_id(); // lock dropped here + let builder = builder.take().unwrap(); + let sst = Arc::new(builder.build( + sst_id, + Some(self.block_cache.clone()), + self.path_of_sst(sst_id), + )?); + new_sst.push(sst); + } + } + if let Some(builder) = builder { + let sst_id = self.next_sst_id(); // lock dropped here + let sst = Arc::new(builder.build( + sst_id, + Some(self.block_cache.clone()), + self.path_of_sst(sst_id), + )?); + new_sst.push(sst); + } + Ok(new_sst) + } +} diff --git a/mini-lsm/src/lib.rs b/mini-lsm/src/lib.rs index 40ff84a..2912bec 100644 --- a/mini-lsm/src/lib.rs +++ b/mini-lsm/src/lib.rs @@ -1,4 +1,5 @@ pub mod block; +mod compact; pub mod iterators; pub mod lsm_iterator; pub mod lsm_storage; diff --git a/mini-lsm/src/lsm_storage.rs b/mini-lsm/src/lsm_storage.rs index b8996fe..538234d 100644 --- a/mini-lsm/src/lsm_storage.rs +++ b/mini-lsm/src/lsm_storage.rs @@ -1,5 +1,6 @@ use std::ops::Bound; use std::path::{Path, PathBuf}; +use std::sync::atomic::AtomicUsize; use std::sync::Arc; use anyhow::Result; @@ -27,8 +28,6 @@ pub struct LsmStorageInner { /// L1 - L6 SsTables, sorted by key range. #[allow(dead_code)] levels: Vec>>, - /// The next SSTable ID. - next_sst_id: usize, } impl LsmStorageInner { @@ -38,26 +37,32 @@ impl LsmStorageInner { imm_memtables: vec![], l0_sstables: vec![], levels: vec![], - next_sst_id: 1, } } } /// The storage interface of the LSM tree. pub struct LsmStorage { - inner: Arc>>, + pub(crate) inner: Arc>>, flush_lock: Mutex<()>, path: PathBuf, - block_cache: Arc, + pub(crate) block_cache: Arc, + next_sst_id: AtomicUsize, } impl LsmStorage { + pub(crate) fn next_sst_id(&self) -> usize { + self.next_sst_id + .fetch_add(1, std::sync::atomic::Ordering::SeqCst) + } + pub fn open(path: impl AsRef) -> Result { Ok(Self { inner: Arc::new(RwLock::new(Arc::new(LsmStorageInner::create()))), flush_lock: Mutex::new(()), path: path.as_ref().to_path_buf(), - block_cache: Arc::new(BlockCache::new(1 << 20)), // 4GB block cache + block_cache: Arc::new(BlockCache::new(1 << 20)), // 4GB block cache, + next_sst_id: AtomicUsize::new(1), }) } @@ -121,7 +126,7 @@ impl LsmStorage { Ok(()) } - fn path_of_sst(&self, id: usize) -> PathBuf { + pub(crate) fn path_of_sst(&self, id: usize) -> PathBuf { self.path.join(format!("{:05}.sst", id)) } @@ -142,7 +147,7 @@ impl LsmStorage { let mut snapshot = guard.as_ref().clone(); let memtable = std::mem::replace(&mut snapshot.memtable, Arc::new(MemTable::create())); flush_memtable = memtable.clone(); - sst_id = snapshot.next_sst_id; + sst_id = self.next_sst_id(); // Add the memtable to the immutable memtables. snapshot.imm_memtables.push(memtable); // Update the snapshot. @@ -169,8 +174,6 @@ impl LsmStorage { snapshot.imm_memtables.pop(); // Add L0 table snapshot.l0_sstables.push(sst); - // Update SST ID - snapshot.next_sst_id += 1; // Update the snapshot. *guard = Arc::new(snapshot); } diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..64caf7b --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,12 @@ +comment_width = 120 +format_code_in_doc_comments = true +format_macro_bodies = true +format_macro_matchers = true +normalize_comments = true +normalize_doc_attributes = true +imports_granularity = "Module" +group_imports = "StdExternalCrate" +reorder_impl_items = true +reorder_imports = true +tab_spaces = 4 +wrap_comments = true \ No newline at end of file