add compaction skeleton (#16)
* add compaction skeleton Signed-off-by: Alex Chi <iskyzh@gmail.com> * remove tombstone when compact to bottom-most level Signed-off-by: Alex Chi <iskyzh@gmail.com> * new plan Signed-off-by: Alex Chi Z <iskyzh@gmail.com> --------- Signed-off-by: Alex Chi <iskyzh@gmail.com> Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
[alias]
|
[alias]
|
||||||
xtask = "run --package mini-lsm-xtask --"
|
xtask = "run --package mini-lsm-xtask --"
|
||||||
x = "run --package mini-lsm-xtask --"
|
x = "run --package mini-lsm-xtask --"
|
||||||
|
test = "nextest run"
|
||||||
|
|||||||
35
README.md
35
README.md
@@ -22,16 +22,29 @@ To do this, use `cargo x sync`.
|
|||||||
|
|
||||||
## Progress
|
## Progress
|
||||||
|
|
||||||
The tutorial has 8 parts (which can be finished in 7 days):
|
We are working on a new version of the mini-lsm tutorial that is split into 3 weeks.
|
||||||
|
|
||||||
* Day 1: Block encoding. SSTs are composed of multiple data blocks. We will implement the block encoding.
|
* Week 1: Storage Format + Engine Skeleton
|
||||||
* Day 2: SST encoding.
|
* Week 2: Compaction and Persistence
|
||||||
* Day 3: MemTable and Merge Iterators.
|
* Week 3: Multi-Version Concurrency Control
|
||||||
* Day 4: Block cache and Engine. To reduce disk I/O and maximize performance, we will use moka-rs to build a block cache
|
|
||||||
for the LSM tree. In this day we will get a functional (but not persistent) key-value engine with `get`, `put`, `scan`,
|
|
||||||
`delete` API.
|
|
||||||
* Day 5: Compaction. Now it's time to maintain a leveled structure for SSTs.
|
|
||||||
* Day 6: Recovery. We will implement WAL and manifest so that the engine can recover after restart.
|
|
||||||
* Day 7: Bloom filter and key compression. They are widely-used optimizations in LSM tree structures.
|
|
||||||
|
|
||||||
We have reference solution up to day 4 and tutorial up to day 4 for now.
|
| Week + Chapter | Topic | Solution | Starter Code | Writeup |
|
||||||
|
| ---- | ------------------ | --------------- | ----------------- | --------- |
|
||||||
|
| 1.1 | Block Format | ✅ | ✅ | ✅ |
|
||||||
|
| 1.2 | Table Format | ✅ | ✅ | ✅ |
|
||||||
|
| 1.3 | Memtables | ✅ | ✅ | ✅ |
|
||||||
|
| 1.4 | Merge Iterators | ✅ | ✅ | ✅ |
|
||||||
|
| 1.5 | Storage Engine - Read Path | ✅ | ✅ | ✅ |
|
||||||
|
| 1.6 | Storage Engine - Write Path | ✅ | ✅ | ✅ |
|
||||||
|
| 2.1 | Compaction Framework | ✅ | 🚧 | 🚧 |
|
||||||
|
| 2.2 | Compaction Strategy | 🚧 | | |
|
||||||
|
| 2.3 | Write-Ahead Log | | | |
|
||||||
|
| 2.4 | Manifest | | | |
|
||||||
|
| 2.5 | Bloom Filter | | | |
|
||||||
|
| 2.6 | Key Compression | | | |
|
||||||
|
| 3.1 | Timestamp Encoding | | | |
|
||||||
|
| 3.2 | Prefix Bloom Filter | | | |
|
||||||
|
| 3.3 | Snapshot Read | | | |
|
||||||
|
| 3.4 | Watermark | | | |
|
||||||
|
| 3.5 | Garbage Collection | | | |
|
||||||
|
| 3.6 | Serializable Snapshot Isolation | | | |
|
||||||
@@ -5,18 +5,21 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
# Tutorial
|
# Checkpoint 1
|
||||||
|
|
||||||
- [Store key-value pairs in little blocks](./01-block.md)
|
- [Store key-value pairs in little blocks](./01-block.md)
|
||||||
- [And make them into an SST](./02-sst.md)
|
- [And make them into an SST](./02-sst.md)
|
||||||
- [Now it's time to merge everything](./03-memtable.md)
|
- [Now it's time to merge everything](./03-memtable.md)
|
||||||
- [The engine is on fire](./04-engine.md)
|
- [The engine is on fire](./04-engine.md)
|
||||||
|
|
||||||
|
# Checkpoint 2
|
||||||
|
|
||||||
|
- [Let's do something in the background](./05-compaction.md)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
# WIP Chapters
|
# WIP Chapters
|
||||||
|
|
||||||
- [Let's do something in the background](./05-compaction.md)
|
|
||||||
- [Be careful when the system crashes](./06-recovery.md)
|
- [Be careful when the system crashes](./06-recovery.md)
|
||||||
- [A good bloom filter makes life easier](./07-bloom-filter.md)
|
- [A good bloom filter makes life easier](./07-bloom-filter.md)
|
||||||
- [Save some space, hopefully](./08-key-compression.md)
|
- [Save some space, hopefully](./08-key-compression.md)
|
||||||
|
|||||||
72
mini-lsm/src/compact.rs
Normal file
72
mini-lsm/src/compact.rs
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
iterators::{merge_iterator::MergeIterator, StorageIterator},
|
||||||
|
lsm_storage::LsmStorage,
|
||||||
|
table::{SsTable, SsTableBuilder, SsTableIterator},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Settings for a single `LsmStorage::compact` run.
#[derive(Debug, Clone)]
struct CompactOptions {
    /// Block size passed to every `SsTableBuilder` created during the run.
    block_size: usize,
    /// Size threshold for one output SST: once a builder's estimated size
    /// reaches this value, it is built into an SST and a new builder is used
    /// for the following entries.
    target_sst_size: usize,
    /// When `true`, entries whose value is empty (tombstones) are left out of
    /// the output instead of being copied into the new SSTs.
    compact_to_bottom_level: bool,
}
|
||||||
|
|
||||||
|
impl LsmStorage {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn compact(
|
||||||
|
&self,
|
||||||
|
tables: Vec<Arc<SsTable>>,
|
||||||
|
options: CompactOptions,
|
||||||
|
) -> Result<Vec<Arc<SsTable>>> {
|
||||||
|
let mut iters = Vec::new();
|
||||||
|
iters.reserve(tables.len());
|
||||||
|
for table in tables.iter() {
|
||||||
|
iters.push(Box::new(SsTableIterator::create_and_seek_to_first(
|
||||||
|
table.clone(),
|
||||||
|
)?));
|
||||||
|
}
|
||||||
|
let mut iter = MergeIterator::create(iters);
|
||||||
|
|
||||||
|
let mut builder = None;
|
||||||
|
let mut new_sst = vec![];
|
||||||
|
|
||||||
|
while iter.is_valid() {
|
||||||
|
if builder.is_none() {
|
||||||
|
builder = Some(SsTableBuilder::new(options.block_size));
|
||||||
|
}
|
||||||
|
let builder_inner = builder.as_mut().unwrap();
|
||||||
|
if options.compact_to_bottom_level {
|
||||||
|
if !iter.value().is_empty() {
|
||||||
|
builder_inner.add(iter.key(), iter.value());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
builder_inner.add(iter.key(), iter.value());
|
||||||
|
}
|
||||||
|
iter.next()?;
|
||||||
|
|
||||||
|
if builder_inner.estimated_size() >= options.target_sst_size {
|
||||||
|
let sst_id = self.next_sst_id(); // lock dropped here
|
||||||
|
let builder = builder.take().unwrap();
|
||||||
|
let sst = Arc::new(builder.build(
|
||||||
|
sst_id,
|
||||||
|
Some(self.block_cache.clone()),
|
||||||
|
self.path_of_sst(sst_id),
|
||||||
|
)?);
|
||||||
|
new_sst.push(sst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(builder) = builder {
|
||||||
|
let sst_id = self.next_sst_id(); // lock dropped here
|
||||||
|
let sst = Arc::new(builder.build(
|
||||||
|
sst_id,
|
||||||
|
Some(self.block_cache.clone()),
|
||||||
|
self.path_of_sst(sst_id),
|
||||||
|
)?);
|
||||||
|
new_sst.push(sst);
|
||||||
|
}
|
||||||
|
Ok(new_sst)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
pub mod block;
|
pub mod block;
|
||||||
|
mod compact;
|
||||||
pub mod iterators;
|
pub mod iterators;
|
||||||
pub mod lsm_iterator;
|
pub mod lsm_iterator;
|
||||||
pub mod lsm_storage;
|
pub mod lsm_storage;
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use std::ops::Bound;
|
use std::ops::Bound;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::atomic::AtomicUsize;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
@@ -27,8 +28,6 @@ pub struct LsmStorageInner {
|
|||||||
/// L1 - L6 SsTables, sorted by key range.
|
/// L1 - L6 SsTables, sorted by key range.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
levels: Vec<Vec<Arc<SsTable>>>,
|
levels: Vec<Vec<Arc<SsTable>>>,
|
||||||
/// The next SSTable ID.
|
|
||||||
next_sst_id: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LsmStorageInner {
|
impl LsmStorageInner {
|
||||||
@@ -38,26 +37,32 @@ impl LsmStorageInner {
|
|||||||
imm_memtables: vec![],
|
imm_memtables: vec![],
|
||||||
l0_sstables: vec![],
|
l0_sstables: vec![],
|
||||||
levels: vec![],
|
levels: vec![],
|
||||||
next_sst_id: 1,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The storage interface of the LSM tree.
|
/// The storage interface of the LSM tree.
|
||||||
pub struct LsmStorage {
|
pub struct LsmStorage {
|
||||||
inner: Arc<RwLock<Arc<LsmStorageInner>>>,
|
pub(crate) inner: Arc<RwLock<Arc<LsmStorageInner>>>,
|
||||||
flush_lock: Mutex<()>,
|
flush_lock: Mutex<()>,
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
block_cache: Arc<BlockCache>,
|
pub(crate) block_cache: Arc<BlockCache>,
|
||||||
|
next_sst_id: AtomicUsize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LsmStorage {
|
impl LsmStorage {
|
||||||
|
pub(crate) fn next_sst_id(&self) -> usize {
|
||||||
|
self.next_sst_id
|
||||||
|
.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
|
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
inner: Arc::new(RwLock::new(Arc::new(LsmStorageInner::create()))),
|
inner: Arc::new(RwLock::new(Arc::new(LsmStorageInner::create()))),
|
||||||
flush_lock: Mutex::new(()),
|
flush_lock: Mutex::new(()),
|
||||||
path: path.as_ref().to_path_buf(),
|
path: path.as_ref().to_path_buf(),
|
||||||
block_cache: Arc::new(BlockCache::new(1 << 20)), // 4GB block cache
|
block_cache: Arc::new(BlockCache::new(1 << 20)), // 4GB block cache,
|
||||||
|
next_sst_id: AtomicUsize::new(1),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,7 +126,7 @@ impl LsmStorage {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn path_of_sst(&self, id: usize) -> PathBuf {
|
pub(crate) fn path_of_sst(&self, id: usize) -> PathBuf {
|
||||||
self.path.join(format!("{:05}.sst", id))
|
self.path.join(format!("{:05}.sst", id))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -142,7 +147,7 @@ impl LsmStorage {
|
|||||||
let mut snapshot = guard.as_ref().clone();
|
let mut snapshot = guard.as_ref().clone();
|
||||||
let memtable = std::mem::replace(&mut snapshot.memtable, Arc::new(MemTable::create()));
|
let memtable = std::mem::replace(&mut snapshot.memtable, Arc::new(MemTable::create()));
|
||||||
flush_memtable = memtable.clone();
|
flush_memtable = memtable.clone();
|
||||||
sst_id = snapshot.next_sst_id;
|
sst_id = self.next_sst_id();
|
||||||
// Add the memtable to the immutable memtables.
|
// Add the memtable to the immutable memtables.
|
||||||
snapshot.imm_memtables.push(memtable);
|
snapshot.imm_memtables.push(memtable);
|
||||||
// Update the snapshot.
|
// Update the snapshot.
|
||||||
@@ -169,8 +174,6 @@ impl LsmStorage {
|
|||||||
snapshot.imm_memtables.pop();
|
snapshot.imm_memtables.pop();
|
||||||
// Add L0 table
|
// Add L0 table
|
||||||
snapshot.l0_sstables.push(sst);
|
snapshot.l0_sstables.push(sst);
|
||||||
// Update SST ID
|
|
||||||
snapshot.next_sst_id += 1;
|
|
||||||
// Update the snapshot.
|
// Update the snapshot.
|
||||||
*guard = Arc::new(snapshot);
|
*guard = Arc::new(snapshot);
|
||||||
}
|
}
|
||||||
|
|||||||
12
rustfmt.toml
Normal file
12
rustfmt.toml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
comment_width = 120
|
||||||
|
format_code_in_doc_comments = true
|
||||||
|
format_macro_bodies = true
|
||||||
|
format_macro_matchers = true
|
||||||
|
normalize_comments = true
|
||||||
|
normalize_doc_attributes = true
|
||||||
|
imports_granularity = "Module"
|
||||||
|
group_imports = "StdExternalCrate"
|
||||||
|
reorder_impl_items = true
|
||||||
|
reorder_imports = true
|
||||||
|
tab_spaces = 4
|
||||||
|
wrap_comments = true
|
||||||
Reference in New Issue
Block a user