12
README.md
12
README.md
@@ -71,18 +71,6 @@ We are working on chapter 3 and more test cases for all existing contents.
|
||||
| 3.5 | Transactions and Optimistic Concurrency Control | | | |
|
||||
| 3.6 | Serializable Snapshot Isolation | | | |
|
||||
| 3.7 | TTL (Time-to-Live) Entries | | | |
|
||||
| 4.1 | Benchmarking | | | |
|
||||
| 4.2 | Block Compression | | | |
|
||||
| 4.3 | Trivial Move and Parallel Compaction | | | |
|
||||
| 4.4 | Alternative Block Encodings | | | |
|
||||
| 4.5 | Rate Limiter and I/O Optimizations | | | |
|
||||
| 4.6 | Build Your Own Block Cache | | | |
|
||||
| 4.7 | Build Your Own SkipList | | | |
|
||||
| 4.8 | Async Engine | | | |
|
||||
| 4.9 | Key-Value Separation | | | |
|
||||
| 4.10 | Column Families | | | |
|
||||
| 4.11 | Sharding | | | |
|
||||
| 4.12 | SQL over Mini-LSM | | | |
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
<!--  -->
|
||||
|
||||
at the end of each week, we will have some easy, not important, while interesting things
|
||||
In the previous chapter, you already built a full LSM-based storage engine with. At the end of this week, we will implement some easy but important optimizations of the storage engine. Welcome to Mini-LSM's week w snack time!
|
||||
|
||||
In this chapter, you will:
|
||||
|
||||
|
||||
@@ -13,7 +13,8 @@ This is an advanced part that deep dives into optimizations and applications of
|
||||
| 4.7 | Build Your Own SkipList | | | |
|
||||
| 4.8 | Async Engine | | | |
|
||||
| 4.9 | IO-uring-based I/O engine | | | |
|
||||
| 4.10 | Key-Value Separation | | | |
|
||||
| 4.11 | Column Families | | | |
|
||||
| 4.12 | Sharding | | | |
|
||||
| 4.13 | SQL over Mini-LSM | | | |
|
||||
| 4.10 | Prefetching | | | |
|
||||
| 4.11 | Key-Value Separation | | | |
|
||||
| 4.12 | Column Families | | | |
|
||||
| 4.13 | Sharding | | | |
|
||||
| 4.14 | SQL over Mini-LSM | | | |
|
||||
|
||||
@@ -128,7 +128,7 @@ impl LsmStorageInner {
|
||||
let builder_inner = builder.as_mut().unwrap();
|
||||
builder_inner.add(iter.key(), iter.value());
|
||||
|
||||
let same_as_last_key = iter.key().key_ref() == &last_key;
|
||||
let same_as_last_key = iter.key().key_ref() == last_key;
|
||||
|
||||
if builder_inner.estimated_size() >= self.options.target_sst_size && !same_as_last_key {
|
||||
let sst_id = self.next_sst_id();
|
||||
|
||||
@@ -68,6 +68,7 @@ impl LsmStorageState {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LsmStorageOptions {
|
||||
// Block size in bytes
|
||||
pub block_size: usize,
|
||||
|
||||
@@ -64,6 +64,7 @@ impl LsmStorageState {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LsmStorageOptions {
|
||||
// Block size in bytes
|
||||
pub block_size: usize,
|
||||
|
||||
@@ -253,6 +253,7 @@ impl LsmStorageInner {
|
||||
let CompactionOptions::NoCompaction = self.options.compaction_options else {
|
||||
panic!("full compaction can only be called with compaction is not enabled")
|
||||
};
|
||||
|
||||
let snapshot = {
|
||||
let state = self.state.read();
|
||||
state.clone()
|
||||
@@ -264,23 +265,26 @@ impl LsmStorageInner {
|
||||
l0_sstables: l0_sstables.clone(),
|
||||
l1_sstables: l1_sstables.clone(),
|
||||
};
|
||||
|
||||
println!("force full compaction: {:?}", compaction_task);
|
||||
|
||||
let sstables = self.compact(&compaction_task)?;
|
||||
let mut ids = Vec::with_capacity(sstables.len());
|
||||
|
||||
{
|
||||
let _state_lock = self.state_lock.lock();
|
||||
let state_lock = self.state_lock.lock();
|
||||
let mut state = self.state.read().as_ref().clone();
|
||||
for sst in l0_sstables.iter().chain(l1_sstables.iter()) {
|
||||
let result = state.sstables.remove(sst);
|
||||
assert!(result.is_some());
|
||||
}
|
||||
let mut ids = Vec::with_capacity(sstables.len());
|
||||
for new_sst in sstables {
|
||||
ids.push(new_sst.sst_id());
|
||||
let result = state.sstables.insert(new_sst.sst_id(), new_sst);
|
||||
assert!(result.is_none());
|
||||
}
|
||||
assert_eq!(l1_sstables, state.levels[0].1);
|
||||
state.levels[0].1 = ids;
|
||||
state.levels[0].1 = ids.clone();
|
||||
let mut l0_sstables_map = l0_sstables.iter().copied().collect::<HashSet<_>>();
|
||||
state.l0_sstables = state
|
||||
.l0_sstables
|
||||
@@ -290,10 +294,18 @@ impl LsmStorageInner {
|
||||
.collect::<Vec<_>>();
|
||||
assert!(l0_sstables_map.is_empty());
|
||||
*self.state.write() = Arc::new(state);
|
||||
self.sync_dir()?;
|
||||
self.manifest.as_ref().unwrap().add_record(
|
||||
&state_lock,
|
||||
ManifestRecord::Compaction(compaction_task, ids.clone()),
|
||||
)?;
|
||||
}
|
||||
for sst in l0_sstables.iter().chain(l1_sstables.iter()) {
|
||||
std::fs::remove_file(self.path_of_sst(*sst))?;
|
||||
}
|
||||
|
||||
println!("force full compaction done, new SSTs: {:?}", ids);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -308,6 +320,7 @@ impl LsmStorageInner {
|
||||
let Some(task) = task else {
|
||||
return Ok(());
|
||||
};
|
||||
self.dump_structure();
|
||||
println!("running compaction task: {:?}", task);
|
||||
let sstables = self.compact(&task)?;
|
||||
let output = sstables.iter().map(|x| x.sst_id()).collect::<Vec<_>>();
|
||||
|
||||
@@ -68,6 +68,7 @@ impl LsmStorageState {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LsmStorageOptions {
|
||||
// Block size in bytes
|
||||
pub block_size: usize,
|
||||
@@ -363,7 +364,7 @@ impl LsmStorageInner {
|
||||
table_id,
|
||||
Some(block_cache.clone()),
|
||||
FileObject::open(&Self::path_of_sst_static(path, table_id))
|
||||
.context("failed to open SST")?,
|
||||
.with_context(|| format!("failed to open SST: {}", table_id))?,
|
||||
)?;
|
||||
state.sstables.insert(table_id, Arc::new(sst));
|
||||
sst_cnt += 1;
|
||||
|
||||
@@ -10,3 +10,5 @@ mod week2_day1;
|
||||
mod week2_day2;
|
||||
mod week2_day3;
|
||||
mod week2_day4;
|
||||
mod week2_day5;
|
||||
mod week2_day6;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::{collections::BTreeMap, path::Path, sync::Arc, time::Duration};
|
||||
use std::{collections::BTreeMap, ops::Bound, path::Path, sync::Arc, time::Duration};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use bytes::Bytes;
|
||||
@@ -9,7 +9,7 @@ use crate::{
|
||||
TieredCompactionOptions,
|
||||
},
|
||||
iterators::StorageIterator,
|
||||
key::KeySlice,
|
||||
key::{KeySlice, TS_ENABLED},
|
||||
lsm_storage::{BlockCache, LsmStorageInner, MiniLsm},
|
||||
table::{SsTable, SsTableBuilder},
|
||||
};
|
||||
@@ -171,11 +171,12 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
|
||||
let gen_key = |i| format!("{:010}", i); // 10B
|
||||
let gen_value = |i| format!("{:0110}", i); // 110B
|
||||
let mut max_key = 0;
|
||||
let overlaps = if TS_ENABLED { 10000 } else { 20000 };
|
||||
for iter in 0..10 {
|
||||
let range_begin = iter * 5000;
|
||||
for i in range_begin..(range_begin + 10000) {
|
||||
for i in range_begin..(range_begin + overlaps) {
|
||||
// 120B per key, 4MB data populated
|
||||
let key = gen_key(i);
|
||||
let key: String = gen_key(i);
|
||||
let version = key_map.get(&i).copied().unwrap_or_default() + 1;
|
||||
let value = gen_value(version);
|
||||
key_map.insert(i, version);
|
||||
@@ -184,17 +185,24 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
|
||||
}
|
||||
}
|
||||
|
||||
let mut expected_key_value_pairs = Vec::new();
|
||||
for i in 0..(max_key + 40000) {
|
||||
let key = gen_key(i);
|
||||
let value = storage.get(key.as_bytes()).unwrap();
|
||||
if let Some(val) = key_map.get(&i) {
|
||||
let expected_value = gen_value(*val);
|
||||
assert_eq!(value, Some(Bytes::from(expected_value)));
|
||||
assert_eq!(value, Some(Bytes::from(expected_value.clone())));
|
||||
expected_key_value_pairs.push((Bytes::from(key), Bytes::from(expected_value)));
|
||||
} else {
|
||||
assert!(value.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
|
||||
expected_key_value_pairs,
|
||||
);
|
||||
|
||||
while {
|
||||
let snapshot = storage.inner.state.read();
|
||||
!snapshot.imm_memtables.is_empty()
|
||||
@@ -324,3 +332,9 @@ pub fn check_compaction_ratio(storage: Arc<MiniLsm>) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump_files_in_dir(path: impl AsRef<Path>) {
|
||||
for f in path.as_ref().read_dir().unwrap() {
|
||||
println!("{}", f.unwrap().path().display())
|
||||
}
|
||||
}
|
||||
|
||||
81
mini-lsm/src/tests/week2_day5.rs
Normal file
81
mini-lsm/src/tests/week2_day5.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::{
|
||||
compact::{
|
||||
CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
|
||||
TieredCompactionOptions,
|
||||
},
|
||||
lsm_storage::{LsmStorageOptions, MiniLsm},
|
||||
tests::harness::dump_files_in_dir,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_integration_leveled() {
|
||||
test_integration(CompactionOptions::Leveled(LeveledCompactionOptions {
|
||||
level_size_multiplier: 2,
|
||||
level0_file_num_compaction_trigger: 2,
|
||||
max_levels: 3,
|
||||
base_level_size_mb: 1,
|
||||
}))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_integration_tiered() {
|
||||
test_integration(CompactionOptions::Tiered(TieredCompactionOptions {
|
||||
num_tiers: 3,
|
||||
max_size_amplification_percent: 200,
|
||||
size_ratio: 1,
|
||||
min_merge_width: 3,
|
||||
}))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_integration_simple() {
|
||||
test_integration(CompactionOptions::Simple(SimpleLeveledCompactionOptions {
|
||||
size_ratio_percent: 200,
|
||||
level0_file_num_compaction_trigger: 2,
|
||||
max_levels: 3,
|
||||
}));
|
||||
}
|
||||
|
||||
fn test_integration(compaction_options: CompactionOptions) {
|
||||
let dir = tempdir().unwrap();
|
||||
let storage = MiniLsm::open(
|
||||
&dir,
|
||||
LsmStorageOptions::default_for_week2_test(compaction_options.clone()),
|
||||
)
|
||||
.unwrap();
|
||||
for i in 0..=20 {
|
||||
storage.put(b"0", format!("v{}", i).as_bytes()).unwrap();
|
||||
if i % 2 == 0 {
|
||||
storage.put(b"1", format!("v{}", i).as_bytes()).unwrap();
|
||||
} else {
|
||||
storage.delete(b"1").unwrap();
|
||||
}
|
||||
if i % 2 == 1 {
|
||||
storage.put(b"2", format!("v{}", i).as_bytes()).unwrap();
|
||||
} else {
|
||||
storage.delete(b"2").unwrap();
|
||||
}
|
||||
storage
|
||||
.inner
|
||||
.force_freeze_memtable(&storage.inner.state_lock.lock())
|
||||
.unwrap();
|
||||
}
|
||||
storage.close().unwrap();
|
||||
// ensure all SSTs are flushed
|
||||
assert!(storage.inner.state.read().memtable.is_empty());
|
||||
assert!(storage.inner.state.read().imm_memtables.is_empty());
|
||||
storage.dump_structure();
|
||||
drop(storage);
|
||||
dump_files_in_dir(&dir);
|
||||
|
||||
let storage = MiniLsm::open(
|
||||
&dir,
|
||||
LsmStorageOptions::default_for_week2_test(compaction_options.clone()),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(&storage.get(b"0").unwrap().unwrap()[..], b"v20".as_slice());
|
||||
assert_eq!(&storage.get(b"1").unwrap().unwrap()[..], b"v20".as_slice());
|
||||
assert_eq!(storage.get(b"2").unwrap(), None);
|
||||
}
|
||||
77
mini-lsm/src/tests/week2_day6.rs
Normal file
77
mini-lsm/src/tests/week2_day6.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::{
|
||||
compact::{
|
||||
CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
|
||||
TieredCompactionOptions,
|
||||
},
|
||||
lsm_storage::{LsmStorageOptions, MiniLsm},
|
||||
tests::harness::dump_files_in_dir,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_integration_leveled() {
|
||||
test_integration(CompactionOptions::Leveled(LeveledCompactionOptions {
|
||||
level_size_multiplier: 2,
|
||||
level0_file_num_compaction_trigger: 2,
|
||||
max_levels: 3,
|
||||
base_level_size_mb: 1,
|
||||
}))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_integration_tiered() {
|
||||
test_integration(CompactionOptions::Tiered(TieredCompactionOptions {
|
||||
num_tiers: 3,
|
||||
max_size_amplification_percent: 200,
|
||||
size_ratio: 1,
|
||||
min_merge_width: 3,
|
||||
}))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_integration_simple() {
|
||||
test_integration(CompactionOptions::Simple(SimpleLeveledCompactionOptions {
|
||||
size_ratio_percent: 200,
|
||||
level0_file_num_compaction_trigger: 2,
|
||||
max_levels: 3,
|
||||
}));
|
||||
}
|
||||
|
||||
fn test_integration(compaction_options: CompactionOptions) {
|
||||
let dir = tempdir().unwrap();
|
||||
let mut options = LsmStorageOptions::default_for_week2_test(compaction_options);
|
||||
options.enable_wal = true;
|
||||
let storage = MiniLsm::open(&dir, options.clone()).unwrap();
|
||||
for i in 0..=20 {
|
||||
storage.put(b"0", format!("v{}", i).as_bytes()).unwrap();
|
||||
if i % 2 == 0 {
|
||||
storage.put(b"1", format!("v{}", i).as_bytes()).unwrap();
|
||||
} else {
|
||||
storage.delete(b"1").unwrap();
|
||||
}
|
||||
if i % 2 == 1 {
|
||||
storage.put(b"2", format!("v{}", i).as_bytes()).unwrap();
|
||||
} else {
|
||||
storage.delete(b"2").unwrap();
|
||||
}
|
||||
storage
|
||||
.inner
|
||||
.force_freeze_memtable(&storage.inner.state_lock.lock())
|
||||
.unwrap();
|
||||
}
|
||||
storage.close().unwrap();
|
||||
// ensure some SSTs are not flushed
|
||||
assert!(
|
||||
!storage.inner.state.read().memtable.is_empty()
|
||||
|| !storage.inner.state.read().imm_memtables.is_empty()
|
||||
);
|
||||
storage.dump_structure();
|
||||
drop(storage);
|
||||
dump_files_in_dir(&dir);
|
||||
|
||||
let storage = MiniLsm::open(&dir, options).unwrap();
|
||||
assert_eq!(&storage.get(b"0").unwrap().unwrap()[..], b"v20".as_slice());
|
||||
assert_eq!(&storage.get(b"1").unwrap().unwrap()[..], b"v20".as_slice());
|
||||
assert_eq!(storage.get(b"2").unwrap(), None);
|
||||
}
|
||||
Reference in New Issue
Block a user