From 14c3be390c9b540a6a09747ffb5d36a47e8dabe9 Mon Sep 17 00:00:00 2001
From: Alex Chi
Date: Fri, 26 Jan 2024 15:19:56 +0800
Subject: [PATCH] add week 2 day 5 + 6 tests

Signed-off-by: Alex Chi
---
 README.md                            | 12 -----
 mini-lsm-book/src/week2-07-snacks.md |  2 +-
 mini-lsm-book/src/week4-overview.md  |  9 ++--
 mini-lsm-mvcc/src/compact.rs         |  2 +-
 mini-lsm-mvcc/src/lsm_storage.rs     |  1 +
 mini-lsm-starter/src/lsm_storage.rs  |  1 +
 mini-lsm/src/compact.rs              | 19 +++++--
 mini-lsm/src/lsm_storage.rs          |  3 +-
 mini-lsm/src/tests.rs                |  2 +
 mini-lsm/src/tests/harness.rs        | 24 +++++++--
 mini-lsm/src/tests/week2_day5.rs     | 81 ++++++++++++++++++++++++++++
 mini-lsm/src/tests/week2_day6.rs     | 77 ++++++++++++++++++++++++++
 12 files changed, 206 insertions(+), 27 deletions(-)
 create mode 100644 mini-lsm/src/tests/week2_day5.rs
 create mode 100644 mini-lsm/src/tests/week2_day6.rs

diff --git a/README.md b/README.md
index 1813aa0..c02203f 100644
--- a/README.md
+++ b/README.md
@@ -71,18 +71,6 @@ We are working on chapter 3 and more test cases for all existing contents.
 | 3.5 | Transactions and Optimistic Concurrency Control | | | |
 | 3.6 | Serializable Snapshot Isolation | | | |
 | 3.7 | TTL (Time-to-Live) Entries | | | |
-| 4.1 | Benchmarking | | | |
-| 4.2 | Block Compression | | | |
-| 4.3 | Trivial Move and Parallel Compaction | | | |
-| 4.4 | Alternative Block Encodings | | | |
-| 4.5 | Rate Limiter and I/O Optimizations | | | |
-| 4.6 | Build Your Own Block Cache | | | |
-| 4.7 | Build Your Own SkipList | | | |
-| 4.8 | Async Engine | | | |
-| 4.9 | Key-Value Separation | | | |
-| 4.10 | Column Families | | | |
-| 4.11 | Sharding | | | |
-| 4.12 | SQL over Mini-LSM | | | |

 ## License

diff --git a/mini-lsm-book/src/week2-07-snacks.md b/mini-lsm-book/src/week2-07-snacks.md
index cacd46e..f556f11 100644
--- a/mini-lsm-book/src/week2-07-snacks.md
+++ b/mini-lsm-book/src/week2-07-snacks.md
@@ -2,7 +2,7 @@

-at the end of each week, we will have some easy, not important, while interesting things
+In the previous chapter, you already built a full LSM-based storage engine. At the end of this week, we will implement some easy but important optimizations of the storage engine. Welcome to Mini-LSM's week 2 snack time!

 In this chapter, you will:

diff --git a/mini-lsm-book/src/week4-overview.md b/mini-lsm-book/src/week4-overview.md
index e4e4eaf..5b474d2 100644
--- a/mini-lsm-book/src/week4-overview.md
+++ b/mini-lsm-book/src/week4-overview.md
@@ -13,7 +13,8 @@ This is an advanced part that deep dives into optimizations and applications of
 | 4.7 | Build Your Own SkipList | | | |
 | 4.8 | Async Engine | | | |
 | 4.9 | IO-uring-based I/O engine | | | |
-| 4.10 | Key-Value Separation | | | |
-| 4.11 | Column Families | | | |
-| 4.12 | Sharding | | | |
-| 4.13 | SQL over Mini-LSM | | | |
+| 4.10 | Prefetching | | | |
+| 4.11 | Key-Value Separation | | | |
+| 4.12 | Column Families | | | |
+| 4.13 | Sharding | | | |
+| 4.14 | SQL over Mini-LSM | | | |
diff --git a/mini-lsm-mvcc/src/compact.rs b/mini-lsm-mvcc/src/compact.rs
index 83ddeea..8613bf2 100644
--- a/mini-lsm-mvcc/src/compact.rs
+++ b/mini-lsm-mvcc/src/compact.rs
@@ -128,7 +128,7 @@ impl LsmStorageInner {
                 let builder_inner = builder.as_mut().unwrap();
                 builder_inner.add(iter.key(), iter.value());
-                let same_as_last_key = iter.key().key_ref() == &last_key;
+                let same_as_last_key = iter.key().key_ref() == last_key;
                 if builder_inner.estimated_size() >= self.options.target_sst_size
                     && !same_as_last_key
                 {
                     let sst_id = self.next_sst_id();
diff --git a/mini-lsm-mvcc/src/lsm_storage.rs b/mini-lsm-mvcc/src/lsm_storage.rs
index 901ae62..89d8ca2 100644
--- a/mini-lsm-mvcc/src/lsm_storage.rs
+++ b/mini-lsm-mvcc/src/lsm_storage.rs
@@ -68,6 +68,7 @@ impl LsmStorageState {
     }
 }

+#[derive(Debug, Clone)]
 pub struct LsmStorageOptions {
     // Block size in bytes
     pub block_size: usize,
diff --git a/mini-lsm-starter/src/lsm_storage.rs b/mini-lsm-starter/src/lsm_storage.rs
index e3f8853..e77dc7b 100644
--- a/mini-lsm-starter/src/lsm_storage.rs
+++ b/mini-lsm-starter/src/lsm_storage.rs
@@ -64,6 +64,7 @@ impl LsmStorageState {
     }
 }

+#[derive(Debug, Clone)]
 pub struct LsmStorageOptions {
     // Block size in bytes
     pub block_size: usize,
diff --git a/mini-lsm/src/compact.rs b/mini-lsm/src/compact.rs
index e21f6f0..f095a19 100644
--- a/mini-lsm/src/compact.rs
+++ b/mini-lsm/src/compact.rs
@@ -253,6 +253,7 @@ impl LsmStorageInner {
         let CompactionOptions::NoCompaction = self.options.compaction_options else {
             panic!("full compaction can only be called with compaction is not enabled")
         };
+
         let snapshot = {
             let state = self.state.read();
             state.clone()
@@ -264,23 +265,26 @@ impl LsmStorageInner {
             l0_sstables: l0_sstables.clone(),
             l1_sstables: l1_sstables.clone(),
         };
+
+        println!("force full compaction: {:?}", compaction_task);
+
         let sstables = self.compact(&compaction_task)?;
+        let mut ids = Vec::with_capacity(sstables.len());
         {
-            let _state_lock = self.state_lock.lock();
+            let state_lock = self.state_lock.lock();
             let mut state = self.state.read().as_ref().clone();
             for sst in l0_sstables.iter().chain(l1_sstables.iter()) {
                 let result = state.sstables.remove(sst);
                 assert!(result.is_some());
             }
-            let mut ids = Vec::with_capacity(sstables.len());
             for new_sst in sstables {
                 ids.push(new_sst.sst_id());
                 let result = state.sstables.insert(new_sst.sst_id(), new_sst);
                 assert!(result.is_none());
             }
             assert_eq!(l1_sstables, state.levels[0].1);
-            state.levels[0].1 = ids;
+            state.levels[0].1 = ids.clone();
             let mut l0_sstables_map = l0_sstables.iter().copied().collect::<HashSet<_>>();
             state.l0_sstables = state
                 .l0_sstables
@@ -290,10 +294,18 @@ impl LsmStorageInner {
                 .collect::<Vec<_>>();
             assert!(l0_sstables_map.is_empty());
             *self.state.write() = Arc::new(state);
+            self.sync_dir()?;
+            self.manifest.as_ref().unwrap().add_record(
+                &state_lock,
+                ManifestRecord::Compaction(compaction_task, ids.clone()),
+            )?;
         }
         for sst in l0_sstables.iter().chain(l1_sstables.iter()) {
             std::fs::remove_file(self.path_of_sst(*sst))?;
         }
+
+        println!("force full compaction done, new SSTs: {:?}", ids);
+
         Ok(())
     }

@@ -308,6 +320,7 @@ impl LsmStorageInner {
         let Some(task) = task else {
             return Ok(());
         };
+        self.dump_structure();
         println!("running compaction task: {:?}", task);
         let sstables = self.compact(&task)?;
         let output = sstables.iter().map(|x| x.sst_id()).collect::<Vec<_>>();
diff --git a/mini-lsm/src/lsm_storage.rs b/mini-lsm/src/lsm_storage.rs
index 383a7c4..8c499a1 100644
--- a/mini-lsm/src/lsm_storage.rs
+++ b/mini-lsm/src/lsm_storage.rs
@@ -68,6 +68,7 @@ impl LsmStorageState {
     }
 }

+#[derive(Debug, Clone)]
 pub struct LsmStorageOptions {
     // Block size in bytes
     pub block_size: usize,
@@ -363,7 +364,7 @@ impl LsmStorageInner {
                         table_id,
                         Some(block_cache.clone()),
                         FileObject::open(&Self::path_of_sst_static(path, table_id))
-                            .context("failed to open SST")?,
+                            .with_context(|| format!("failed to open SST: {}", table_id))?,
                     )?;
                     state.sstables.insert(table_id, Arc::new(sst));
                     sst_cnt += 1;
diff --git a/mini-lsm/src/tests.rs b/mini-lsm/src/tests.rs
index fb295e9..51d4f13 100644
--- a/mini-lsm/src/tests.rs
+++ b/mini-lsm/src/tests.rs
@@ -10,3 +10,5 @@ mod week2_day1;
 mod week2_day2;
 mod week2_day3;
 mod week2_day4;
+mod week2_day5;
+mod week2_day6;
diff --git a/mini-lsm/src/tests/harness.rs b/mini-lsm/src/tests/harness.rs
index 822f075..183fbc6 100644
--- a/mini-lsm/src/tests/harness.rs
+++ b/mini-lsm/src/tests/harness.rs
@@ -1,4 +1,4 @@
-use std::{collections::BTreeMap, path::Path, sync::Arc, time::Duration};
+use std::{collections::BTreeMap, ops::Bound, path::Path, sync::Arc, time::Duration};

 use anyhow::{bail, Result};
 use bytes::Bytes;
@@ -9,7 +9,7 @@ use crate::{
         TieredCompactionOptions,
     },
     iterators::StorageIterator,
-    key::KeySlice,
+    key::{KeySlice, TS_ENABLED},
     lsm_storage::{BlockCache, LsmStorageInner, MiniLsm},
     table::{SsTable, SsTableBuilder},
 };
@@ -171,11 +171,12 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
     let gen_key = |i| format!("{:010}", i); // 10B
     let gen_value = |i| format!("{:0110}", i); // 110B
     let mut max_key = 0;
+    let overlaps = if TS_ENABLED { 10000 } else { 20000 };
     for iter in 0..10 {
         let range_begin = iter * 5000;
-        for i in range_begin..(range_begin + 10000) {
+        for i in range_begin..(range_begin + overlaps) {
             // 120B per key, 4MB data populated
-            let key = gen_key(i);
+            let key: String = gen_key(i);
             let version = key_map.get(&i).copied().unwrap_or_default() + 1;
             let value = gen_value(version);
             key_map.insert(i, version);
@@ -184,17 +185,24 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
         }
     }

+    let mut expected_key_value_pairs = Vec::new();
     for i in 0..(max_key + 40000) {
         let key = gen_key(i);
         let value = storage.get(key.as_bytes()).unwrap();
         if let Some(val) = key_map.get(&i) {
             let expected_value = gen_value(*val);
-            assert_eq!(value, Some(Bytes::from(expected_value)));
+            assert_eq!(value, Some(Bytes::from(expected_value.clone())));
+            expected_key_value_pairs.push((Bytes::from(key), Bytes::from(expected_value)));
         } else {
             assert!(value.is_none());
         }
     }

+    check_lsm_iter_result_by_key(
+        &mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
+        expected_key_value_pairs,
+    );
+
     while {
         let snapshot = storage.inner.state.read();
         !snapshot.imm_memtables.is_empty()
@@ -324,3 +332,9 @@ pub fn check_compaction_ratio(storage: Arc<MiniLsm>) {
         }
     }
 }
+
+pub fn dump_files_in_dir(path: impl AsRef<Path>) {
+    for f in path.as_ref().read_dir().unwrap() {
+        println!("{}", f.unwrap().path().display())
+    }
+}
diff --git a/mini-lsm/src/tests/week2_day5.rs b/mini-lsm/src/tests/week2_day5.rs
new file mode 100644
index 0000000..9ceedb3
--- /dev/null
+++ b/mini-lsm/src/tests/week2_day5.rs
@@ -0,0 +1,81 @@
+use tempfile::tempdir;
+
+use crate::{
+    compact::{
+        CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
+        TieredCompactionOptions,
+    },
+    lsm_storage::{LsmStorageOptions, MiniLsm},
+    tests::harness::dump_files_in_dir,
+};
+
+#[test]
+fn test_integration_leveled() {
+    test_integration(CompactionOptions::Leveled(LeveledCompactionOptions {
+        level_size_multiplier: 2,
+        level0_file_num_compaction_trigger: 2,
+        max_levels: 3,
+        base_level_size_mb: 1,
+    }))
+}
+
+#[test]
+fn test_integration_tiered() {
+    test_integration(CompactionOptions::Tiered(TieredCompactionOptions {
+        num_tiers: 3,
+        max_size_amplification_percent: 200,
+        size_ratio: 1,
+        min_merge_width: 3,
+    }))
+}
+
+#[test]
+fn test_integration_simple() {
+    test_integration(CompactionOptions::Simple(SimpleLeveledCompactionOptions {
+        size_ratio_percent: 200,
+        level0_file_num_compaction_trigger: 2,
+        max_levels: 3,
+    }));
+}
+
+fn test_integration(compaction_options: CompactionOptions) {
+    let dir = tempdir().unwrap();
+    let storage = MiniLsm::open(
+        &dir,
+        LsmStorageOptions::default_for_week2_test(compaction_options.clone()),
+    )
+    .unwrap();
+    for i in 0..=20 {
+        storage.put(b"0", format!("v{}", i).as_bytes()).unwrap();
+        if i % 2 == 0 {
+            storage.put(b"1", format!("v{}", i).as_bytes()).unwrap();
+        } else {
+            storage.delete(b"1").unwrap();
+        }
+        if i % 2 == 1 {
+            storage.put(b"2", format!("v{}", i).as_bytes()).unwrap();
+        } else {
+            storage.delete(b"2").unwrap();
+        }
+        storage
+            .inner
+            .force_freeze_memtable(&storage.inner.state_lock.lock())
+            .unwrap();
+    }
+    storage.close().unwrap();
+    // ensure all SSTs are flushed
+    assert!(storage.inner.state.read().memtable.is_empty());
+    assert!(storage.inner.state.read().imm_memtables.is_empty());
+    storage.dump_structure();
+    drop(storage);
+    dump_files_in_dir(&dir);
+
+    let storage = MiniLsm::open(
+        &dir,
+        LsmStorageOptions::default_for_week2_test(compaction_options.clone()),
+    )
+    .unwrap();
+    assert_eq!(&storage.get(b"0").unwrap().unwrap()[..], b"v20".as_slice());
+    assert_eq!(&storage.get(b"1").unwrap().unwrap()[..], b"v20".as_slice());
+    assert_eq!(storage.get(b"2").unwrap(), None);
+}
diff --git a/mini-lsm/src/tests/week2_day6.rs b/mini-lsm/src/tests/week2_day6.rs
new file mode 100644
index 0000000..befd100
--- /dev/null
+++ b/mini-lsm/src/tests/week2_day6.rs
@@ -0,0 +1,77 @@
+use tempfile::tempdir;
+
+use crate::{
+    compact::{
+        CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
+        TieredCompactionOptions,
+    },
+    lsm_storage::{LsmStorageOptions, MiniLsm},
+    tests::harness::dump_files_in_dir,
+};
+
+#[test]
+fn test_integration_leveled() {
+    test_integration(CompactionOptions::Leveled(LeveledCompactionOptions {
+        level_size_multiplier: 2,
+        level0_file_num_compaction_trigger: 2,
+        max_levels: 3,
+        base_level_size_mb: 1,
+    }))
+}
+
+#[test]
+fn test_integration_tiered() {
+    test_integration(CompactionOptions::Tiered(TieredCompactionOptions {
+        num_tiers: 3,
+        max_size_amplification_percent: 200,
+        size_ratio: 1,
+        min_merge_width: 3,
+    }))
+}
+
+#[test]
+fn test_integration_simple() {
+    test_integration(CompactionOptions::Simple(SimpleLeveledCompactionOptions {
+        size_ratio_percent: 200,
+        level0_file_num_compaction_trigger: 2,
+        max_levels: 3,
+    }));
+}
+
+fn test_integration(compaction_options: CompactionOptions) {
+    let dir = tempdir().unwrap();
+    let mut options = LsmStorageOptions::default_for_week2_test(compaction_options);
+    options.enable_wal = true;
+    let storage = MiniLsm::open(&dir, options.clone()).unwrap();
+    for i in 0..=20 {
+        storage.put(b"0", format!("v{}", i).as_bytes()).unwrap();
+        if i % 2 == 0 {
+            storage.put(b"1", format!("v{}", i).as_bytes()).unwrap();
+        } else {
+            storage.delete(b"1").unwrap();
+        }
+        if i % 2 == 1 {
+            storage.put(b"2", format!("v{}", i).as_bytes()).unwrap();
+        } else {
+            storage.delete(b"2").unwrap();
+        }
+        storage
+            .inner
+            .force_freeze_memtable(&storage.inner.state_lock.lock())
+            .unwrap();
+    }
+    storage.close().unwrap();
+    // ensure some SSTs are not flushed
+    assert!(
+        !storage.inner.state.read().memtable.is_empty()
+            || !storage.inner.state.read().imm_memtables.is_empty()
+    );
+    storage.dump_structure();
+    drop(storage);
+    dump_files_in_dir(&dir);
+
+    let storage = MiniLsm::open(&dir, options).unwrap();
+    assert_eq!(&storage.get(b"0").unwrap().unwrap()[..], b"v20".as_slice());
+    assert_eq!(&storage.get(b"1").unwrap().unwrap()[..], b"v20".as_slice());
+    assert_eq!(storage.get(b"2").unwrap(), None);
+}
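
Both new test files exercise the same close-and-reopen round trip, which is why this patch adds `#[derive(Debug, Clone)]` to `LsmStorageOptions`: the options are cloned so the second `MiniLsm::open` on the same directory runs with an identical configuration. Below is a minimal sketch of that pattern (not part of the commit), using only APIs that appear in the patch above (`LsmStorageOptions::default_for_week2_test`, `CompactionOptions::NoCompaction`, `MiniLsm::open`/`put`/`get`/`close`); the test name and the key/value literals are illustrative.

```rust
use tempfile::tempdir;

use crate::{
    compact::CompactionOptions,
    lsm_storage::{LsmStorageOptions, MiniLsm},
};

#[test]
fn sketch_reopen_round_trip() {
    let dir = tempdir().unwrap();
    // Cloning the options (enabled by the new `#[derive(Clone)]`) lets us reuse
    // the exact same configuration when reopening the directory.
    let options = LsmStorageOptions::default_for_week2_test(CompactionOptions::NoCompaction);

    let storage = MiniLsm::open(&dir, options.clone()).unwrap();
    storage.put(b"key", b"value").unwrap();
    // With WAL disabled, `close` is expected to flush the memtables (see the
    // day 5 test's assertions); the day 6 test sets `enable_wal = true` and
    // relies on WAL recovery instead.
    storage.close().unwrap();
    drop(storage);

    // Reopening with the same options must recover the previously written data.
    let storage = MiniLsm::open(&dir, options).unwrap();
    assert_eq!(&storage.get(b"key").unwrap().unwrap()[..], b"value".as_slice());
}
```

The day 5 and day 6 tests are the same round trip under the two recovery paths: week2_day5.rs rebuilds state from the manifest plus flushed SSTs, while week2_day6.rs recovers unflushed memtables from the WAL.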