implement mvcc compaction + snapshot

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi
2024-01-26 16:52:37 +08:00
parent 14c3be390c
commit 6025bb8dca
17 changed files with 300 additions and 63 deletions

View File

@@ -256,6 +256,10 @@ impl MiniLsm {
self.inner.sync()
}
/// Forwards to `new_txn` on the wrapped `inner` storage and returns its result unchanged.
///
/// NOTE(review): the name suggests this opens a transaction, but the signature
/// returns `Result<()>`, so no transaction handle is handed back — confirm the
/// intended contract against the inner implementation.
pub fn new_txn(&self) -> Result<()> {
self.inner.new_txn()
}
pub fn scan(
&self,
lower: Bound<&[u8]>,
@@ -668,6 +672,11 @@ impl LsmStorageInner {
Ok(())
}
/// Stub for `new_txn`: performs no work and always returns `Ok(())`.
///
/// NOTE(review): presumably a placeholder so callers can already invoke
/// `new_txn` before real transaction support exists — confirm.
pub fn new_txn(&self) -> Result<()> {
// no-op: nothing is created or recorded here; the call unconditionally succeeds.
Ok(())
}
/// Create an iterator over a range of keys.
pub fn scan(
&self,

View File

@@ -184,25 +184,7 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
max_key = max_key.max(i);
}
}
let mut expected_key_value_pairs = Vec::new();
for i in 0..(max_key + 40000) {
let key = gen_key(i);
let value = storage.get(key.as_bytes()).unwrap();
if let Some(val) = key_map.get(&i) {
let expected_value = gen_value(*val);
assert_eq!(value, Some(Bytes::from(expected_value.clone())));
expected_key_value_pairs.push((Bytes::from(key), Bytes::from(expected_value)));
} else {
assert!(value.is_none());
}
}
check_lsm_iter_result_by_key(
&mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
expected_key_value_pairs,
);
std::thread::sleep(Duration::from_secs(1)); // wait until all memtables flush
while {
let snapshot = storage.inner.state.read();
!snapshot.imm_memtables.is_empty()
@@ -222,6 +204,24 @@ pub fn compaction_bench(storage: Arc<MiniLsm>) {
println!("waiting for compaction to converge");
}
let mut expected_key_value_pairs = Vec::new();
for i in 0..(max_key + 40000) {
let key = gen_key(i);
let value = storage.get(key.as_bytes()).unwrap();
if let Some(val) = key_map.get(&i) {
let expected_value = gen_value(*val);
assert_eq!(value, Some(Bytes::from(expected_value.clone())));
expected_key_value_pairs.push((Bytes::from(key), Bytes::from(expected_value)));
} else {
assert!(value.is_none());
}
}
check_lsm_iter_result_by_key(
&mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
expected_key_value_pairs,
);
storage.dump_structure();
println!("This test case does not guarantee your compaction algorithm produces a LSM state as expected. It only does minimal checks on the size of the levels. Please use the compaction simulator to check if the compaction is correctly going on.");
@@ -243,6 +243,11 @@ pub fn check_compaction_ratio(storage: Arc<MiniLsm>) {
};
level_size.push(size);
}
let num_iters = storage
.scan(Bound::Unbounded, Bound::Unbounded)
.unwrap()
.num_active_iterators();
let num_memtables = storage.inner.state.read().imm_memtables.len() + 1;
match compaction_options {
CompactionOptions::NoCompaction => unreachable!(),
CompactionOptions::Simple(SimpleLeveledCompactionOptions {
@@ -268,6 +273,10 @@ pub fn check_compaction_ratio(storage: Arc<MiniLsm>) {
size_ratio_percent
);
}
assert!(
num_iters <= l0_sst_num + num_memtables + max_levels,
"did you use concat iterators?"
);
}
CompactionOptions::Leveled(LeveledCompactionOptions {
level_size_multiplier,
@@ -291,6 +300,10 @@ pub fn check_compaction_ratio(storage: Arc<MiniLsm>) {
level_size_multiplier
);
}
assert!(
num_iters <= l0_sst_num + num_memtables + max_levels,
"did you use concat iterators?"
);
}
CompactionOptions::Tiered(TieredCompactionOptions {
num_tiers,
@@ -329,6 +342,10 @@ pub fn check_compaction_ratio(storage: Arc<MiniLsm>) {
}
sum_size += this_size;
}
assert!(
num_iters <= num_memtables + num_tiers,
"did you use concat iterators?"
);
}
}
}

View File

@@ -1,3 +1,5 @@
use std::sync::Arc;
use tempfile::tempdir;
use crate::{
@@ -51,8 +53,9 @@ fn test_task1_memtable_overwrite() {
#[test]
fn test_task2_storage_integration() {
let dir = tempdir().unwrap();
let storage =
LsmStorageInner::open(dir.path(), LsmStorageOptions::default_for_week1_test()).unwrap();
let storage = Arc::new(
LsmStorageInner::open(dir.path(), LsmStorageOptions::default_for_week1_test()).unwrap(),
);
assert_eq!(&storage.get(b"0").unwrap(), &None);
storage.put(b"1", b"233").unwrap();
storage.put(b"2", b"2333").unwrap();
@@ -99,7 +102,7 @@ fn test_task3_freeze_on_capacity() {
let mut options = LsmStorageOptions::default_for_week1_test();
options.target_sst_size = 1024;
options.num_memtable_limit = 1000;
let storage = LsmStorageInner::open(dir.path(), options).unwrap();
let storage = Arc::new(LsmStorageInner::open(dir.path(), options).unwrap());
for _ in 0..1000 {
storage.put(b"1", b"2333").unwrap();
}
@@ -117,8 +120,9 @@ fn test_task3_freeze_on_capacity() {
#[test]
fn test_task4_storage_integration() {
let dir = tempdir().unwrap();
let storage =
LsmStorageInner::open(dir.path(), LsmStorageOptions::default_for_week1_test()).unwrap();
let storage = Arc::new(
LsmStorageInner::open(dir.path(), LsmStorageOptions::default_for_week1_test()).unwrap(),
);
assert_eq!(&storage.get(b"0").unwrap(), &None);
storage.put(b"1", b"233").unwrap();
storage.put(b"2", b"2333").unwrap();

View File

@@ -1,4 +1,4 @@
use std::ops::Bound;
use std::{ops::Bound, sync::Arc};
use bytes::Bytes;
use tempfile::tempdir;
@@ -262,8 +262,9 @@ fn test_task3_fused_iterator() {
#[test]
fn test_task4_integration() {
let dir = tempdir().unwrap();
let storage =
LsmStorageInner::open(dir.path(), LsmStorageOptions::default_for_week1_test()).unwrap();
let storage = Arc::new(
LsmStorageInner::open(dir.path(), LsmStorageOptions::default_for_week1_test()).unwrap(),
);
storage.put(b"1", b"233").unwrap();
storage.put(b"2", b"2333").unwrap();
storage.put(b"3", b"23333").unwrap();

View File

@@ -1,4 +1,5 @@
use std::ops::Bound;
use std::sync::Arc;
use self::harness::{check_iter_result_by_key, MockIterator};
use self::harness::{check_lsm_iter_result_by_key, generate_sst};
@@ -130,7 +131,8 @@ fn test_task1_merge_5() {
#[test]
fn test_task2_storage_scan() {
let dir = tempdir().unwrap();
let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
let storage =
Arc::new(LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap());
storage.put(b"1", b"233").unwrap();
storage.put(b"2", b"2333").unwrap();
storage.put(b"00", b"2333").unwrap();
@@ -190,7 +192,8 @@ fn test_task2_storage_scan() {
#[test]
fn test_task3_storage_get() {
let dir = tempdir().unwrap();
let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
let storage =
Arc::new(LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap());
storage.put(b"1", b"233").unwrap();
storage.put(b"2", b"2333").unwrap();
storage.put(b"00", b"2333").unwrap();

View File

@@ -1,4 +1,4 @@
use std::{ops::Bound, time::Duration};
use std::{ops::Bound, sync::Arc, time::Duration};
use bytes::Bytes;
use tempfile::tempdir;
@@ -14,7 +14,8 @@ use crate::{
#[test]
fn test_task1_storage_scan() {
let dir = tempdir().unwrap();
let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
let storage =
Arc::new(LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap());
storage.put(b"0", b"2333333").unwrap();
storage.put(b"00", b"2333333").unwrap();
storage.put(b"4", b"23").unwrap();
@@ -67,7 +68,8 @@ fn test_task1_storage_scan() {
#[test]
fn test_task1_storage_get() {
let dir = tempdir().unwrap();
let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
let storage =
Arc::new(LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap());
storage.put(b"0", b"2333333").unwrap();
storage.put(b"00", b"2333333").unwrap();
storage.put(b"4", b"23").unwrap();
@@ -137,7 +139,8 @@ fn test_task2_auto_flush() {
#[test]
fn test_task3_sst_filter() {
let dir = tempdir().unwrap();
let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
let storage =
Arc::new(LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap());
for i in 1..=10000 {
if i % 1000 == 0 {

View File

@@ -39,7 +39,10 @@ fn construct_merge_iterator_over_storage(
fn test_task1_full_compaction() {
// We do not use LSM iterator in this test because it's implemented as part of task 3
let dir = tempdir().unwrap();
let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
let storage =
Arc::new(LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap());
#[allow(clippy::let_unit_value)]
let _txn = storage.new_txn().unwrap();
storage.put(b"0", b"v1").unwrap();
sync(&storage);
storage.put(b"0", b"v2").unwrap();
@@ -211,7 +214,8 @@ fn test_task2_concat_iterator() {
#[test]
fn test_task3_integration() {
let dir = tempdir().unwrap();
let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
let storage =
Arc::new(LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap());
storage.put(b"0", b"2333333").unwrap();
storage.put(b"00", b"2333333").unwrap();
storage.put(b"4", b"23").unwrap();