finish week 1 day 6

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
This commit is contained in:
Alex Chi Z
2024-01-21 17:40:47 +08:00
parent a2d8b3c865
commit fa35a7dc9e
16 changed files with 376 additions and 22 deletions

View File

@@ -26,7 +26,3 @@ serde = { version = "1.0", features = ["derive"] }
[dev-dependencies]
tempfile = "3"
[[bin]]
name = "mini-lsm-cli"
path = "src/bin/mini_lsm_cli.rs"

View File

@@ -0,0 +1 @@
../../../mini-lsm-starter/src/bin/mini-lsm-cli.rs

View File

@@ -1,134 +0,0 @@
use std::path::PathBuf;
use anyhow::Result;
use bytes::Bytes;
use clap::{Parser, ValueEnum};
use mini_lsm::compact::{
CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
TieredCompactionOptions,
};
use mini_lsm::iterators::StorageIterator;
use mini_lsm::lsm_storage::{LsmStorageOptions, MiniLsm};
// Compaction strategy selectable through the `--compaction` command-line option.
// `main` maps each variant to the matching `CompactionOptions` configuration.
#[derive(Debug, Clone, ValueEnum)]
enum CompactionStrategy {
// Mapped to `CompactionOptions::Simple` in `main`.
Simple,
// Mapped to `CompactionOptions::Leveled` in `main`; this is the CLI default.
Leveled,
// Mapped to `CompactionOptions::Tiered` in `main`.
Tiered,
}
// Command-line arguments of the mini-lsm CLI, parsed with clap's derive API.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
// Path passed to `MiniLsm::open`; defaults to "lsm.db".
#[arg(long, default_value = "lsm.db")]
path: PathBuf,
// Compaction strategy to configure the store with; defaults to "leveled".
#[arg(long, default_value = "leveled")]
compaction: CompactionStrategy,
// Forwarded to `LsmStorageOptions::enable_wal`; defaults to "true".
// NOTE(review): with clap's default handling of `bool` flags, a "true" default
// may make it impossible to turn the WAL off from the command line — confirm.
#[arg(long, default_value = "true")]
enable_wal: bool,
}
/// Runs the interactive mini-lsm shell.
///
/// Opens the store at `--path` with the compaction strategy chosen by
/// `--compaction`, then reads one command per line from standard input:
///
/// * `fill <begin> <end>` — puts keys `begin..=end` with values `value<key>@<epoch>`
/// * `get <key>` — prints the value stored for `key`, or that it does not exist
/// * `scan <begin> <end>` — prints every pair in the inclusive key range
/// * `dump` — calls `dump_structure`
/// * `flush` — calls `force_flush`
/// * `quit` — closes the store and exits
///
/// End of input is treated like `quit`. Malformed commands print
/// `invalid command` and leave the shell running.
///
/// # Errors
///
/// Returns any error produced while opening the store, reading standard
/// input, or executing a storage operation.
fn main() -> Result<()> {
    let args = Args::parse();
    let lsm = MiniLsm::open(
        args.path,
        LsmStorageOptions {
            block_size: 4096,
            target_sst_size: 2 << 20, // 2MB
            num_memtable_limit: 3,
            compaction_options: match args.compaction {
                CompactionStrategy::Simple => {
                    CompactionOptions::Simple(SimpleLeveledCompactionOptions {
                        size_ratio_percent: 200,
                        level0_file_num_compaction_trigger: 2,
                        max_levels: 4,
                    })
                }
                CompactionStrategy::Tiered => CompactionOptions::Tiered(TieredCompactionOptions {
                    num_tiers: 3,
                    max_size_amplification_percent: 200,
                    size_ratio: 1,
                    min_merge_width: 2,
                }),
                CompactionStrategy::Leveled => {
                    CompactionOptions::Leveled(LeveledCompactionOptions {
                        level0_file_num_compaction_trigger: 2,
                        max_levels: 4,
                        base_level_size_mb: 128,
                        level_size_multiplier: 2,
                    })
                }
            },
            enable_wal: args.enable_wal,
        },
    )?;
    // Bumped after every processed command so that values written by different
    // `fill` invocations can be told apart.
    let mut epoch = 0;
    loop {
        let mut line = String::new();
        // `read_line` yields `Ok(0)` once standard input is exhausted. Without
        // this check the loop would spin forever on an empty line and the
        // store would never be closed.
        if std::io::stdin().read_line(&mut line)? == 0 {
            lsm.close()?;
            break;
        }
        let line = line.trim();
        if let Some(options) = line.strip_prefix("fill ") {
            let Some((begin, end)) = options.split_once(' ') else {
                println!("invalid command");
                continue;
            };
            // A typo in a number is user error, not a reason to abort the shell.
            let (Ok(begin), Ok(end)) = (begin.parse::<u64>(), end.parse::<u64>()) else {
                println!("invalid command");
                continue;
            };
            // Rejects `end < begin` (and the full-u64 range) instead of
            // overflowing while computing the number of keys.
            let Some(count) = end.checked_sub(begin).and_then(|span| span.checked_add(1)) else {
                println!("invalid command");
                continue;
            };
            for i in begin..=end {
                lsm.put(
                    i.to_string().as_bytes(),
                    format!("value{}@{}", i, epoch).as_bytes(),
                )?;
            }
            println!("{} values filled with epoch {}", count, epoch);
        } else if let Some(key) = line.strip_prefix("get ") {
            if let Some(value) = lsm.get(key.as_bytes())? {
                println!("{}={:?}", key, value);
            } else {
                println!("{} not exist", key);
            }
        } else if let Some(rest) = line.strip_prefix("scan ") {
            let Some((begin_key, end_key)) = rest.split_once(' ') else {
                println!("invalid command");
                continue;
            };
            let mut iter = lsm.scan(
                std::ops::Bound::Included(begin_key.as_bytes()),
                std::ops::Bound::Included(end_key.as_bytes()),
            )?;
            while iter.is_valid() {
                println!(
                    "{:?}={:?}",
                    Bytes::copy_from_slice(iter.key()),
                    Bytes::copy_from_slice(iter.value()),
                );
                iter.next()?;
            }
        } else if line == "dump" {
            lsm.dump_structure();
        } else if line == "flush" {
            lsm.force_flush()?;
        } else if line == "quit" {
            lsm.close()?;
            break;
        } else {
            println!("invalid command: {}", line);
        }
        epoch += 1;
    }
    Ok(())
}

View File

@@ -0,0 +1,6 @@
/// Wrapper module that re-exports every public item of the `mini_lsm` crate,
/// so the crate's contents are reachable as `mini_lsm_wrapper::...`.
pub mod mini_lsm_wrapper {
pub use mini_lsm::*;
}
// Intentionally empty entry point. `dead_code` is allowed because the function
// may go unused — presumably this file is also pulled in as a module by
// another binary; confirm against the file that includes it.
#[allow(dead_code)]
fn main() {}

View File

@@ -1,10 +1,9 @@
use std::ops::Bound;
use self::harness::generate_sst;
use self::harness::{check_iter_result, MockIterator};
use bytes::Bytes;
use tempfile::tempdir;
use week1_day5::harness::generate_sst;
use self::harness::{check_iter_result, MockIterator};
use super::*;
use crate::{

View File

@@ -0,0 +1,201 @@
use std::{ops::Bound, time::Duration};
use bytes::Bytes;
use tempfile::tempdir;
use self::harness::check_iter_result;
use super::*;
use crate::{
iterators::StorageIterator,
lsm_storage::{LsmStorageInner, LsmStorageOptions, MiniLsm},
};
/// Freezes the current memtable and then flushes the next immutable memtable,
/// panicking if either step fails.
fn sync(storage: &LsmStorageInner) {
    {
        // Keep the state-lock guard confined to this scope so that it is
        // released before the flush below runs, exactly like a temporary.
        let state_lock_guard = storage.state_lock.lock();
        storage.force_freeze_memtable(&state_lock_guard).unwrap();
    }
    storage.force_flush_next_imm_memtable().unwrap();
}
/// Scans a store whose data is spread over two flushed SSTs, two frozen
/// memtables and the active memtable, and checks that newer writes and
/// deletions shadow older entries for full and bounded ranges.
#[test]
fn test_task1_storage_scan() {
    let dir = tempdir().unwrap();
    let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();

    // Small local helpers to keep the write sequence readable.
    let put = |key: &[u8], value: &[u8]| storage.put(key, value).unwrap();
    let delete = |key: &[u8]| storage.delete(key).unwrap();
    let freeze = || {
        storage
            .force_freeze_memtable(&storage.state_lock.lock())
            .unwrap();
    };
    let entry = |key: &'static str, value: &'static str| (Bytes::from(key), Bytes::from(value));

    // First flushed SST.
    put(b"0", b"2333333");
    put(b"00", b"2333333");
    put(b"4", b"23");
    sync(&storage);

    // Second flushed SST: only the deletion of "4".
    delete(b"4");
    sync(&storage);

    // First frozen memtable.
    put(b"1", b"233");
    put(b"2", b"2333");
    freeze();

    // Second frozen memtable overwrites "00".
    put(b"00", b"2333");
    freeze();

    // Active memtable.
    put(b"3", b"23333");
    delete(b"1");

    let (l0_count, imm_count) = {
        let snapshot = storage.state.read();
        (snapshot.l0_sstables.len(), snapshot.imm_memtables.len())
    };
    assert_eq!(l0_count, 2);
    assert_eq!(imm_count, 2);

    let everything = vec![
        entry("0", "2333333"),
        entry("00", "2333"),
        entry("2", "2333"),
        entry("3", "23333"),
    ];
    check_iter_result(
        &mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
        everything,
    );

    let inclusive_one_to_two = vec![entry("2", "2333")];
    check_iter_result(
        &mut storage
            .scan(Bound::Included(b"1"), Bound::Included(b"2"))
            .unwrap(),
        inclusive_one_to_two,
    );

    let exclusive_one_to_three = vec![entry("2", "2333")];
    check_iter_result(
        &mut storage
            .scan(Bound::Excluded(b"1"), Bound::Excluded(b"3"))
            .unwrap(),
        exclusive_one_to_three,
    );
}
/// Point-reads a store whose data is spread over two flushed SSTs, two frozen
/// memtables and the active memtable, checking that the newest version of
/// every key wins and that deleted or never-written keys read as `None`.
#[test]
fn test_task1_storage_get() {
    let dir = tempdir().unwrap();
    let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();

    // Small local helpers to keep the write sequence and the checks readable.
    let put = |key: &[u8], value: &[u8]| storage.put(key, value).unwrap();
    let delete = |key: &[u8]| storage.delete(key).unwrap();
    let freeze = || {
        storage
            .force_freeze_memtable(&storage.state_lock.lock())
            .unwrap();
    };
    let assert_present = |key: &[u8], expected: &'static [u8]| {
        assert_eq!(
            storage.get(key).unwrap(),
            Some(Bytes::from_static(expected))
        );
    };
    let assert_absent = |key: &[u8]| {
        assert_eq!(storage.get(key).unwrap(), None);
    };

    // First flushed SST.
    put(b"0", b"2333333");
    put(b"00", b"2333333");
    put(b"4", b"23");
    sync(&storage);

    // Second flushed SST: only the deletion of "4".
    delete(b"4");
    sync(&storage);

    // First frozen memtable.
    put(b"1", b"233");
    put(b"2", b"2333");
    freeze();

    // Second frozen memtable overwrites "00".
    put(b"00", b"2333");
    freeze();

    // Active memtable.
    put(b"3", b"23333");
    delete(b"1");

    let (l0_count, imm_count) = {
        let snapshot = storage.state.read();
        (snapshot.l0_sstables.len(), snapshot.imm_memtables.len())
    };
    assert_eq!(l0_count, 2);
    assert_eq!(imm_count, 2);

    assert_present(b"0", b"2333333");
    assert_present(b"00", b"2333");
    assert_present(b"2", b"2333");
    assert_present(b"3", b"23333");

    assert_absent(b"4");
    assert_absent(b"--");
    assert_absent(b"555");
}
/// Writes roughly 6MB of data through `MiniLsm` and checks that at least one
/// L0 SST shows up without an explicit flush call.
///
/// The check polls with a deadline instead of sleeping for a fixed interval,
/// so the test neither fails spuriously on a slow machine nor waits longer
/// than necessary on a fast one.
#[test]
fn test_task2_auto_flush() {
    let dir = tempdir().unwrap();
    let storage = MiniLsm::open(&dir, LsmStorageOptions::default_for_week1_day6_test()).unwrap();

    let value = "1".repeat(1024); // 1KB

    // approximately 6MB
    for i in 0..6000 {
        storage
            .put(i.to_string().as_bytes(), value.as_bytes())
            .unwrap();
    }

    let deadline = std::time::Instant::now() + Duration::from_secs(5);
    loop {
        // Bind the result so the read guard is released before sleeping.
        let flushed = !storage.inner.state.read().l0_sstables.is_empty();
        if flushed {
            break;
        }
        assert!(
            std::time::Instant::now() < deadline,
            "memtables were not flushed to L0 within the timeout"
        );
        std::thread::sleep(Duration::from_millis(50));
    }
}
/// Builds ten SSTs plus an active memtable over keys `00001..=10000` and checks
/// that range scans keep fewer iterators active when the requested range
/// excludes part (or all) of the stored key space.
#[test]
fn test_task3_sst_filter() {
    /// Opens a scan over the given bounds and reports its active-iterator count.
    fn active_iterators(
        storage: &LsmStorageInner,
        lower: Bound<&[u8]>,
        upper: Bound<&[u8]>,
    ) -> usize {
        storage.scan(lower, upper).unwrap().num_active_iterators()
    }

    let dir = tempdir().unwrap();
    let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
    // Keys are zero-padded to five digits so byte order matches numeric order.
    let key = |n: u32| format!("{:05}", n);

    for i in 1..=10000 {
        if i % 1000 == 0 {
            sync(&storage);
        }
        storage.put(key(i).as_bytes(), b"2333333").unwrap();
    }

    // Full scan: the upper bound on the number of active iterators.
    let max_num = active_iterators(&storage, Bound::Unbounded, Bound::Unbounded);
    assert!(
        max_num >= 10,
        "did you implement num_active_iterators? current active iterators = {}",
        max_num
    );

    // Strictly above the largest key: the lower bound on active iterators.
    let min_num = active_iterators(
        &storage,
        Bound::Excluded(key(10000).as_bytes()),
        Bound::Unbounded,
    );
    assert!(min_num < max_num);

    // Other ranges that contain no stored key must match the lower bound.
    let below_first = active_iterators(
        &storage,
        Bound::Unbounded,
        Bound::Excluded(key(1).as_bytes()),
    );
    assert_eq!(below_first, min_num);

    let up_to_zero = active_iterators(
        &storage,
        Bound::Unbounded,
        Bound::Included(key(0).as_bytes()),
    );
    assert_eq!(up_to_zero, min_num);

    let from_past_last = active_iterators(
        &storage,
        Bound::Included(key(10001).as_bytes()),
        Bound::Unbounded,
    );
    assert_eq!(from_past_last, min_num);

    // A range in the middle sits strictly between the two extremes.
    let middle = active_iterators(
        &storage,
        Bound::Included(key(5000).as_bytes()),
        Bound::Excluded(key(6000).as_bytes()),
    );
    assert!(min_num < middle && middle < max_num);
}