fix read path after compaction
Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
@@ -8,6 +8,7 @@ license = { workspace = true }
|
|||||||
repository = { workspace = true }
|
repository = { workspace = true }
|
||||||
description = "A tutorial for building an LSM tree storage engine in a week."
|
description = "A tutorial for building an LSM tree storage engine in a week."
|
||||||
|
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1"
|
anyhow = "1"
|
||||||
arc-swap = "1"
|
arc-swap = "1"
|
||||||
@@ -25,3 +26,7 @@ serde = { version = "1.0", features = ["derive"] }
|
|||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "mini-lsm-cli"
|
||||||
|
path = "src/bin/mini_lsm_cli.rs"
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ enum Args {
|
|||||||
#[clap(long)]
|
#[clap(long)]
|
||||||
dump_real_id: bool,
|
dump_real_id: bool,
|
||||||
#[clap(long, default_value = "3")]
|
#[clap(long, default_value = "3")]
|
||||||
level0_file_num_compaction_trigger: usize,
|
num_tiers: usize,
|
||||||
#[clap(long, default_value = "200")]
|
#[clap(long, default_value = "200")]
|
||||||
max_size_amplification_percent: usize,
|
max_size_amplification_percent: usize,
|
||||||
#[clap(long, default_value = "1")]
|
#[clap(long, default_value = "1")]
|
||||||
@@ -316,7 +316,7 @@ fn main() {
|
|||||||
}
|
}
|
||||||
Args::Tiered {
|
Args::Tiered {
|
||||||
dump_real_id,
|
dump_real_id,
|
||||||
level0_file_num_compaction_trigger,
|
num_tiers: level0_file_num_compaction_trigger,
|
||||||
max_size_amplification_percent,
|
max_size_amplification_percent,
|
||||||
size_ratio,
|
size_ratio,
|
||||||
min_merge_width,
|
min_merge_width,
|
||||||
|
|||||||
@@ -1,19 +1,65 @@
|
|||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use mini_lsm::compact::{CompactionOptions, SimpleLeveledCompactionOptions};
|
use bytes::Bytes;
|
||||||
|
use clap::{Parser, ValueEnum};
|
||||||
|
|
||||||
|
use mini_lsm::compact::{
|
||||||
|
CompactionOptions, LeveledCompactionOptions, SimpleLeveledCompactionOptions,
|
||||||
|
TieredCompactionOptions,
|
||||||
|
};
|
||||||
|
use mini_lsm::iterators::StorageIterator;
|
||||||
use mini_lsm::lsm_storage::{LsmStorageOptions, MiniLsm};
|
use mini_lsm::lsm_storage::{LsmStorageOptions, MiniLsm};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, ValueEnum)]
|
||||||
|
enum CompactionStrategy {
|
||||||
|
Simple,
|
||||||
|
Leveled,
|
||||||
|
Tiered,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(author, version, about, long_about = None)]
|
||||||
|
struct Args {
|
||||||
|
#[arg(long, default_value = "mini-lsm.db")]
|
||||||
|
path: PathBuf,
|
||||||
|
#[arg(long, default_value = "leveled")]
|
||||||
|
compaction: CompactionStrategy,
|
||||||
|
#[arg(long)]
|
||||||
|
enable_wal: bool,
|
||||||
|
}
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
|
let args = Args::parse();
|
||||||
let lsm = MiniLsm::open(
|
let lsm = MiniLsm::open(
|
||||||
"mini-lsm.db",
|
args.path,
|
||||||
LsmStorageOptions {
|
LsmStorageOptions {
|
||||||
block_size: 4096,
|
block_size: 4096,
|
||||||
target_sst_size: 2 << 20,
|
target_sst_size: 2 << 20, // 2MB
|
||||||
compaction_options: CompactionOptions::Simple(SimpleLeveledCompactionOptions {
|
compaction_options: match args.compaction {
|
||||||
size_ratio_percent: 200,
|
CompactionStrategy::Simple => {
|
||||||
level0_file_num_compaction_trigger: 2,
|
CompactionOptions::Simple(SimpleLeveledCompactionOptions {
|
||||||
max_levels: 4,
|
size_ratio_percent: 200,
|
||||||
}),
|
level0_file_num_compaction_trigger: 2,
|
||||||
enable_wal: false,
|
max_levels: 4,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
CompactionStrategy::Tiered => CompactionOptions::Tiered(TieredCompactionOptions {
|
||||||
|
num_tiers: 3,
|
||||||
|
max_size_amplification_percent: 200,
|
||||||
|
size_ratio: 1,
|
||||||
|
min_merge_width: 2,
|
||||||
|
}),
|
||||||
|
CompactionStrategy::Leveled => {
|
||||||
|
CompactionOptions::Leveled(LeveledCompactionOptions {
|
||||||
|
level0_file_num_compaction_trigger: 2,
|
||||||
|
max_levels: 4,
|
||||||
|
base_level_size_mb: 128,
|
||||||
|
level_size_multiplier: 2,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
},
|
||||||
|
enable_wal: args.enable_wal,
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
let mut epoch = 0;
|
let mut epoch = 0;
|
||||||
@@ -51,6 +97,29 @@ fn main() -> Result<()> {
|
|||||||
} else {
|
} else {
|
||||||
println!("{} not exist", key);
|
println!("{} not exist", key);
|
||||||
}
|
}
|
||||||
|
} else if line.starts_with("scan ") {
|
||||||
|
let Some((_, rest)) = line.split_once(' ') else {
|
||||||
|
println!("invalid command");
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some((begin_key, end_key)) = rest.split_once(' ') else {
|
||||||
|
println!("invalid command");
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let mut iter = lsm.scan(
|
||||||
|
std::ops::Bound::Included(begin_key.as_bytes()),
|
||||||
|
std::ops::Bound::Included(end_key.as_bytes()),
|
||||||
|
)?;
|
||||||
|
while iter.is_valid() {
|
||||||
|
println!(
|
||||||
|
"{:?}={:?}",
|
||||||
|
Bytes::copy_from_slice(iter.key()),
|
||||||
|
Bytes::copy_from_slice(iter.value()),
|
||||||
|
);
|
||||||
|
iter.next()?;
|
||||||
|
}
|
||||||
|
} else if line == "dump" {
|
||||||
|
lsm.dump_structure();
|
||||||
} else if line == "flush" {
|
} else if line == "flush" {
|
||||||
lsm.force_flush()?;
|
lsm.force_flush()?;
|
||||||
} else if line == "quit" {
|
} else if line == "quit" {
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ use crate::lsm_storage::{LsmStorageInner, LsmStorageState};
|
|||||||
use crate::manifest::ManifestRecord;
|
use crate::manifest::ManifestRecord;
|
||||||
use crate::table::{SsTable, SsTableBuilder, SsTableIterator};
|
use crate::table::{SsTable, SsTableBuilder, SsTableIterator};
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub(crate) enum CompactionTask {
|
pub(crate) enum CompactionTask {
|
||||||
Leveled(LeveledCompactionTask),
|
Leveled(LeveledCompactionTask),
|
||||||
Tiered(TieredCompactionTask),
|
Tiered(TieredCompactionTask),
|
||||||
@@ -195,7 +195,8 @@ impl LsmStorageInner {
|
|||||||
let state = self.state.read();
|
let state = self.state.read();
|
||||||
state.clone()
|
state.clone()
|
||||||
};
|
};
|
||||||
let original_sstables = snapshot.l0_sstables.clone();
|
let mut original_sstables = snapshot.l0_sstables.clone();
|
||||||
|
original_sstables.reverse();
|
||||||
let sstables = self.compact(&CompactionTask::ForceFullCompaction(
|
let sstables = self.compact(&CompactionTask::ForceFullCompaction(
|
||||||
original_sstables.clone(),
|
original_sstables.clone(),
|
||||||
))?;
|
))?;
|
||||||
@@ -232,7 +233,7 @@ impl LsmStorageInner {
|
|||||||
let Some(task) = task else {
|
let Some(task) = task else {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
};
|
};
|
||||||
println!("running compaction task");
|
println!("running compaction task: {:?}", task);
|
||||||
let sstables = self.compact(&task)?;
|
let sstables = self.compact(&task)?;
|
||||||
let output = sstables.iter().map(|x| x.sst_id()).collect::<Vec<_>>();
|
let output = sstables.iter().map(|x| x.sst_id()).collect::<Vec<_>>();
|
||||||
let ssts_to_remove = {
|
let ssts_to_remove = {
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use crate::lsm_storage::LsmStorageState;
|
use crate::lsm_storage::LsmStorageState;
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct LeveledCompactionTask {
|
pub struct LeveledCompactionTask {
|
||||||
// if upper_level is `None`, then it is L0 compaction
|
// if upper_level is `None`, then it is L0 compaction
|
||||||
pub upper_level: Option<usize>,
|
pub upper_level: Option<usize>,
|
||||||
@@ -94,19 +94,6 @@ impl LeveledCompactionController {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
println!(
|
|
||||||
"target level sizes: {:?}, real level sizes: {:?}, base_level: {}",
|
|
||||||
target_level_size
|
|
||||||
.iter()
|
|
||||||
.map(|x| format!("{}MB", x / 1024 / 1024))
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
real_level_size
|
|
||||||
.iter()
|
|
||||||
.map(|x| format!("{}MB", x / 1024 / 1024))
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
base_level,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Flush L0 SST is the top priority
|
// Flush L0 SST is the top priority
|
||||||
if snapshot.l0_sstables.len() >= self.options.level0_file_num_compaction_trigger {
|
if snapshot.l0_sstables.len() >= self.options.level0_file_num_compaction_trigger {
|
||||||
println!("flush L0 SST to base level {}", base_level);
|
println!("flush L0 SST to base level {}", base_level);
|
||||||
@@ -133,6 +120,19 @@ impl LeveledCompactionController {
|
|||||||
priorities.sort_by(|a, b| a.partial_cmp(b).unwrap().reverse());
|
priorities.sort_by(|a, b| a.partial_cmp(b).unwrap().reverse());
|
||||||
let priority = priorities.first();
|
let priority = priorities.first();
|
||||||
if let Some((_, level)) = priority {
|
if let Some((_, level)) = priority {
|
||||||
|
println!(
|
||||||
|
"target level sizes: {:?}, real level sizes: {:?}, base_level: {}",
|
||||||
|
target_level_size
|
||||||
|
.iter()
|
||||||
|
.map(|x| format!("{}MB", x / 1024 / 1024))
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
real_level_size
|
||||||
|
.iter()
|
||||||
|
.map(|x| format!("{}MB", x / 1024 / 1024))
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
base_level,
|
||||||
|
);
|
||||||
|
|
||||||
let level = *level;
|
let level = *level;
|
||||||
let selected_sst = snapshot.levels[level - 1].1.iter().min().copied().unwrap(); // select the oldest sst to compact
|
let selected_sst = snapshot.levels[level - 1].1.iter().min().copied().unwrap(); // select the oldest sst to compact
|
||||||
println!(
|
println!(
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ pub struct SimpleLeveledCompactionOptions {
|
|||||||
pub max_levels: usize,
|
pub max_levels: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct SimpleLeveledCompactionTask {
|
pub struct SimpleLeveledCompactionTask {
|
||||||
// if upper_level is `None`, then it is L0 compaction
|
// if upper_level is `None`, then it is L0 compaction
|
||||||
pub upper_level: Option<usize>,
|
pub upper_level: Option<usize>,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use crate::lsm_storage::LsmStorageState;
|
use crate::lsm_storage::LsmStorageState;
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct TieredCompactionTask {
|
pub struct TieredCompactionTask {
|
||||||
pub tiers: Vec<(usize, Vec<usize>)>,
|
pub tiers: Vec<(usize, Vec<usize>)>,
|
||||||
pub bottom_tier_included: bool,
|
pub bottom_tier_included: bool,
|
||||||
|
|||||||
17
mini-lsm/src/debug.rs
Normal file
17
mini-lsm/src/debug.rs
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
use crate::lsm_storage::MiniLsm;
|
||||||
|
|
||||||
|
impl MiniLsm {
|
||||||
|
pub fn dump_structure(&self) {
|
||||||
|
let snapshot = self.inner.state.read();
|
||||||
|
if !snapshot.l0_sstables.is_empty() {
|
||||||
|
println!(
|
||||||
|
"L0 ({}): {:?}",
|
||||||
|
snapshot.l0_sstables.len(),
|
||||||
|
snapshot.l0_sstables,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
for (level, files) in &snapshot.levels {
|
||||||
|
println!("L{level} ({}): {:?}", files.len(), files);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
pub mod block;
|
pub mod block;
|
||||||
pub mod compact;
|
pub mod compact;
|
||||||
|
pub mod debug;
|
||||||
pub mod iterators;
|
pub mod iterators;
|
||||||
pub mod lsm_iterator;
|
pub mod lsm_iterator;
|
||||||
pub mod lsm_storage;
|
pub mod lsm_storage;
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ pub(crate) struct LsmStorageInner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct MiniLsm {
|
pub struct MiniLsm {
|
||||||
inner: Arc<LsmStorageInner>,
|
pub(crate) inner: Arc<LsmStorageInner>,
|
||||||
compaction_notifier: crossbeam_channel::Sender<()>,
|
compaction_notifier: crossbeam_channel::Sender<()>,
|
||||||
compaction_thread: Mutex<Option<std::thread::JoinHandle<()>>>,
|
compaction_thread: Mutex<Option<std::thread::JoinHandle<()>>>,
|
||||||
}
|
}
|
||||||
@@ -105,7 +105,7 @@ impl MiniLsm {
|
|||||||
pub fn close(&self) -> Result<()> {
|
pub fn close(&self) -> Result<()> {
|
||||||
self.compaction_notifier.send(()).ok();
|
self.compaction_notifier.send(()).ok();
|
||||||
let mut compaction_thread = self.compaction_thread.lock();
|
let mut compaction_thread = self.compaction_thread.lock();
|
||||||
if let Some(mut compaction_thread) = compaction_thread.take() {
|
if let Some(compaction_thread) = compaction_thread.take() {
|
||||||
compaction_thread
|
compaction_thread
|
||||||
.join()
|
.join()
|
||||||
.map_err(|e| anyhow::anyhow!("{:?}", e))?;
|
.map_err(|e| anyhow::anyhow!("{:?}", e))?;
|
||||||
@@ -211,7 +211,7 @@ impl LsmStorageInner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Search on immutable memtables.
|
// Search on immutable memtables.
|
||||||
for memtable in snapshot.imm_memtables.iter().rev() {
|
for memtable in snapshot.imm_memtables.iter() {
|
||||||
if let Some(value) = memtable.get(key) {
|
if let Some(value) = memtable.get(key) {
|
||||||
if value.is_empty() {
|
if value.is_empty() {
|
||||||
// found tombstone, return key does not exist
|
// found tombstone, return key does not exist
|
||||||
@@ -221,7 +221,11 @@ impl LsmStorageInner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut iters = Vec::with_capacity(snapshot.l0_sstables.len());
|
let mut iters = Vec::with_capacity(snapshot.l0_sstables.len());
|
||||||
for table in snapshot.l0_sstables.iter().rev() {
|
for table in snapshot
|
||||||
|
.l0_sstables
|
||||||
|
.iter()
|
||||||
|
.chain(snapshot.levels.iter().map(|(_, files)| files).flatten())
|
||||||
|
{
|
||||||
iters.push(Box::new(SsTableIterator::create_and_seek_to_key(
|
iters.push(Box::new(SsTableIterator::create_and_seek_to_key(
|
||||||
snapshot.sstables[table].clone(),
|
snapshot.sstables[table].clone(),
|
||||||
key,
|
key,
|
||||||
@@ -292,7 +296,7 @@ impl LsmStorageInner {
|
|||||||
let mut snapshot = guard.as_ref().clone();
|
let mut snapshot = guard.as_ref().clone();
|
||||||
old_memtable = std::mem::replace(&mut snapshot.memtable, memtable);
|
old_memtable = std::mem::replace(&mut snapshot.memtable, memtable);
|
||||||
// Add the memtable to the immutable memtables.
|
// Add the memtable to the immutable memtables.
|
||||||
snapshot.imm_memtables.push(old_memtable.clone());
|
snapshot.imm_memtables.insert(0, old_memtable.clone());
|
||||||
// Update the snapshot.
|
// Update the snapshot.
|
||||||
*guard = Arc::new(snapshot);
|
*guard = Arc::new(snapshot);
|
||||||
}
|
}
|
||||||
@@ -311,10 +315,10 @@ impl LsmStorageInner {
|
|||||||
let flush_memtable;
|
let flush_memtable;
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut guard = self.state.read();
|
let guard = self.state.read();
|
||||||
flush_memtable = guard
|
flush_memtable = guard
|
||||||
.imm_memtables
|
.imm_memtables
|
||||||
.first()
|
.last()
|
||||||
.expect("no imm memtables!")
|
.expect("no imm memtables!")
|
||||||
.clone();
|
.clone();
|
||||||
}
|
}
|
||||||
@@ -333,12 +337,12 @@ impl LsmStorageInner {
|
|||||||
let mut guard = self.state.write();
|
let mut guard = self.state.write();
|
||||||
let mut snapshot = guard.as_ref().clone();
|
let mut snapshot = guard.as_ref().clone();
|
||||||
// Remove the memtable from the immutable memtables.
|
// Remove the memtable from the immutable memtables.
|
||||||
let mem = snapshot.imm_memtables.remove(0);
|
let mem = snapshot.imm_memtables.pop().unwrap();
|
||||||
assert_eq!(mem.id(), sst_id);
|
assert_eq!(mem.id(), sst_id);
|
||||||
// Add L0 table
|
// Add L0 table
|
||||||
if self.compaction_controller.flush_to_l0() {
|
if self.compaction_controller.flush_to_l0() {
|
||||||
// In leveled compaction or no compaction, simply flush to L0
|
// In leveled compaction or no compaction, simply flush to L0
|
||||||
snapshot.l0_sstables.push(sst_id);
|
snapshot.l0_sstables.insert(0, sst_id);
|
||||||
} else {
|
} else {
|
||||||
// In tiered compaction, create a new tier
|
// In tiered compaction, create a new tier
|
||||||
snapshot.levels.insert(0, (sst_id, vec![sst_id]));
|
snapshot.levels.insert(0, (sst_id, vec![sst_id]));
|
||||||
@@ -374,13 +378,17 @@ impl LsmStorageInner {
|
|||||||
|
|
||||||
let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1);
|
let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1);
|
||||||
memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper)));
|
memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper)));
|
||||||
for memtable in snapshot.imm_memtables.iter().rev() {
|
for memtable in snapshot.imm_memtables.iter() {
|
||||||
memtable_iters.push(Box::new(memtable.scan(lower, upper)));
|
memtable_iters.push(Box::new(memtable.scan(lower, upper)));
|
||||||
}
|
}
|
||||||
let memtable_iter = MergeIterator::create(memtable_iters);
|
let memtable_iter = MergeIterator::create(memtable_iters);
|
||||||
|
|
||||||
let mut table_iters = Vec::with_capacity(snapshot.l0_sstables.len());
|
let mut table_iters = Vec::with_capacity(snapshot.l0_sstables.len());
|
||||||
for table_id in snapshot.l0_sstables.iter().rev() {
|
for table_id in snapshot
|
||||||
|
.l0_sstables
|
||||||
|
.iter()
|
||||||
|
.chain(snapshot.levels.iter().map(|(_, files)| files).flatten())
|
||||||
|
{
|
||||||
let table = snapshot.sstables[table_id].clone();
|
let table = snapshot.sstables[table_id].clone();
|
||||||
let iter = match lower {
|
let iter = match lower {
|
||||||
Bound::Included(key) => SsTableIterator::create_and_seek_to_key(table, key)?,
|
Bound::Included(key) => SsTableIterator::create_and_seek_to_key(table, key)?,
|
||||||
@@ -396,6 +404,7 @@ impl LsmStorageInner {
|
|||||||
|
|
||||||
table_iters.push(Box::new(iter));
|
table_iters.push(Box::new(iter));
|
||||||
}
|
}
|
||||||
|
|
||||||
let table_iter = MergeIterator::create(table_iters);
|
let table_iter = MergeIterator::create(table_iters);
|
||||||
|
|
||||||
let iter = TwoMergeIterator::create(memtable_iter, table_iter)?;
|
let iter = TwoMergeIterator::create(memtable_iter, table_iter)?;
|
||||||
|
|||||||
Reference in New Issue
Block a user