2024-01-18 14:50:12 +08:00
|
|
|
use std::collections::HashSet;
|
|
|
|
|
|
2024-01-19 11:21:38 +08:00
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
|
2024-01-18 17:51:24 +08:00
|
|
|
use crate::lsm_storage::LsmStorageState;
|
2024-01-16 16:30:01 +08:00
|
|
|
|
2024-01-19 16:10:18 +08:00
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
2024-01-16 16:30:01 +08:00
|
|
|
pub struct LeveledCompactionTask {
|
2024-01-17 15:42:33 +08:00
|
|
|
// if upper_level is `None`, then it is L0 compaction
|
2024-01-18 14:50:12 +08:00
|
|
|
pub upper_level: Option<usize>,
|
|
|
|
|
pub upper_level_sst_ids: Vec<usize>,
|
|
|
|
|
pub lower_level: usize,
|
|
|
|
|
pub lower_level_sst_ids: Vec<usize>,
|
2024-01-18 19:40:05 +08:00
|
|
|
pub is_lower_level_bottom_level: bool,
|
2024-01-16 16:30:01 +08:00
|
|
|
}
|
|
|
|
|
|
2024-01-18 17:51:24 +08:00
|
|
|
/// Tuning knobs for [`LeveledCompactionController`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LeveledCompactionOptions {
    /// Ratio between the target sizes of two adjacent levels: a level's target
    /// size is the target size of the level below it divided by this value.
    pub level_size_multiplier: usize,
    /// An L0 compaction is scheduled once the number of L0 SSTs reaches this
    /// value.
    pub level0_file_num_compaction_trigger: usize,
    /// Number of levels below L0 that the controller manages.
    pub max_levels: usize,
    /// Minimum target size of the last level, in MiB. A level only receives a
    /// non-zero target size when the target of the level below exceeds it.
    pub base_level_size_mb: usize,
}
|
|
|
|
|
|
|
|
|
|
pub struct LeveledCompactionController {
|
|
|
|
|
options: LeveledCompactionOptions,
|
|
|
|
|
}
|
2024-01-16 16:30:01 +08:00
|
|
|
|
|
|
|
|
impl LeveledCompactionController {
|
2024-01-18 14:50:12 +08:00
|
|
|
pub fn new(options: LeveledCompactionOptions) -> Self {
|
|
|
|
|
Self { options }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn find_overlapping_ssts(
|
|
|
|
|
&self,
|
2024-01-18 17:51:24 +08:00
|
|
|
snapshot: &LsmStorageState,
|
2024-01-18 14:50:12 +08:00
|
|
|
sst_ids: &[usize],
|
|
|
|
|
in_level: usize,
|
|
|
|
|
) -> Vec<usize> {
|
|
|
|
|
let begin_key = sst_ids
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|id| snapshot.sstables[id].first_key())
|
|
|
|
|
.min()
|
|
|
|
|
.cloned()
|
|
|
|
|
.unwrap();
|
|
|
|
|
let end_key = sst_ids
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|id| snapshot.sstables[id].last_key())
|
|
|
|
|
.max()
|
|
|
|
|
.cloned()
|
|
|
|
|
.unwrap();
|
|
|
|
|
let mut overlap_ssts = Vec::new();
|
|
|
|
|
for sst_id in &snapshot.levels[in_level - 1].1 {
|
|
|
|
|
let sst = &snapshot.sstables[sst_id];
|
|
|
|
|
let first_key = sst.first_key();
|
|
|
|
|
let last_key = sst.last_key();
|
|
|
|
|
if !(last_key < &begin_key || first_key > &end_key) {
|
|
|
|
|
overlap_ssts.push(*sst_id);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
overlap_ssts
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn generate_compaction_task(
|
|
|
|
|
&self,
|
2024-01-18 17:51:24 +08:00
|
|
|
snapshot: &LsmStorageState,
|
2024-01-18 14:50:12 +08:00
|
|
|
) -> Option<LeveledCompactionTask> {
|
|
|
|
|
// step 1: compute target level size
|
|
|
|
|
let mut target_level_size = (0..self.options.max_levels).map(|_| 0).collect::<Vec<_>>(); // exclude level 0
|
|
|
|
|
let mut real_level_size = Vec::with_capacity(self.options.max_levels);
|
|
|
|
|
let mut base_level = self.options.max_levels;
|
|
|
|
|
for i in 0..self.options.max_levels {
|
|
|
|
|
real_level_size.push(
|
|
|
|
|
snapshot.levels[i]
|
|
|
|
|
.1
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|x| snapshot.sstables.get(x).unwrap().table_size())
|
|
|
|
|
.sum::<u64>() as usize,
|
|
|
|
|
);
|
|
|
|
|
}
|
2024-01-24 14:39:00 +08:00
|
|
|
let base_level_size_bytes = self.options.base_level_size_mb * 1024 * 1024;
|
2024-01-18 14:50:12 +08:00
|
|
|
|
|
|
|
|
// select base level and compute target level size
|
|
|
|
|
target_level_size[self.options.max_levels - 1] =
|
|
|
|
|
real_level_size[self.options.max_levels - 1].max(base_level_size_bytes);
|
|
|
|
|
for i in (0..(self.options.max_levels - 1)).rev() {
|
|
|
|
|
let next_level_size = target_level_size[i + 1];
|
|
|
|
|
let this_level_size = next_level_size / self.options.level_size_multiplier;
|
|
|
|
|
if next_level_size > base_level_size_bytes {
|
|
|
|
|
target_level_size[i] = this_level_size;
|
|
|
|
|
}
|
|
|
|
|
if target_level_size[i] > 0 {
|
|
|
|
|
base_level = i + 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Flush L0 SST is the top priority
|
|
|
|
|
if snapshot.l0_sstables.len() >= self.options.level0_file_num_compaction_trigger {
|
|
|
|
|
println!("flush L0 SST to base level {}", base_level);
|
|
|
|
|
return Some(LeveledCompactionTask {
|
|
|
|
|
upper_level: None,
|
|
|
|
|
upper_level_sst_ids: snapshot.l0_sstables.clone(),
|
|
|
|
|
lower_level: base_level,
|
|
|
|
|
lower_level_sst_ids: self.find_overlapping_ssts(
|
|
|
|
|
snapshot,
|
|
|
|
|
&snapshot.l0_sstables,
|
|
|
|
|
base_level,
|
|
|
|
|
),
|
2024-01-18 19:40:05 +08:00
|
|
|
is_lower_level_bottom_level: base_level == self.options.max_levels,
|
2024-01-18 14:50:12 +08:00
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut priorities = Vec::with_capacity(self.options.max_levels);
|
|
|
|
|
for level in 0..self.options.max_levels {
|
|
|
|
|
let prio = real_level_size[level] as f64 / target_level_size[level] as f64;
|
|
|
|
|
if prio > 1.0 {
|
|
|
|
|
priorities.push((prio, level + 1));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
priorities.sort_by(|a, b| a.partial_cmp(b).unwrap().reverse());
|
|
|
|
|
let priority = priorities.first();
|
|
|
|
|
if let Some((_, level)) = priority {
|
2024-01-19 16:10:18 +08:00
|
|
|
println!(
|
|
|
|
|
"target level sizes: {:?}, real level sizes: {:?}, base_level: {}",
|
|
|
|
|
target_level_size
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|x| format!("{}MB", x / 1024 / 1024))
|
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
|
real_level_size
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|x| format!("{}MB", x / 1024 / 1024))
|
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
|
base_level,
|
|
|
|
|
);
|
|
|
|
|
|
2024-01-18 14:50:12 +08:00
|
|
|
let level = *level;
|
|
|
|
|
let selected_sst = snapshot.levels[level - 1].1.iter().min().copied().unwrap(); // select the oldest sst to compact
|
|
|
|
|
println!(
|
|
|
|
|
"compaction triggered by priority: {level} out of {:?}, select {selected_sst} for compaction",
|
|
|
|
|
priorities
|
|
|
|
|
);
|
|
|
|
|
return Some(LeveledCompactionTask {
|
|
|
|
|
upper_level: Some(level),
|
|
|
|
|
upper_level_sst_ids: vec![selected_sst],
|
|
|
|
|
lower_level: level + 1,
|
|
|
|
|
lower_level_sst_ids: self.find_overlapping_ssts(
|
|
|
|
|
snapshot,
|
|
|
|
|
&[selected_sst],
|
|
|
|
|
level + 1,
|
|
|
|
|
),
|
2024-01-18 19:40:05 +08:00
|
|
|
is_lower_level_bottom_level: level + 1 == self.options.max_levels,
|
2024-01-18 14:50:12 +08:00
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
None
|
2024-01-16 16:30:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn apply_compaction_result(
|
|
|
|
|
&self,
|
2024-01-18 17:51:24 +08:00
|
|
|
snapshot: &LsmStorageState,
|
2024-01-16 16:30:01 +08:00
|
|
|
task: &LeveledCompactionTask,
|
|
|
|
|
output: &[usize],
|
2024-01-18 17:51:24 +08:00
|
|
|
) -> (LsmStorageState, Vec<usize>) {
|
2024-01-18 14:50:12 +08:00
|
|
|
let mut snapshot = snapshot.clone();
|
|
|
|
|
let mut files_to_remove = Vec::new();
|
|
|
|
|
let mut upper_level_sst_ids_set = task
|
|
|
|
|
.upper_level_sst_ids
|
|
|
|
|
.iter()
|
|
|
|
|
.copied()
|
|
|
|
|
.collect::<HashSet<_>>();
|
|
|
|
|
let mut lower_level_sst_ids_set = task
|
|
|
|
|
.lower_level_sst_ids
|
|
|
|
|
.iter()
|
|
|
|
|
.copied()
|
|
|
|
|
.collect::<HashSet<_>>();
|
|
|
|
|
if let Some(upper_level) = task.upper_level {
|
2024-01-18 15:15:51 +08:00
|
|
|
let new_upper_level_ssts = snapshot.levels[upper_level - 1]
|
|
|
|
|
.1
|
|
|
|
|
.iter()
|
|
|
|
|
.filter_map(|x| {
|
|
|
|
|
if upper_level_sst_ids_set.remove(x) {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
Some(*x)
|
|
|
|
|
})
|
|
|
|
|
.collect::<Vec<_>>();
|
2024-01-18 14:50:12 +08:00
|
|
|
assert!(upper_level_sst_ids_set.is_empty());
|
|
|
|
|
snapshot.levels[upper_level - 1].1 = new_upper_level_ssts;
|
|
|
|
|
} else {
|
|
|
|
|
let new_l0_ssts = snapshot
|
|
|
|
|
.l0_sstables
|
|
|
|
|
.iter()
|
|
|
|
|
.filter_map(|x| {
|
|
|
|
|
if upper_level_sst_ids_set.remove(x) {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
Some(*x)
|
|
|
|
|
})
|
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
assert!(upper_level_sst_ids_set.is_empty());
|
|
|
|
|
snapshot.l0_sstables = new_l0_ssts;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
files_to_remove.extend(&task.upper_level_sst_ids);
|
|
|
|
|
files_to_remove.extend(&task.lower_level_sst_ids);
|
|
|
|
|
|
|
|
|
|
let mut new_lower_level_ssts = snapshot.levels[task.lower_level - 1]
|
|
|
|
|
.1
|
|
|
|
|
.iter()
|
|
|
|
|
.filter_map(|x| {
|
|
|
|
|
if lower_level_sst_ids_set.remove(x) {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
Some(*x)
|
|
|
|
|
})
|
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
assert!(lower_level_sst_ids_set.is_empty());
|
|
|
|
|
new_lower_level_ssts.extend(output);
|
|
|
|
|
new_lower_level_ssts.sort_by(|x, y| {
|
|
|
|
|
snapshot
|
|
|
|
|
.sstables
|
|
|
|
|
.get(x)
|
|
|
|
|
.unwrap()
|
|
|
|
|
.first_key()
|
|
|
|
|
.cmp(snapshot.sstables.get(y).unwrap().first_key())
|
|
|
|
|
});
|
|
|
|
|
snapshot.levels[task.lower_level - 1].1 = new_lower_level_ssts;
|
|
|
|
|
(snapshot, files_to_remove)
|
2024-01-16 16:30:01 +08:00
|
|
|
}
|
|
|
|
|
}
|