* relicense mini-lsm-book to CC BY-NC-SA 4.0 Signed-off-by: Alex Chi Z <iskyzh@gmail.com> * clearify license Signed-off-by: Alex Chi Z <iskyzh@gmail.com> * fix fmt Signed-off-by: Alex Chi Z <iskyzh@gmail.com> * fix fmt Signed-off-by: Alex Chi Z <iskyzh@gmail.com> --------- Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
155 lines
5.9 KiB
Rust
155 lines
5.9 KiB
Rust
// Copyright (c) 2022-2025 Alex Chi Z
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
use std::collections::HashSet;
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use crate::lsm_storage::LsmStorageState;
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct SimpleLeveledCompactionOptions {
|
|
pub size_ratio_percent: usize,
|
|
pub level0_file_num_compaction_trigger: usize,
|
|
pub max_levels: usize,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
pub struct SimpleLeveledCompactionTask {
|
|
// if upper_level is `None`, then it is L0 compaction
|
|
pub upper_level: Option<usize>,
|
|
pub upper_level_sst_ids: Vec<usize>,
|
|
pub lower_level: usize,
|
|
pub lower_level_sst_ids: Vec<usize>,
|
|
pub is_lower_level_bottom_level: bool,
|
|
}
|
|
|
|
pub struct SimpleLeveledCompactionController {
|
|
options: SimpleLeveledCompactionOptions,
|
|
}
|
|
|
|
impl SimpleLeveledCompactionController {
|
|
pub fn new(options: SimpleLeveledCompactionOptions) -> Self {
|
|
Self { options }
|
|
}
|
|
|
|
/// Generates a compaction task.
|
|
///
|
|
/// Returns `None` if no compaction needs to be scheduled. The order of SSTs in the compaction task id vector matters.
|
|
pub fn generate_compaction_task(
|
|
&self,
|
|
snapshot: &LsmStorageState,
|
|
) -> Option<SimpleLeveledCompactionTask> {
|
|
let mut level_sizes = Vec::new();
|
|
level_sizes.push(snapshot.l0_sstables.len());
|
|
for (_, files) in &snapshot.levels {
|
|
level_sizes.push(files.len());
|
|
}
|
|
|
|
// check level0_file_num_compaction_trigger for compaction of L0 to L1
|
|
if snapshot.l0_sstables.len() >= self.options.level0_file_num_compaction_trigger {
|
|
println!(
|
|
"compaction triggered at level 0 because L0 has {} SSTs >= {}",
|
|
snapshot.l0_sstables.len(),
|
|
self.options.level0_file_num_compaction_trigger
|
|
);
|
|
return Some(SimpleLeveledCompactionTask {
|
|
upper_level: None,
|
|
upper_level_sst_ids: snapshot.l0_sstables.clone(),
|
|
lower_level: 1,
|
|
lower_level_sst_ids: snapshot.levels[0].1.clone(),
|
|
is_lower_level_bottom_level: false,
|
|
});
|
|
}
|
|
|
|
for i in 0..self.options.max_levels {
|
|
if i == 0
|
|
&& snapshot.l0_sstables.len() < self.options.level0_file_num_compaction_trigger
|
|
{
|
|
continue;
|
|
}
|
|
|
|
let lower_level = i + 1;
|
|
let size_ratio = level_sizes[lower_level] as f64 / level_sizes[i] as f64;
|
|
if size_ratio < self.options.size_ratio_percent as f64 / 100.0 {
|
|
println!(
|
|
"compaction triggered at level {} and {} with size ratio {}",
|
|
i, lower_level, size_ratio
|
|
);
|
|
return Some(SimpleLeveledCompactionTask {
|
|
upper_level: if i == 0 { None } else { Some(i) },
|
|
upper_level_sst_ids: if i == 0 {
|
|
snapshot.l0_sstables.clone()
|
|
} else {
|
|
snapshot.levels[i - 1].1.clone()
|
|
},
|
|
lower_level,
|
|
lower_level_sst_ids: snapshot.levels[lower_level - 1].1.clone(),
|
|
is_lower_level_bottom_level: lower_level == self.options.max_levels,
|
|
});
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Apply the compaction result.
|
|
///
|
|
/// The compactor will call this function with the compaction task and the list of SST ids generated. This function applies the
|
|
/// result and generates a new LSM state. The functions should only change `l0_sstables` and `levels` without changing memtables
|
|
/// and `sstables` hash map. Though there should only be one thread running compaction jobs, you should think about the case
|
|
/// where an L0 SST gets flushed while the compactor generates new SSTs, and with that in mind, you should do some sanity checks
|
|
/// in your implementation.
|
|
pub fn apply_compaction_result(
|
|
&self,
|
|
snapshot: &LsmStorageState,
|
|
task: &SimpleLeveledCompactionTask,
|
|
output: &[usize],
|
|
) -> (LsmStorageState, Vec<usize>) {
|
|
let mut snapshot = snapshot.clone();
|
|
let mut files_to_remove = Vec::new();
|
|
if let Some(upper_level) = task.upper_level {
|
|
assert_eq!(
|
|
task.upper_level_sst_ids,
|
|
snapshot.levels[upper_level - 1].1,
|
|
"sst mismatched"
|
|
);
|
|
files_to_remove.extend(&snapshot.levels[upper_level - 1].1);
|
|
snapshot.levels[upper_level - 1].1.clear();
|
|
} else {
|
|
files_to_remove.extend(&task.upper_level_sst_ids);
|
|
let mut l0_ssts_compacted = task
|
|
.upper_level_sst_ids
|
|
.iter()
|
|
.copied()
|
|
.collect::<HashSet<_>>();
|
|
let new_l0_sstables = snapshot
|
|
.l0_sstables
|
|
.iter()
|
|
.copied()
|
|
.filter(|x| !l0_ssts_compacted.remove(x))
|
|
.collect::<Vec<_>>();
|
|
assert!(l0_ssts_compacted.is_empty());
|
|
snapshot.l0_sstables = new_l0_sstables;
|
|
}
|
|
assert_eq!(
|
|
task.lower_level_sst_ids,
|
|
snapshot.levels[task.lower_level - 1].1,
|
|
"sst mismatched"
|
|
);
|
|
files_to_remove.extend(&snapshot.levels[task.lower_level - 1].1);
|
|
snapshot.levels[task.lower_level - 1].1 = output.to_vec();
|
|
(snapshot, files_to_remove)
|
|
}
|
|
}
|