Files
mini_lsm/mini-lsm-mvcc/src/compact/simple_leveled.rs
Alex Chi Z. 7f4b204064 relicense mini-lsm-book to CC BY-NC-SA 4.0 (#118)
* relicense mini-lsm-book to CC BY-NC-SA 4.0

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

* clearify license

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

* fix fmt

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

* fix fmt

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

---------

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
2025-01-19 19:24:12 -05:00

155 lines
5.9 KiB
Rust

// Copyright (c) 2022-2025 Alex Chi Z
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use serde::{Deserialize, Serialize};
use crate::lsm_storage::LsmStorageState;
#[derive(Debug, Clone)]
pub struct SimpleLeveledCompactionOptions {
pub size_ratio_percent: usize,
pub level0_file_num_compaction_trigger: usize,
pub max_levels: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct SimpleLeveledCompactionTask {
// if upper_level is `None`, then it is L0 compaction
pub upper_level: Option<usize>,
pub upper_level_sst_ids: Vec<usize>,
pub lower_level: usize,
pub lower_level_sst_ids: Vec<usize>,
pub is_lower_level_bottom_level: bool,
}
pub struct SimpleLeveledCompactionController {
options: SimpleLeveledCompactionOptions,
}
impl SimpleLeveledCompactionController {
pub fn new(options: SimpleLeveledCompactionOptions) -> Self {
Self { options }
}
/// Generates a compaction task.
///
/// Returns `None` if no compaction needs to be scheduled. The order of SSTs in the compaction task id vector matters.
pub fn generate_compaction_task(
&self,
snapshot: &LsmStorageState,
) -> Option<SimpleLeveledCompactionTask> {
let mut level_sizes = Vec::new();
level_sizes.push(snapshot.l0_sstables.len());
for (_, files) in &snapshot.levels {
level_sizes.push(files.len());
}
// check level0_file_num_compaction_trigger for compaction of L0 to L1
if snapshot.l0_sstables.len() >= self.options.level0_file_num_compaction_trigger {
println!(
"compaction triggered at level 0 because L0 has {} SSTs >= {}",
snapshot.l0_sstables.len(),
self.options.level0_file_num_compaction_trigger
);
return Some(SimpleLeveledCompactionTask {
upper_level: None,
upper_level_sst_ids: snapshot.l0_sstables.clone(),
lower_level: 1,
lower_level_sst_ids: snapshot.levels[0].1.clone(),
is_lower_level_bottom_level: false,
});
}
for i in 0..self.options.max_levels {
if i == 0
&& snapshot.l0_sstables.len() < self.options.level0_file_num_compaction_trigger
{
continue;
}
let lower_level = i + 1;
let size_ratio = level_sizes[lower_level] as f64 / level_sizes[i] as f64;
if size_ratio < self.options.size_ratio_percent as f64 / 100.0 {
println!(
"compaction triggered at level {} and {} with size ratio {}",
i, lower_level, size_ratio
);
return Some(SimpleLeveledCompactionTask {
upper_level: if i == 0 { None } else { Some(i) },
upper_level_sst_ids: if i == 0 {
snapshot.l0_sstables.clone()
} else {
snapshot.levels[i - 1].1.clone()
},
lower_level,
lower_level_sst_ids: snapshot.levels[lower_level - 1].1.clone(),
is_lower_level_bottom_level: lower_level == self.options.max_levels,
});
}
}
None
}
/// Apply the compaction result.
///
/// The compactor will call this function with the compaction task and the list of SST ids generated. This function applies the
/// result and generates a new LSM state. The functions should only change `l0_sstables` and `levels` without changing memtables
/// and `sstables` hash map. Though there should only be one thread running compaction jobs, you should think about the case
/// where an L0 SST gets flushed while the compactor generates new SSTs, and with that in mind, you should do some sanity checks
/// in your implementation.
pub fn apply_compaction_result(
&self,
snapshot: &LsmStorageState,
task: &SimpleLeveledCompactionTask,
output: &[usize],
) -> (LsmStorageState, Vec<usize>) {
let mut snapshot = snapshot.clone();
let mut files_to_remove = Vec::new();
if let Some(upper_level) = task.upper_level {
assert_eq!(
task.upper_level_sst_ids,
snapshot.levels[upper_level - 1].1,
"sst mismatched"
);
files_to_remove.extend(&snapshot.levels[upper_level - 1].1);
snapshot.levels[upper_level - 1].1.clear();
} else {
files_to_remove.extend(&task.upper_level_sst_ids);
let mut l0_ssts_compacted = task
.upper_level_sst_ids
.iter()
.copied()
.collect::<HashSet<_>>();
let new_l0_sstables = snapshot
.l0_sstables
.iter()
.copied()
.filter(|x| !l0_ssts_compacted.remove(x))
.collect::<Vec<_>>();
assert!(l0_ssts_compacted.is_empty());
snapshot.l0_sstables = new_l0_sstables;
}
assert_eq!(
task.lower_level_sst_ids,
snapshot.levels[task.lower_level - 1].1,
"sst mismatched"
);
files_to_remove.extend(&snapshot.levels[task.lower_level - 1].1);
snapshot.levels[task.lower_level - 1].1 = output.to_vec();
(snapshot, files_to_remove)
}
}