2024-01-16 16:30:01 +08:00
|
|
|
mod leveled;
|
2024-01-17 15:42:33 +08:00
|
|
|
mod simple_leveled;
|
2024-01-16 16:30:01 +08:00
|
|
|
mod tiered;
|
|
|
|
|
|
2024-01-10 14:25:23 +08:00
|
|
|
use std::sync::Arc;
|
2024-01-18 19:40:05 +08:00
|
|
|
use std::time::Duration;
|
2024-01-10 14:25:23 +08:00
|
|
|
|
|
|
|
|
use anyhow::Result;
|
2024-01-18 14:50:12 +08:00
|
|
|
pub use leveled::{LeveledCompactionController, LeveledCompactionOptions, LeveledCompactionTask};
|
2024-01-19 11:21:38 +08:00
|
|
|
use serde::{Deserialize, Serialize};
|
2024-01-17 15:42:33 +08:00
|
|
|
pub use simple_leveled::{
|
|
|
|
|
SimpleLeveledCompactionController, SimpleLeveledCompactionOptions, SimpleLeveledCompactionTask,
|
|
|
|
|
};
|
2024-01-17 14:51:15 +08:00
|
|
|
pub use tiered::{TieredCompactionController, TieredCompactionOptions, TieredCompactionTask};
|
2024-01-16 16:30:01 +08:00
|
|
|
|
|
|
|
|
use crate::iterators::merge_iterator::MergeIterator;
|
|
|
|
|
use crate::iterators::StorageIterator;
|
2024-01-18 19:40:05 +08:00
|
|
|
use crate::lsm_storage::{LsmStorageInner, LsmStorageState};
|
2024-01-19 11:21:38 +08:00
|
|
|
use crate::manifest::ManifestRecord;
|
2024-01-16 16:30:01 +08:00
|
|
|
use crate::table::{SsTable, SsTableBuilder, SsTableIterator};
|
2024-01-10 14:25:23 +08:00
|
|
|
|
2024-01-19 16:10:18 +08:00
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
2024-01-18 17:51:24 +08:00
|
|
|
pub(crate) enum CompactionTask {
|
2024-01-16 16:30:01 +08:00
|
|
|
Leveled(LeveledCompactionTask),
|
|
|
|
|
Tiered(TieredCompactionTask),
|
2024-01-18 17:51:24 +08:00
|
|
|
Simple(SimpleLeveledCompactionTask),
|
2024-01-18 19:40:05 +08:00
|
|
|
ForceFullCompaction(Vec<usize>),
|
2024-01-16 16:30:01 +08:00
|
|
|
}
|
2024-01-10 14:25:23 +08:00
|
|
|
|
2024-01-18 19:40:05 +08:00
|
|
|
impl CompactionTask {
|
|
|
|
|
fn compact_to_bottom_level(&self) -> bool {
|
|
|
|
|
match self {
|
|
|
|
|
CompactionTask::ForceFullCompaction(_) => true,
|
|
|
|
|
CompactionTask::Leveled(task) => task.is_lower_level_bottom_level,
|
|
|
|
|
CompactionTask::Simple(task) => task.is_lower_level_bottom_level,
|
|
|
|
|
CompactionTask::Tiered(task) => task.bottom_tier_included,
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-01-10 14:25:23 +08:00
|
|
|
}
|
|
|
|
|
|
2024-01-18 17:51:24 +08:00
|
|
|
pub(crate) enum CompactionController {
|
|
|
|
|
Leveled(LeveledCompactionController),
|
|
|
|
|
Tiered(TieredCompactionController),
|
|
|
|
|
Simple(SimpleLeveledCompactionController),
|
|
|
|
|
NoCompaction,
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-18 19:40:05 +08:00
|
|
|
impl CompactionController {
|
2024-01-19 17:28:47 +08:00
|
|
|
pub fn generate_compaction_task(&self, snapshot: &LsmStorageState) -> Option<CompactionTask> {
|
2024-01-18 19:40:05 +08:00
|
|
|
match self {
|
|
|
|
|
CompactionController::Leveled(ctrl) => ctrl
|
|
|
|
|
.generate_compaction_task(&snapshot)
|
|
|
|
|
.map(CompactionTask::Leveled),
|
|
|
|
|
CompactionController::Simple(ctrl) => ctrl
|
|
|
|
|
.generate_compaction_task(&snapshot)
|
|
|
|
|
.map(CompactionTask::Simple),
|
|
|
|
|
CompactionController::Tiered(ctrl) => ctrl
|
|
|
|
|
.generate_compaction_task(&snapshot)
|
|
|
|
|
.map(CompactionTask::Tiered),
|
|
|
|
|
CompactionController::NoCompaction => unreachable!(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-19 17:28:47 +08:00
|
|
|
pub fn apply_compaction_result(
|
2024-01-18 19:40:05 +08:00
|
|
|
&self,
|
|
|
|
|
snapshot: &LsmStorageState,
|
|
|
|
|
task: &CompactionTask,
|
|
|
|
|
output: &[usize],
|
|
|
|
|
) -> (LsmStorageState, Vec<usize>) {
|
|
|
|
|
match (self, task) {
|
|
|
|
|
(CompactionController::Leveled(ctrl), CompactionTask::Leveled(task)) => {
|
|
|
|
|
ctrl.apply_compaction_result(&snapshot, task, output)
|
|
|
|
|
}
|
|
|
|
|
(CompactionController::Simple(ctrl), CompactionTask::Simple(task)) => {
|
|
|
|
|
ctrl.apply_compaction_result(&snapshot, task, output)
|
|
|
|
|
}
|
|
|
|
|
(CompactionController::Tiered(ctrl), CompactionTask::Tiered(task)) => {
|
|
|
|
|
ctrl.apply_compaction_result(&snapshot, task, output)
|
|
|
|
|
}
|
|
|
|
|
_ => unreachable!(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-18 17:51:24 +08:00
|
|
|
impl CompactionController {
|
|
|
|
|
pub fn flush_to_l0(&self) -> bool {
|
|
|
|
|
if let Self::Leveled(_) | Self::Simple(_) | Self::NoCompaction = self {
|
|
|
|
|
true
|
|
|
|
|
} else {
|
|
|
|
|
false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub enum CompactionOptions {
|
|
|
|
|
/// Leveled compaction with partial compaction + dynamic level support (= RocksDB's Leveled
|
|
|
|
|
/// Compaction)
|
|
|
|
|
Leveled(LeveledCompactionOptions),
|
|
|
|
|
/// Tiered compaction (= RocksDB's universal compaction)
|
|
|
|
|
Tiered(TieredCompactionOptions),
|
|
|
|
|
/// Simple leveled compaction
|
|
|
|
|
Simple(SimpleLeveledCompactionOptions),
|
|
|
|
|
/// In no compaction mode (week 1), always flush to L0
|
|
|
|
|
NoCompaction,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl LsmStorageInner {
|
2024-01-18 19:40:05 +08:00
|
|
|
fn compact(&self, task: &CompactionTask) -> Result<Vec<Arc<SsTable>>> {
|
|
|
|
|
let table_ids = match task {
|
|
|
|
|
CompactionTask::Leveled(task) => task
|
|
|
|
|
.lower_level_sst_ids
|
|
|
|
|
.iter()
|
|
|
|
|
.copied()
|
|
|
|
|
.chain(task.upper_level_sst_ids.iter().copied())
|
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
|
CompactionTask::Simple(task) => task
|
|
|
|
|
.lower_level_sst_ids
|
|
|
|
|
.iter()
|
|
|
|
|
.copied()
|
|
|
|
|
.chain(task.upper_level_sst_ids.iter().copied())
|
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
|
CompactionTask::Tiered(task) => task
|
|
|
|
|
.tiers
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|(_, files)| files)
|
|
|
|
|
.flatten()
|
|
|
|
|
.copied()
|
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
|
CompactionTask::ForceFullCompaction(l0_ssts) => l0_ssts.clone(),
|
|
|
|
|
};
|
|
|
|
|
let tables: Vec<Arc<SsTable>> = {
|
|
|
|
|
let state = self.state.read();
|
|
|
|
|
table_ids
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|id| state.sstables.get(id).unwrap().clone())
|
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
};
|
|
|
|
|
|
2024-01-10 14:25:23 +08:00
|
|
|
let mut iters = Vec::new();
|
|
|
|
|
iters.reserve(tables.len());
|
|
|
|
|
for table in tables.iter() {
|
2024-01-18 15:15:51 +08:00
|
|
|
iters.push(Box::new(SsTableIterator::create_and_seek_to_first(
|
|
|
|
|
table.clone(),
|
|
|
|
|
)?));
|
2024-01-10 14:25:23 +08:00
|
|
|
}
|
|
|
|
|
let mut iter = MergeIterator::create(iters);
|
|
|
|
|
|
|
|
|
|
let mut builder = None;
|
|
|
|
|
let mut new_sst = vec![];
|
|
|
|
|
|
2024-01-18 19:40:05 +08:00
|
|
|
let compact_to_bottom_level = task.compact_to_bottom_level();
|
2024-01-18 17:51:24 +08:00
|
|
|
|
2024-01-10 14:25:23 +08:00
|
|
|
while iter.is_valid() {
|
|
|
|
|
if builder.is_none() {
|
2024-01-18 19:40:05 +08:00
|
|
|
builder = Some(SsTableBuilder::new(self.options.block_size));
|
2024-01-10 14:25:23 +08:00
|
|
|
}
|
|
|
|
|
let builder_inner = builder.as_mut().unwrap();
|
2024-01-18 17:51:24 +08:00
|
|
|
if compact_to_bottom_level {
|
2024-01-10 14:25:23 +08:00
|
|
|
if !iter.value().is_empty() {
|
|
|
|
|
builder_inner.add(iter.key(), iter.value());
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
builder_inner.add(iter.key(), iter.value());
|
|
|
|
|
}
|
|
|
|
|
iter.next()?;
|
|
|
|
|
|
2024-01-18 19:40:05 +08:00
|
|
|
if builder_inner.estimated_size() >= self.options.target_sst_size {
|
2024-01-10 14:25:23 +08:00
|
|
|
let sst_id = self.next_sst_id(); // lock dropped here
|
|
|
|
|
let builder = builder.take().unwrap();
|
|
|
|
|
let sst = Arc::new(builder.build(
|
|
|
|
|
sst_id,
|
|
|
|
|
Some(self.block_cache.clone()),
|
|
|
|
|
self.path_of_sst(sst_id),
|
|
|
|
|
)?);
|
|
|
|
|
new_sst.push(sst);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if let Some(builder) = builder {
|
|
|
|
|
let sst_id = self.next_sst_id(); // lock dropped here
|
|
|
|
|
let sst = Arc::new(builder.build(
|
|
|
|
|
sst_id,
|
|
|
|
|
Some(self.block_cache.clone()),
|
|
|
|
|
self.path_of_sst(sst_id),
|
|
|
|
|
)?);
|
|
|
|
|
new_sst.push(sst);
|
|
|
|
|
}
|
|
|
|
|
Ok(new_sst)
|
|
|
|
|
}
|
2024-01-18 17:51:24 +08:00
|
|
|
|
2024-01-18 19:49:36 +08:00
|
|
|
pub fn force_full_compaction(&self) -> Result<()> {
|
|
|
|
|
let CompactionOptions::NoCompaction = self.options.compaction_options else {
|
|
|
|
|
panic!("full compaction can only be called with compaction is not enabled")
|
|
|
|
|
};
|
|
|
|
|
let snapshot = {
|
|
|
|
|
let state = self.state.read();
|
|
|
|
|
state.clone()
|
|
|
|
|
};
|
2024-01-19 16:10:18 +08:00
|
|
|
let mut original_sstables = snapshot.l0_sstables.clone();
|
|
|
|
|
original_sstables.reverse();
|
2024-01-18 19:49:36 +08:00
|
|
|
let sstables = self.compact(&CompactionTask::ForceFullCompaction(
|
|
|
|
|
original_sstables.clone(),
|
|
|
|
|
))?;
|
|
|
|
|
{
|
|
|
|
|
let _state_lock = self.state_lock.lock();
|
|
|
|
|
let mut state = self.state.read().as_ref().clone();
|
|
|
|
|
for sst in original_sstables.iter() {
|
|
|
|
|
let result = state.sstables.remove(sst);
|
|
|
|
|
assert!(result.is_some());
|
|
|
|
|
}
|
|
|
|
|
let mut ids = Vec::with_capacity(sstables.len());
|
|
|
|
|
for new_sst in sstables {
|
|
|
|
|
ids.push(new_sst.sst_id());
|
|
|
|
|
let result = state.sstables.insert(new_sst.sst_id(), new_sst);
|
|
|
|
|
assert!(result.is_none());
|
|
|
|
|
}
|
|
|
|
|
state.l0_sstables = ids;
|
|
|
|
|
*self.state.write() = Arc::new(state);
|
|
|
|
|
}
|
|
|
|
|
for sst in original_sstables {
|
|
|
|
|
std::fs::remove_file(self.path_of_sst(sst))?;
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-18 19:40:05 +08:00
|
|
|
fn trigger_compaction(&self) -> Result<()> {
|
|
|
|
|
let snapshot = {
|
|
|
|
|
let state = self.state.read();
|
|
|
|
|
state.clone()
|
|
|
|
|
};
|
|
|
|
|
let task = self
|
|
|
|
|
.compaction_controller
|
|
|
|
|
.generate_compaction_task(&snapshot);
|
|
|
|
|
let Some(task) = task else {
|
|
|
|
|
return Ok(());
|
|
|
|
|
};
|
2024-01-19 16:10:18 +08:00
|
|
|
println!("running compaction task: {:?}", task);
|
2024-01-18 19:40:05 +08:00
|
|
|
let sstables = self.compact(&task)?;
|
|
|
|
|
let output = sstables.iter().map(|x| x.sst_id()).collect::<Vec<_>>();
|
|
|
|
|
let ssts_to_remove = {
|
2024-01-19 11:21:38 +08:00
|
|
|
let state_lock = self.state_lock.lock();
|
2024-01-18 19:40:05 +08:00
|
|
|
let (mut snapshot, files_to_remove) = self
|
|
|
|
|
.compaction_controller
|
|
|
|
|
.apply_compaction_result(&self.state.read(), &task, &output);
|
|
|
|
|
let mut ssts_to_remove = Vec::with_capacity(files_to_remove.len());
|
|
|
|
|
for file_to_remove in &files_to_remove {
|
|
|
|
|
let result = snapshot.sstables.remove(file_to_remove);
|
|
|
|
|
assert!(result.is_some());
|
|
|
|
|
ssts_to_remove.push(result.unwrap());
|
|
|
|
|
}
|
2024-01-19 17:28:47 +08:00
|
|
|
let mut new_sst_ids = Vec::new();
|
2024-01-18 19:40:05 +08:00
|
|
|
for file_to_add in sstables {
|
2024-01-19 17:28:47 +08:00
|
|
|
new_sst_ids.push(file_to_add.sst_id());
|
2024-01-18 19:40:05 +08:00
|
|
|
let result = snapshot.sstables.insert(file_to_add.sst_id(), file_to_add);
|
|
|
|
|
assert!(result.is_none());
|
|
|
|
|
}
|
|
|
|
|
let mut state = self.state.write();
|
|
|
|
|
*state = Arc::new(snapshot);
|
2024-01-19 11:21:38 +08:00
|
|
|
self.manifest
|
2024-01-19 17:28:47 +08:00
|
|
|
.add_record(&state_lock, ManifestRecord::Compaction(task, new_sst_ids))?;
|
2024-01-18 19:40:05 +08:00
|
|
|
ssts_to_remove
|
|
|
|
|
};
|
|
|
|
|
for sst in ssts_to_remove {
|
|
|
|
|
std::fs::remove_file(self.path_of_sst(sst.sst_id()))?;
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-18 17:51:24 +08:00
|
|
|
pub(crate) fn spawn_compaction_thread(
|
|
|
|
|
self: &Arc<Self>,
|
2024-01-18 19:40:05 +08:00
|
|
|
rx: crossbeam_channel::Receiver<()>,
|
2024-01-18 17:51:24 +08:00
|
|
|
) -> Result<Option<std::thread::JoinHandle<()>>> {
|
2024-01-18 19:40:05 +08:00
|
|
|
if let CompactionOptions::Leveled(_)
|
|
|
|
|
| CompactionOptions::Simple(_)
|
|
|
|
|
| CompactionOptions::Tiered(_) = self.options.compaction_options
|
|
|
|
|
{
|
|
|
|
|
let this = self.clone();
|
|
|
|
|
let handle = std::thread::spawn(move || {
|
|
|
|
|
let ticker = crossbeam_channel::tick(Duration::from_millis(50));
|
|
|
|
|
loop {
|
|
|
|
|
crossbeam_channel::select! {
|
|
|
|
|
recv(ticker) -> _ => if let Err(e) = this.trigger_compaction() {
|
|
|
|
|
eprintln!("compaction failed: {}", e);
|
|
|
|
|
},
|
|
|
|
|
recv(rx) -> _ => return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
return Ok(Some(handle));
|
|
|
|
|
}
|
2024-01-18 17:51:24 +08:00
|
|
|
Ok(None)
|
|
|
|
|
}
|
2024-01-10 14:25:23 +08:00
|
|
|
}
|