add compaction controller + simulator

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
This commit is contained in:
Alex Chi Z
2024-01-16 16:30:01 +08:00
parent 327f6badef
commit a9fca43f79
11 changed files with 349 additions and 143 deletions

View File

@@ -17,6 +17,7 @@ crossbeam-skiplist = "0.1"
parking_lot = "0.12"
ouroboros = "0.15"
moka = "0.9"
clap = { version = "4.4.17", features = ["derive"] }
[dev-dependencies]
tempfile = "3"

View File

@@ -0,0 +1,84 @@
use std::collections::HashSet;
use std::sync::Arc;
use clap::Parser;
use mini_lsm::compact::TieredCompactionController;
use mini_lsm::lsm_storage::LsmStorageInner;
use mini_lsm::mem_table::MemTable;
// Command-line arguments of the compaction simulator, parsed by clap through
// `#[derive(Parser)]`. Each variant selects the compaction strategy to simulate.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
enum Args {
// Run the tiered compaction simulation.
Tiered {},
// Leveled compaction; `main` currently does nothing for this variant.
Leveled {},
}
/// In-memory stand-in for the storage engine used by the simulator: it only tracks SST ids,
/// never real table files.
pub struct MockStorage {
/// Current LSM state (L0 ids and per-level ids) handed to the compaction controller.
snapshot: LsmStorageInner,
/// Next id returned by `generate_sst_id`; incremented on every call.
next_sst_id: usize,
/// Ids inserted by `flush_sst` and not yet dropped by `remove`.
file_list: HashSet<usize>,
}
impl MockStorage {
    /// Creates an empty mock storage: a fresh memtable, no immutable memtables, no L0 tables,
    /// no levels, no SST objects, and an id counter starting at 0.
    pub fn new() -> Self {
        let snapshot = LsmStorageInner {
            memtable: Arc::new(MemTable::create()),
            imm_memtables: Vec::new(),
            l0_sstables: Vec::new(),
            levels: Vec::new(),
            sstables: Default::default(),
        };
        Self {
            snapshot,
            next_sst_id: 0,
            file_list: Default::default(),
        }
    }

    /// Returns the next unused SST id and advances the counter.
    fn generate_sst_id(&mut self) -> usize {
        let id = self.next_sst_id;
        self.next_sst_id += 1;
        id
    }

    /// Simulates a memtable flush: allocates a new SST id, appends it to L0 and records it in
    /// the file list.
    pub fn flush_sst(&mut self) {
        let id = self.generate_sst_id();
        self.snapshot.l0_sstables.push(id);
        self.file_list.insert(id);
    }

    /// Drops every id in `files_to_remove` from the file list. Ids that are not present are
    /// ignored; the snapshot itself is not modified.
    pub fn remove(&mut self, files_to_remove: &[usize]) {
        for file_id in files_to_remove {
            self.file_list.remove(file_id);
        }
    }

    /// Prints the LSM structure to stdout, one line for L0 followed by one line per level.
    pub fn dump(&self) {
        // `println!` rather than `print!`: without the newline all levels ran together on one
        // line and the next "Iteration N" header was appended to it.
        println!("L0: {:?}", self.snapshot.l0_sstables);
        for (level, files) in &self.snapshot.levels {
            println!("L{level}: {files:?}");
        }
    }
}
/// Entry point of the compaction simulator.
///
/// For the `Tiered` subcommand it runs 500 rounds of: flush one SST into L0, ask the controller
/// for a compaction task, apply the task with one freshly allocated output SST id, drop the
/// files the controller reports as deleted, and print the resulting structure. The `Leveled`
/// subcommand is a no-op.
fn main() {
    match Args::parse() {
        Args::Leveled {} => {}
        Args::Tiered {} => {
            let mut storage = MockStorage::new();
            let controller = TieredCompactionController {};
            for round in 0..500 {
                println!("Iteration {round}");
                storage.flush_sst();
                let task = controller.generate_compaction_task(&storage.snapshot);
                // NOTE(review): an output id is allocated on every round and is never added to
                // the file list -- confirm this is intended once the controller is implemented.
                let output = [storage.generate_sst_id()];
                let (next_snapshot, removed) =
                    controller.apply_compaction_result(&storage.snapshot, &task, &output);
                storage.snapshot = next_snapshot;
                storage.remove(&removed);
                storage.dump();
            }
        }
    }
}

View File

@@ -24,7 +24,7 @@ impl BlockBuilder {
fn estimated_size(&self) -> usize {
SIZEOF_U16 /* number of key-value pairs in the block */ + self.offsets.len() * SIZEOF_U16 /* offsets */ + self.data.len()
/* key-value pairs */
// key-value pairs
}
/// Adds a key-value pair to the block. Returns false when the block is full.

View File

@@ -1,12 +1,21 @@
mod leveled;
mod tiered;
use std::sync::Arc;
use anyhow::Result;
pub use leveled::{LeveledCompactionController, LeveledCompactionTask};
pub use tiered::{TieredCompactionController, TieredCompactionTask};
use crate::{
iterators::{merge_iterator::MergeIterator, StorageIterator},
lsm_storage::LsmStorage,
table::{SsTable, SsTableBuilder, SsTableIterator},
};
use crate::iterators::merge_iterator::MergeIterator;
use crate::iterators::StorageIterator;
use crate::lsm_storage::LsmStorage;
use crate::table::{SsTable, SsTableBuilder, SsTableIterator};
pub enum CompactionTask {
Leveled(LeveledCompactionTask),
Tiered(TieredCompactionTask),
}
struct CompactOptions {
block_size: usize,

View File

@@ -0,0 +1,25 @@
use crate::lsm_storage::LsmStorageInner;
/// Description of a single leveled compaction job.
///
/// NOTE(review): nothing visible reads these fields yet. Presumably the SSTs listed for
/// `upper_level` are merged into `lower_level` -- confirm once the controller is implemented.
pub struct LeveledCompactionTask {
/// Index of the upper level taking part in the compaction.
upper_level: usize,
/// Ids of the SSTs picked from the upper level.
upper_level_sst_ids: Vec<usize>,
/// Index of the lower level taking part in the compaction.
lower_level: usize,
/// Ids of the SSTs picked from the lower level.
lower_level_sst_ids: Vec<usize>,
}
/// Controller for leveled compaction. It carries no state.
pub struct LeveledCompactionController {}
impl LeveledCompactionController {
/// Produces the next leveled compaction task for `snapshot`.
///
/// # Panics
///
/// Always panics: the body is `unimplemented!()`.
pub fn generate_compaction_task(&self, snapshot: &LsmStorageInner) -> LeveledCompactionTask {
unimplemented!()
}
/// Applies the result of `task` to `snapshot`, where `output` holds the ids of the SSTs the
/// compaction produced. The signature returns a new snapshot together with a list of SST ids.
/// NOTE(review): presumably those are the ids of files that can be deleted -- confirm.
///
/// # Panics
///
/// Always panics: the body is `unimplemented!()`.
pub fn apply_compaction_result(
&self,
snapshot: &LsmStorageInner,
task: &LeveledCompactionTask,
output: &[usize],
) -> (LsmStorageInner, Vec<usize>) {
unimplemented!()
}
}

View File

@@ -0,0 +1,23 @@
use crate::lsm_storage::LsmStorageInner;
use crate::table::SsTable;
/// Description of a single tiered compaction job.
pub struct TieredCompactionTask {
/// NOTE(review): presumably identifies the tiers selected for compaction -- confirm. The only
/// visible producer, `TieredCompactionController::generate_compaction_task`, always leaves it
/// empty.
tiers: Vec<usize>,
}
/// Controller for tiered compaction. It carries no state.
pub struct TieredCompactionController {}
impl TieredCompactionController {
    /// Produces the next tiered compaction task for `snapshot`.
    ///
    /// Placeholder: `snapshot` is not inspected yet and the returned task always has an empty
    /// `tiers` list.
    pub fn generate_compaction_task(&self, snapshot: &LsmStorageInner) -> TieredCompactionTask {
        TieredCompactionTask { tiers: Vec::new() }
    }

    /// Applies the result of `task` to `snapshot`, where `output` holds the ids of the SSTs the
    /// compaction produced.
    ///
    /// Placeholder: `task` and `output` are ignored; the snapshot is returned as an unchanged
    /// clone together with an empty id list.
    pub fn apply_compaction_result(
        &self,
        snapshot: &LsmStorageInner,
        task: &TieredCompactionTask,
        output: &[usize],
    ) -> (LsmStorageInner, Vec<usize>) {
        (snapshot.clone(), Vec::new())
    }
}

View File

@@ -1,5 +1,5 @@
pub mod block;
mod compact;
pub mod compact;
pub mod iterators;
pub mod lsm_iterator;
pub mod lsm_storage;

View File

@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::ops::Bound;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicUsize;
@@ -20,14 +21,17 @@ pub type BlockCache = moka::sync::Cache<(usize, usize), Arc<Block>>;
#[derive(Clone)]
pub struct LsmStorageInner {
/// The current memtable.
memtable: Arc<MemTable>,
pub memtable: Arc<MemTable>,
/// Immutable memTables, from earliest to latest.
imm_memtables: Vec<Arc<MemTable>>,
pub imm_memtables: Vec<Arc<MemTable>>,
/// L0 SsTables, from earliest to latest.
l0_sstables: Vec<Arc<SsTable>>,
/// L1 - L6 SsTables, sorted by key range.
pub l0_sstables: Vec<usize>,
/// SsTables sorted by key range; L1 - L6 for leveled compaction, or tiers for tiered
/// compaction.
#[allow(dead_code)]
levels: Vec<Vec<Arc<SsTable>>>,
pub levels: Vec<(usize, Vec<usize>)>,
/// SsTable objects.
pub sstables: HashMap<usize, Arc<SsTable>>,
}
impl LsmStorageInner {
@@ -37,6 +41,7 @@ impl LsmStorageInner {
imm_memtables: vec![],
l0_sstables: vec![],
levels: vec![],
sstables: Default::default(),
}
}
}
@@ -94,7 +99,7 @@ impl LsmStorage {
let mut iters = Vec::with_capacity(snapshot.l0_sstables.len());
for table in snapshot.l0_sstables.iter().rev() {
iters.push(Box::new(SsTableIterator::create_and_seek_to_key(
table.clone(),
snapshot.sstables[table].clone(),
key,
)?));
}
@@ -173,7 +178,8 @@ impl LsmStorage {
// Remove the memtable from the immutable memtables.
snapshot.imm_memtables.pop();
// Add L0 table
snapshot.l0_sstables.push(sst);
snapshot.l0_sstables.push(sst_id);
snapshot.sstables.insert(sst_id, sst);
// Update the snapshot.
*guard = Arc::new(snapshot);
}
@@ -200,19 +206,18 @@ impl LsmStorage {
let memtable_iter = MergeIterator::create(memtable_iters);
let mut table_iters = Vec::with_capacity(snapshot.l0_sstables.len());
for table in snapshot.l0_sstables.iter().rev() {
for table_id in snapshot.l0_sstables.iter().rev() {
let table = snapshot.sstables[table_id].clone();
let iter = match lower {
Bound::Included(key) => {
SsTableIterator::create_and_seek_to_key(table.clone(), key)?
}
Bound::Included(key) => SsTableIterator::create_and_seek_to_key(table, key)?,
Bound::Excluded(key) => {
let mut iter = SsTableIterator::create_and_seek_to_key(table.clone(), key)?;
let mut iter = SsTableIterator::create_and_seek_to_key(table, key)?;
if iter.is_valid() && iter.key() == key {
iter.next()?;
}
iter
}
Bound::Unbounded => SsTableIterator::create_and_seek_to_first(table.clone())?,
Bound::Unbounded => SsTableIterator::create_and_seek_to_first(table)?,
};
table_iters.push(Box::new(iter));

View File

@@ -33,7 +33,8 @@ impl BlockMeta {
// The size of actual key
estimated_size += meta.first_key.len();
}
// Reserve the space to improve performance, especially when the size of incoming data is large
// Reserve the space to improve performance, especially when the size of incoming data is
// large
buf.reserve(estimated_size);
let original_len = buf.len();
for meta in block_meta {