fix: universal compaction condition (#97)

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi Z.
2024-11-12 21:27:00 -05:00
committed by GitHub
parent b84dd3838f
commit 9cca6ad420
12 changed files with 107 additions and 29 deletions

View File

@@ -19,8 +19,17 @@ use mini_lsm_wrapper::table::SsTable;
#[command(author, version, about, long_about = None)]
enum Args {
Simple {
/// Dump the generated ID instead of where the original data comes from.
/// For example, if SST 1, 2, 3 is compacted to another level, it should have
/// a new SST ID 4, 5, 6 as SSTs are immutable and write-once. With this flag
/// enabled, you will see the new level has SST 1, 2, 3 because the data of
/// 4, 5, 6 are originated from 1, 2, 3.
#[clap(long)]
dump_real_id: bool,
/// Only dump size information instead of the layer files. if this is enabled,
/// it will print one row per compaction iteration.
#[clap(long)]
size_only: bool,
#[clap(long, default_value = "2")]
level0_file_num_compaction_trigger: usize,
#[clap(long, default_value = "3")]
@@ -31,8 +40,17 @@ enum Args {
iterations: usize,
},
Tiered {
/// Dump the generated ID instead of where the original data comes from.
/// For example, if SST 1, 2, 3 is compacted to another level, it should have
/// a new SST ID 4, 5, 6 as SSTs are immutable and write-once. With this flag
/// enabled, you will see the new level has SST 1, 2, 3 because the data of
/// 4, 5, 6 are originated from 1, 2, 3.
#[clap(long)]
dump_real_id: bool,
/// Only dump size information instead of the layer files. if this is enabled,
/// it will print one row per compaction iteration.
#[clap(long)]
size_only: bool,
#[clap(long, default_value = "3")]
num_tiers: usize,
#[clap(long, default_value = "200")]
@@ -41,12 +59,23 @@ enum Args {
size_ratio: usize,
#[clap(long, default_value = "2")]
min_merge_width: usize,
#[clap(long)]
max_merge_width: Option<usize>,
#[clap(long, default_value = "50")]
iterations: usize,
},
Leveled {
/// Dump the generated ID instead of where the original data comes from.
/// For example, if SST 1, 2, 3 is compacted to another level, it should have
/// a new SST ID 4, 5, 6 as SSTs are immutable and write-once. With this flag
/// enabled, you will see the new level has SST 1, 2, 3 because the data of
/// 4, 5, 6 are originated from 1, 2, 3.
#[clap(long)]
dump_real_id: bool,
/// Only dump size information instead of the layer files. if this is enabled,
/// it will print one row per compaction iteration.
#[clap(long)]
size_only: bool,
#[clap(long, default_value = "2")]
level0_file_num_compaction_trigger: usize,
#[clap(long, default_value = "2")]
@@ -148,6 +177,14 @@ impl MockStorage {
}
}
pub fn dump_size_only(&self) {
print!("Levels: {}", self.snapshot.l0_sstables.len());
for (_, files) in &self.snapshot.levels {
print!(" {}", files.len());
}
println!();
}
pub fn dump_original_id(&self, always_show_l0: bool, with_key: bool) {
if !self.snapshot.l0_sstables.is_empty() || always_show_l0 {
println!(
@@ -231,6 +268,7 @@ fn main() {
match args {
Args::Simple {
dump_real_id,
size_only,
size_ratio_percent,
iterations,
level0_file_num_compaction_trigger,
@@ -252,14 +290,18 @@ fn main() {
println!("=== Iteration {i} ===");
storage.flush_sst_to_l0();
println!("--- After Flush ---");
if dump_real_id {
if size_only {
storage.dump_size_only();
} else if dump_real_id {
storage.dump_real_id(true, false);
} else {
storage.dump_original_id(true, false);
}
let mut num_compactions = 0;
while let Some(task) = {
println!("--- Compaction Task ---");
if !size_only {
println!("--- Compaction Task ---");
}
controller.generate_compaction_task(&storage.snapshot)
} {
let mut sst_ids = Vec::new();
@@ -289,7 +331,9 @@ fn main() {
storage.snapshot = snapshot;
storage.remove(&del);
println!("--- After Compaction ---");
if dump_real_id {
if size_only {
storage.dump_size_only();
} else if dump_real_id {
storage.dump_real_id(true, false);
} else {
storage.dump_original_id(true, false);
@@ -333,10 +377,12 @@ fn main() {
}
Args::Tiered {
dump_real_id,
size_only,
num_tiers: level0_file_num_compaction_trigger,
max_size_amplification_percent,
size_ratio,
min_merge_width,
max_merge_width,
iterations,
} => {
let controller = TieredCompactionController::new(TieredCompactionOptions {
@@ -344,6 +390,7 @@ fn main() {
max_size_amplification_percent,
size_ratio,
min_merge_width,
max_merge_width,
});
let mut storage = MockStorage::new();
let mut max_space = 0;
@@ -351,15 +398,21 @@ fn main() {
println!("=== Iteration {i} ===");
storage.flush_sst_to_new_tier();
println!("--- After Flush ---");
if dump_real_id {
if size_only {
storage.dump_size_only();
} else if dump_real_id {
storage.dump_real_id(false, false);
} else {
storage.dump_original_id(false, false);
}
println!("--- Compaction Task ---");
if !size_only {
println!("--- Compaction Task ---");
}
let mut num_compactions = 0;
while let Some(task) = {
println!("--- Compaction Task ---");
if !size_only {
println!("--- Compaction Task ---");
}
controller.generate_compaction_task(&storage.snapshot)
} {
let mut sst_ids = Vec::new();
@@ -379,7 +432,9 @@ fn main() {
storage.snapshot = snapshot;
storage.remove(&del);
println!("--- After Compaction ---");
if dump_real_id {
if size_only {
storage.dump_size_only();
} else if dump_real_id {
storage.dump_real_id(false, false);
} else {
storage.dump_original_id(false, false);
@@ -423,6 +478,7 @@ fn main() {
}
Args::Leveled {
dump_real_id,
size_only,
level0_file_num_compaction_trigger,
level_size_multiplier,
max_levels,
@@ -456,14 +512,18 @@ fn main() {
)),
);
println!("--- After Flush ---");
if dump_real_id {
if size_only {
storage.dump_size_only();
} else if dump_real_id {
storage.dump_real_id(false, true);
} else {
storage.dump_original_id(false, true);
}
let mut num_compactions = 0;
while let Some(task) = {
println!("--- Compaction Task ---");
if !size_only {
println!("--- Compaction Task ---");
}
controller.generate_compaction_task(&storage.snapshot)
} {
let mut sst_ids = Vec::new();
@@ -570,7 +630,9 @@ fn main() {
storage.snapshot = snapshot;
storage.remove(&del);
println!("--- After Compaction ---");
if dump_real_id {
if size_only {
storage.dump_size_only();
} else if dump_real_id {
storage.dump_real_id(true, true);
} else {
storage.dump_original_id(true, true);

View File

@@ -334,6 +334,7 @@ fn main() -> Result<()> {
max_size_amplification_percent: 200,
size_ratio: 1,
min_merge_width: 2,
max_merge_width: None,
}),
CompactionStrategy::Leveled => {
CompactionOptions::Leveled(LeveledCompactionOptions {

View File

@@ -14,6 +14,7 @@ pub struct TieredCompactionOptions {
pub max_size_amplification_percent: usize,
pub size_ratio: usize,
pub min_merge_width: usize,
pub max_merge_width: Option<usize>,
}
pub struct TieredCompactionController {

View File

@@ -94,7 +94,7 @@ impl Bloom {
true
} else {
let nbits = self.filter.bit_len();
let delta = (h >> 17) | (h << 15);
let delta = h.rotate_left(15);
// TODO: probe the bloom filter