diff --git a/Cargo.lock b/Cargo.lock index c67d121..28cdde4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,6 +14,54 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" +[[package]] +name = "anstream" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd2405b3ac1faab2990b74d728624cd9fd115651fcecc7c2d8daf01376275ba" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "anyhow" version = "1.0.68" @@ -87,12 +135,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "cc" -version = "1.0.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" - [[package]] name = "cfg-if" version = "1.0.0" @@ -101,40 +143,49 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.0.32" +version = "4.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" +checksum = "80932e03c33999b9235edb8655bc9df3204adc9887c2f95b50cb1deb9fd54253" dependencies = [ - "bitflags", + "clap_builder", "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6c0db58c659eef1c73e444d298c27322a1b52f6927d2ad470c0c0f96fa7b8fa" +dependencies = [ + "anstream", + "anstyle", "clap_lex", - "is-terminal", - "once_cell", "strsim", - "termcolor", ] [[package]] name = "clap_derive" -version = "4.0.21" +version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" dependencies = [ "heck", - "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] name = "clap_lex" -version = "0.3.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" -dependencies = [ - "os_str_bytes", -] +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "console" @@ -212,27 +263,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "errno" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" -dependencies = [ - "errno-dragonfly", - "libc", - "winapi", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "error-chain" version = "0.12.4" @@ -292,28 +322,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "io-lifetimes" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "is-terminal" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" -dependencies = [ - "hermit-abi", - "io-lifetimes", - "rustix", - "windows-sys", -] - [[package]] name = "itoa" version = "1.0.5" @@ -341,12 +349,6 @@ version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" -[[package]] -name = "linux-raw-sys" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" - [[package]] name = "lock_api" version = "0.4.9" @@ -397,6 +399,7 @@ dependencies = [ "anyhow", "arc-swap", "bytes", + "clap", "crossbeam-epoch", "crossbeam-skiplist", "moka", @@ -491,15 +494,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6a252f1f8c11e84b3ab59d7a488e48e4478a93937e027076638c49536204639" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.42.0", ] -[[package]] -name = "os_str_bytes" -version = "6.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" - [[package]] name = "ouroboros" version = "0.15.5" @@ -520,7 +517,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -543,7 +540,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -555,7 +552,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "version_check", ] @@ -572,9 +569,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.49" +version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" dependencies = [ "unicode-ident", ] @@ -608,9 +605,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.23" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -651,20 +648,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.36.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3807b5d10909833d3e9acd1eb5fb988f79376ff10fce42937de71a449c4c588" -dependencies = [ - "bitflags", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "ryu" version = "1.0.12" @@ -721,7 +704,7 @@ checksum = "255abe9a125a985c05190d687b320c12f9b1f0b99445e608c21ba0782c719ad8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -783,6 +766,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -803,15 +797,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "termcolor" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] - [[package]] name = "terminal_size" version = "0.1.17" @@ -839,7 +824,7 @@ checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -869,6 +854,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "uuid" version = "1.2.2" @@ -928,7 +919,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-shared", ] @@ -950,7 +941,7 @@ checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1008,13 +999,37 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.0", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", + "windows_x86_64_gnullvm 0.42.0", + "windows_x86_64_msvc 0.42.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] @@ -1023,38 +1038,80 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/README.md b/README.md index 8f23bc8..fb9e31d 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,11 @@ We are working on a new version of the mini-lsm tutorial that is split into 3 we | 1.5 | Storage Engine - Read Path | ✅ | ✅ | ✅ | | 1.6 | Storage Engine - Write Path | ✅ | ✅ | ✅ | | 2.1 | Compaction Framework | ✅ | 🚧 | 🚧 | -| 2.2 | Compaction Strategy | 🚧 | | | -| 2.3 | Manifest | | | | -| 2.4 | Write-Ahead Log | | | | -| 2.5 | Bloom Filter and Key Compression | | | | +| 2.2 | Compaction Strategy - Tiered | 🚧 | | | +| 2.3 | Compaction Strategy - Leveled | 🚧 | | | +| 2.4 | Manifest | | | | +| 2.5 | Write-Ahead Log | | | | +| 2.6 | Bloom Filter and Key Compression | | | | | 3.1 | Timestamp Encoding + Prefix Bloom Filter | | | | | 3.2 | Snapshot Read | | | | | 3.3 | Watermark and Garbage Collection | | | | diff --git a/mini-lsm/Cargo.toml b/mini-lsm/Cargo.toml index 9916224..a008bde 100644 --- a/mini-lsm/Cargo.toml +++ b/mini-lsm/Cargo.toml @@ -17,6 +17,7 @@ crossbeam-skiplist = "0.1" parking_lot = "0.12" ouroboros = "0.15" moka = "0.9" +clap = { version = "4.4.17", features = ["derive"] } [dev-dependencies] tempfile = "3" diff --git a/mini-lsm/src/bin/compaction_simulator.rs b/mini-lsm/src/bin/compaction_simulator.rs new file mode 100644 index 0000000..54a3e1c --- /dev/null +++ b/mini-lsm/src/bin/compaction_simulator.rs @@ -0,0 +1,84 @@ +use std::collections::HashSet; +use std::sync::Arc; + +use clap::Parser; +use mini_lsm::compact::TieredCompactionController; +use mini_lsm::lsm_storage::LsmStorageInner; +use mini_lsm::mem_table::MemTable; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +enum Args { + Tiered {}, + Leveled {}, +} + +pub struct MockStorage { + snapshot: LsmStorageInner, + next_sst_id: usize, + file_list: HashSet, +} + +impl MockStorage { + pub fn new() -> Self { + let snapshot = LsmStorageInner { + memtable: Arc::new(MemTable::create()), + imm_memtables: Vec::new(), + l0_sstables: Vec::new(), + levels: Vec::new(), + sstables: Default::default(), + }; + Self { + snapshot, + next_sst_id: 0, + file_list: Default::default(), + } + } + + fn generate_sst_id(&mut self) -> usize { + let id = self.next_sst_id; + self.next_sst_id += 1; + id + } + + pub fn flush_sst(&mut self) { + let id = self.generate_sst_id(); + self.snapshot.l0_sstables.push(id); + self.file_list.insert(id); + } + + pub fn remove(&mut self, files_to_remove: &[usize]) { + for file_id in files_to_remove { + self.file_list.remove(file_id); + } + } + + pub fn dump(&self) { + print!("L0: {:?}", self.snapshot.l0_sstables); + for (level, files) in &self.snapshot.levels { + print!("L{level}: {:?}", files); + } + } +} + +fn main() { + let args = Args::parse(); + match args { + Args::Tiered {} => { + let controller = TieredCompactionController {}; + let mut storage = MockStorage::new(); + for i in 0..500 { + println!("Iteration {i}"); + storage.flush_sst(); + let task = controller.generate_compaction_task(&storage.snapshot); + let sst_id = storage.generate_sst_id(); + let (snapshot, del) = + controller.apply_compaction_result(&storage.snapshot, &task, &[sst_id]); + storage.snapshot = snapshot; + storage.remove(&del); + storage.dump(); + } + } + Args::Leveled {} => {} + } +} diff --git a/mini-lsm/src/block/builder.rs b/mini-lsm/src/block/builder.rs index 1e73188..f69ec43 100644 --- a/mini-lsm/src/block/builder.rs +++ b/mini-lsm/src/block/builder.rs @@ -24,7 +24,7 @@ impl BlockBuilder { fn estimated_size(&self) -> usize { SIZEOF_U16 /* number of key-value pairs in the block */ + self.offsets.len() * SIZEOF_U16 /* offsets */ + self.data.len() - /* key-value pairs */ + // key-value pairs } /// Adds a key-value pair to the block. Returns false when the block is full. diff --git a/mini-lsm/src/compact.rs b/mini-lsm/src/compact.rs index eaaa78f..dc560cc 100644 --- a/mini-lsm/src/compact.rs +++ b/mini-lsm/src/compact.rs @@ -1,12 +1,21 @@ +mod leveled; +mod tiered; + use std::sync::Arc; use anyhow::Result; +pub use leveled::{LeveledCompactionController, LeveledCompactionTask}; +pub use tiered::{TieredCompactionController, TieredCompactionTask}; -use crate::{ - iterators::{merge_iterator::MergeIterator, StorageIterator}, - lsm_storage::LsmStorage, - table::{SsTable, SsTableBuilder, SsTableIterator}, -}; +use crate::iterators::merge_iterator::MergeIterator; +use crate::iterators::StorageIterator; +use crate::lsm_storage::LsmStorage; +use crate::table::{SsTable, SsTableBuilder, SsTableIterator}; + +pub enum CompactionTask { + Leveled(LeveledCompactionTask), + Tiered(TieredCompactionTask), +} struct CompactOptions { block_size: usize, diff --git a/mini-lsm/src/compact/leveled.rs b/mini-lsm/src/compact/leveled.rs new file mode 100644 index 0000000..539657e --- /dev/null +++ b/mini-lsm/src/compact/leveled.rs @@ -0,0 +1,25 @@ +use crate::lsm_storage::LsmStorageInner; + +pub struct LeveledCompactionTask { + upper_level: usize, + upper_level_sst_ids: Vec, + lower_level: usize, + lower_level_sst_ids: Vec, +} + +pub struct LeveledCompactionController {} + +impl LeveledCompactionController { + pub fn generate_compaction_task(&self, snapshot: &LsmStorageInner) -> LeveledCompactionTask { + unimplemented!() + } + + pub fn apply_compaction_result( + &self, + snapshot: &LsmStorageInner, + task: &LeveledCompactionTask, + output: &[usize], + ) -> (LsmStorageInner, Vec) { + unimplemented!() + } +} diff --git a/mini-lsm/src/compact/tiered.rs b/mini-lsm/src/compact/tiered.rs new file mode 100644 index 0000000..4b83aa3 --- /dev/null +++ b/mini-lsm/src/compact/tiered.rs @@ -0,0 +1,23 @@ +use crate::lsm_storage::LsmStorageInner; +use crate::table::SsTable; + +pub struct TieredCompactionTask { + tiers: Vec, +} + +pub struct TieredCompactionController {} + +impl TieredCompactionController { + pub fn generate_compaction_task(&self, snapshot: &LsmStorageInner) -> TieredCompactionTask { + return TieredCompactionTask { tiers: Vec::new() }; + } + + pub fn apply_compaction_result( + &self, + snapshot: &LsmStorageInner, + task: &TieredCompactionTask, + output: &[usize], + ) -> (LsmStorageInner, Vec) { + (snapshot.clone(), Vec::new()) + } +} diff --git a/mini-lsm/src/lib.rs b/mini-lsm/src/lib.rs index 2912bec..b51e14b 100644 --- a/mini-lsm/src/lib.rs +++ b/mini-lsm/src/lib.rs @@ -1,5 +1,5 @@ pub mod block; -mod compact; +pub mod compact; pub mod iterators; pub mod lsm_iterator; pub mod lsm_storage; diff --git a/mini-lsm/src/lsm_storage.rs b/mini-lsm/src/lsm_storage.rs index 538234d..ebd2ca9 100644 --- a/mini-lsm/src/lsm_storage.rs +++ b/mini-lsm/src/lsm_storage.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::ops::Bound; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicUsize; @@ -20,14 +21,17 @@ pub type BlockCache = moka::sync::Cache<(usize, usize), Arc>; #[derive(Clone)] pub struct LsmStorageInner { /// The current memtable. - memtable: Arc, + pub memtable: Arc, /// Immutable memTables, from earliest to latest. - imm_memtables: Vec>, + pub imm_memtables: Vec>, /// L0 SsTables, from earliest to latest. - l0_sstables: Vec>, - /// L1 - L6 SsTables, sorted by key range. + pub l0_sstables: Vec, + /// SsTables sorted by key range; L1 - L6 for leveled compaction, or tiers for tiered + /// compaction. #[allow(dead_code)] - levels: Vec>>, + pub levels: Vec<(usize, Vec)>, + /// SsTable objects. + pub sstables: HashMap>, } impl LsmStorageInner { @@ -37,6 +41,7 @@ impl LsmStorageInner { imm_memtables: vec![], l0_sstables: vec![], levels: vec![], + sstables: Default::default(), } } } @@ -94,7 +99,7 @@ impl LsmStorage { let mut iters = Vec::with_capacity(snapshot.l0_sstables.len()); for table in snapshot.l0_sstables.iter().rev() { iters.push(Box::new(SsTableIterator::create_and_seek_to_key( - table.clone(), + snapshot.sstables[table].clone(), key, )?)); } @@ -173,7 +178,8 @@ impl LsmStorage { // Remove the memtable from the immutable memtables. snapshot.imm_memtables.pop(); // Add L0 table - snapshot.l0_sstables.push(sst); + snapshot.l0_sstables.push(sst_id); + snapshot.sstables.insert(sst_id, sst); // Update the snapshot. *guard = Arc::new(snapshot); } @@ -200,19 +206,18 @@ impl LsmStorage { let memtable_iter = MergeIterator::create(memtable_iters); let mut table_iters = Vec::with_capacity(snapshot.l0_sstables.len()); - for table in snapshot.l0_sstables.iter().rev() { + for table_id in snapshot.l0_sstables.iter().rev() { + let table = snapshot.sstables[table_id].clone(); let iter = match lower { - Bound::Included(key) => { - SsTableIterator::create_and_seek_to_key(table.clone(), key)? - } + Bound::Included(key) => SsTableIterator::create_and_seek_to_key(table, key)?, Bound::Excluded(key) => { - let mut iter = SsTableIterator::create_and_seek_to_key(table.clone(), key)?; + let mut iter = SsTableIterator::create_and_seek_to_key(table, key)?; if iter.is_valid() && iter.key() == key { iter.next()?; } iter } - Bound::Unbounded => SsTableIterator::create_and_seek_to_first(table.clone())?, + Bound::Unbounded => SsTableIterator::create_and_seek_to_first(table)?, }; table_iters.push(Box::new(iter)); diff --git a/mini-lsm/src/table.rs b/mini-lsm/src/table.rs index 05a3be6..831c65b 100644 --- a/mini-lsm/src/table.rs +++ b/mini-lsm/src/table.rs @@ -33,7 +33,8 @@ impl BlockMeta { // The size of actual key estimated_size += meta.first_key.len(); } - // Reserve the space to improve performance, especially when the size of incoming data is large + // Reserve the space to improve performance, especially when the size of incoming data is + // large buf.reserve(estimated_size); let original_len = buf.len(); for meta in block_meta {