diff --git a/README.md b/README.md index eae2e4d..2460142 100644 --- a/README.md +++ b/README.md @@ -55,12 +55,12 @@ We are working on a new version of the mini-lsm tutorial that is split into 3 we | 1.5 | Storage Engine - Read Path | ✅ | ✅ | ✅ | | 1.6 | Storage Engine - Write Path | ✅ | ✅ | ✅ | | 1.7 | Bloom Filter and Key Compression | ✅ | ✅ | ✅ | -| 2.1 | Compaction Implementation | ✅ | 🚧 | 🚧 | +| 2.1 | Compaction Implementation | ✅ | ✅ | ✅ | | 2.2 | Compaction Strategy - Simple | ✅ | 🚧 | 🚧 | -| 2.3 | Compaction Strategy - Tiered | ✅ | 🚧 | | -| 2.4 | Compaction Strategy - Leveled | ✅ | 🚧 | | -| 2.5 | Manifest | ✅ | 🚧 | | -| 2.6 | Write-Ahead Log | ✅ | 🚧 | | +| 2.3 | Compaction Strategy - Tiered | ✅ | 🚧 | 🚧 | +| 2.4 | Compaction Strategy - Leveled | ✅ | 🚧 | 🚧 | +| 2.5 | Manifest | ✅ | 🚧 | 🚧 | +| 2.6 | Write-Ahead Log | ✅ | 🚧 | 🚧 | | 2.7 | Batch Write + Checksum | | | | | 3.1 | Timestamp Key Encoding + New Block Format | | | | | 3.2 | Prefix Bloom Filter | | | | diff --git a/mini-lsm-book/custom.css b/mini-lsm-book/custom.css index 8792ecb..f1feffb 100644 --- a/mini-lsm-book/custom.css +++ b/mini-lsm-book/custom.css @@ -2,4 +2,10 @@ margin-left: auto; margin-right: auto; display: block; +} + +.caption { + text-align: center; + font-size: smaller; + color: gray; } \ No newline at end of file diff --git a/mini-lsm-book/src/lsm-tutorial/00-full-overview.svg b/mini-lsm-book/src/lsm-tutorial/00-full-overview.svg index 18fbce5..3ecd82b 100644 --- a/mini-lsm-book/src/lsm-tutorial/00-full-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/00-full-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + 00-full-overview diff --git a/mini-lsm-book/src/lsm-tutorial/00-lsm-read-flow.svg b/mini-lsm-book/src/lsm-tutorial/00-lsm-read-flow.svg index 21ac95a..53ac57c 100644 --- a/mini-lsm-book/src/lsm-tutorial/00-lsm-read-flow.svg +++ b/mini-lsm-book/src/lsm-tutorial/00-lsm-read-flow.svg @@ -1,6 +1,6 @@ - + @@ -18,7 +18,7 @@ - + 00-lsm-read-flow diff --git a/mini-lsm-book/src/lsm-tutorial/00-lsm-write-flow.svg b/mini-lsm-book/src/lsm-tutorial/00-lsm-write-flow.svg index 8bb95d2..4820e5a 100644 --- a/mini-lsm-book/src/lsm-tutorial/00-lsm-write-flow.svg +++ b/mini-lsm-book/src/lsm-tutorial/00-lsm-write-flow.svg @@ -1,6 +1,6 @@ - + @@ -13,7 +13,7 @@ - + 00-lsm-write-flow diff --git a/mini-lsm-book/src/lsm-tutorial/week1-01-frozen.svg b/mini-lsm-book/src/lsm-tutorial/week1-01-frozen.svg index 99f4434..e34dacd 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-01-frozen.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-01-frozen.svg @@ -1,6 +1,6 @@ - + @@ -13,7 +13,7 @@ - + week1-01-frozen diff --git a/mini-lsm-book/src/lsm-tutorial/week1-01-overview.svg b/mini-lsm-book/src/lsm-tutorial/week1-01-overview.svg index 8ed4b03..6b23672 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-01-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-01-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week1-01-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week1-01-single.svg b/mini-lsm-book/src/lsm-tutorial/week1-01-single.svg index f19b12c..23308dd 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-01-single.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-01-single.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week1-01-single diff --git a/mini-lsm-book/src/lsm-tutorial/week1-02-overview.svg b/mini-lsm-book/src/lsm-tutorial/week1-02-overview.svg index ed2c9e5..4ffddfc 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-02-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-02-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 
@@ - + week1-02-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week1-03-overview.svg b/mini-lsm-book/src/lsm-tutorial/week1-03-overview.svg index 3d8116f..6bc9d54 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-03-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-03-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week1-03-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week1-04-overview.svg b/mini-lsm-book/src/lsm-tutorial/week1-04-overview.svg index af84287..d9f1680 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-04-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-04-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week1-04-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week1-05-overview.svg b/mini-lsm-book/src/lsm-tutorial/week1-05-overview.svg index 8e86d6c..a51f9e2 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-05-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-05-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week1-05-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week1-07-overview.svg b/mini-lsm-book/src/lsm-tutorial/week1-07-overview.svg index 62abf63..eac92c9 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-07-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-07-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week1-07-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week1-overview.svg b/mini-lsm-book/src/lsm-tutorial/week1-overview.svg index 2a7995e..a6caf06 100644 --- a/mini-lsm-book/src/lsm-tutorial/week1-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week1-overview.svg @@ -1,6 +1,6 @@ - + @@ -13,7 +13,7 @@ - + week1-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week2-00-leveled.svg b/mini-lsm-book/src/lsm-tutorial/week2-00-leveled.svg new file mode 100644 index 0000000..fbc06d7 --- /dev/null +++ b/mini-lsm-book/src/lsm-tutorial/week2-00-leveled.svg @@ -0,0 +1,141 @@ + + + + + + + + + + + + week2-00-leveled + + + Layer 1 + + + + + + + + + SST + + + + + L0 + + + + + + + SST + + + + + L1 + + + + + + + SST + + + + + + + SST + + + + + L2 + + + + + + + SST + + + + + + + SST + + + + + + + + + + SST + + + + + + + + + + + SST + + + + + + + SST + + + + + + + SST + + + + + + + + Leveled Compaction + + + + + L1 + + + + + + + SST + + + + + + + SST + + + + + diff --git a/mini-lsm-book/src/lsm-tutorial/week2-00-tiered.svg b/mini-lsm-book/src/lsm-tutorial/week2-00-tiered.svg new file mode 100644 index 0000000..e9d63be --- /dev/null +++ b/mini-lsm-book/src/lsm-tutorial/week2-00-tiered.svg @@ -0,0 +1,113 @@ + + + + + + + + + + + + week2-00-tiered + + + Layer 1 + + + + + + + + + + + + + SST + + + + + Tier 2 + + + + + + + SST + + + + + Tier 1 + + + + + + + SST + + + + + + + SST + + + + + Tier 0 + + + + + + + SST + + + + + + + SST + + + + + + + + + + + + + SST + + + + + Tier 3 + + + + + + + SST + + + + + Tiered Compaction + + + + + diff --git a/mini-lsm-book/src/lsm-tutorial/week2-00-two-extremes-1.svg b/mini-lsm-book/src/lsm-tutorial/week2-00-two-extremes-1.svg new file mode 100644 index 0000000..629496e --- /dev/null +++ b/mini-lsm-book/src/lsm-tutorial/week2-00-two-extremes-1.svg @@ -0,0 +1,68 @@ + + + + + + + + + + + + week2-00-two-extremes-1 + + + Layer 1 + + + + + SST + + + + + L0 + + + + + + + SST + + + + + + + + + + SST + + + + + + + SST + + + + + + + SST + + + + + + + SST + + + + + diff --git a/mini-lsm-book/src/lsm-tutorial/week2-00-two-extremes-2.svg b/mini-lsm-book/src/lsm-tutorial/week2-00-two-extremes-2.svg new file mode 100644 index 0000000..a7a0dc0 --- /dev/null +++ 
b/mini-lsm-book/src/lsm-tutorial/week2-00-two-extremes-2.svg @@ -0,0 +1,122 @@ + + + + + + + + + + + + week2-00-two-extremes-2 + + + Layer 1 + + + + + + + + + + + + + SST + + + + + L0 + + + + + + + + + + SST + + + + + L1 + + + + + + + SST + + + + + + + SST + + + + + + + SST + + + + + + + SST + + + + + L1 + + + + + + + SST + + + + + + + SST + + + + + + + SST + + + + + + + SST + + + + + + + + Full Compaction + + + + + diff --git a/mini-lsm-book/src/lsm-tutorial/week2-01-overview.svg b/mini-lsm-book/src/lsm-tutorial/week2-01-overview.svg index 1bfa71d..3d294b5 100644 --- a/mini-lsm-book/src/lsm-tutorial/week2-01-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week2-01-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week2-01-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week2-05-overview.svg b/mini-lsm-book/src/lsm-tutorial/week2-05-overview.svg index cbc9c3b..d8f942f 100644 --- a/mini-lsm-book/src/lsm-tutorial/week2-05-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week2-05-overview.svg @@ -1,6 +1,6 @@ - + @@ -13,7 +13,7 @@ - + week2-05-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week2-06-overview.svg b/mini-lsm-book/src/lsm-tutorial/week2-06-overview.svg index 3cc1679..8fdd0ec 100644 --- a/mini-lsm-book/src/lsm-tutorial/week2-06-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week2-06-overview.svg @@ -1,6 +1,6 @@ - + @@ -8,7 +8,7 @@ - + week2-06-overview diff --git a/mini-lsm-book/src/lsm-tutorial/week2-overview.svg b/mini-lsm-book/src/lsm-tutorial/week2-overview.svg index dcb0560..e7cd021 100644 --- a/mini-lsm-book/src/lsm-tutorial/week2-overview.svg +++ b/mini-lsm-book/src/lsm-tutorial/week2-overview.svg @@ -1,6 +1,6 @@ - + @@ -13,7 +13,7 @@ - + week2-overview diff --git a/mini-lsm-book/src/week2-01-compaction.md b/mini-lsm-book/src/week2-01-compaction.md index ab5cb66..59d2c0a 100644 --- a/mini-lsm-book/src/week2-01-compaction.md +++ b/mini-lsm-book/src/week2-01-compaction.md @@ -39,7 +39,7 @@ fn force_full_compaction(&self) { let state_lock = self.state_lock.lock(); let state = self.state.write(); state.l0_sstables.remove(/* the ones being compacted */); - state.levels[0] = new_ssts; + state.levels[0] = new_ssts; // new SSTs added to L1 }; std::fs::remove(ssts_to_compact)?; } @@ -51,7 +51,7 @@ In your compaction implementation, you only need to handle `FullCompaction` for Because we always compact all SSTs, if we find multiple version of a key, we can simply retain the latest one. If the latest version is a delete marker, we do not need to keep it in the produced SST files. This does not apply for the compaction strategies in the next few chapters. -There are some niches that you might need to think about. +There are some things that you might need to think about. * How does your implementation handle L0 flush in par with compaction? (Not taking the state lock when doing the compaction, and also need to consider new L0 files produced when compaction is going on.) * If your implementation removes the original SST files immediately after the compaction completes, will it cause problems in your system? (Generally no on macOS/Linux because the OS will not actually remove the file until no file handle is being held.) @@ -89,10 +89,10 @@ You will need to implement `num_active_iterators` for concat iterator so that th * What are the definitions of read/write/space amplifications? (This is covered in the overview chapter) * What are the ways to accurately compute the read/write/space amplifications, and what are the ways to estimate them? 
* Is it correct that a key will take some storage space even if a user requests to delete it? -* Given that compaction takes a lot of write bandwidth and read bandwidth and may interfere with foreground operations, it is a good idea to postpone compaction when there are large write flow. It is even beneficial to stop/pause existing compaction tasks in this situation. What do you think of this idea? (Read the Slik paper!) +* Given that compaction takes a lot of write bandwidth and read bandwidth and may interfere with foreground operations, it is a good idea to postpone compaction when there is a large write flow. It can even be beneficial to stop/pause existing compaction tasks in this situation. What do you think of this idea? (Read the [Silk](https://www.usenix.org/conference/atc19/presentation/balmau) paper!) * Is it a good idea to use/fill the block cache for compactions? Or is it better to fully bypass the block cache when compacting? * Does it make sense to have a `struct ConcatIterator` in the system? -* Some researchers/engineers propose to offload compaction to a remote server or a serverless lambda function. What are the benefits, and what might be the potential challenges and performance impacts of doing remote compaction? (Think of the point when a compaction completes and the block cache...) +* Some researchers/engineers propose to offload compaction to a remote server or a serverless lambda function. What are the benefits, and what might be the potential challenges and performance impacts of doing remote compaction? (Think of the point when a compaction completes and what happens to the block cache on the next read request...) We do not provide reference answers to the questions; feel free to discuss them in the Discord community. diff --git a/mini-lsm-book/src/week2-02-simple.md b/mini-lsm-book/src/week2-02-simple.md index 7abc93a..af9659a 100644 --- a/mini-lsm-book/src/week2-02-simple.md +++ b/mini-lsm-book/src/week2-02-simple.md @@ -17,8 +17,9 @@ In this chapter, you will: * Is it correct that a key will only be purged from the LSM tree if the user requests to delete it and it has been compacted in the bottom-most level? * Is it a good strategy to periodically do a full compaction on the LSM tree? Why or why not? -* Actively choosing some old files/levels to compact even if they do not violate the level amplifier would be a good choice, is it true? (Look at the Lethe paper!) +* Is it true that actively choosing some old files/levels to compact, even if they do not violate the level amplifier, would be a good choice? (Look at the [Lethe](https://disc-projects.bu.edu/lethe/) paper!) * If the storage device can achieve a sustainable 1GB/s write throughput and the write amplification of the LSM tree is 10x, how much throughput can the user get from the LSM key-value interfaces? +* Can you merge L1 and L3 directly if there are SST files in L2? Does it still produce a correct result? * What is your favorite boba shop in your city? (If you answered yes in week 1 day 3...) We do not provide reference answers to the questions; feel free to discuss them in the Discord community. diff --git a/mini-lsm-book/src/week2-overview.md b/mini-lsm-book/src/week2-overview.md index 2d6d221..d1adc55 100644 --- a/mini-lsm-book/src/week2-overview.md +++ b/mini-lsm-book/src/week2-overview.md @@ -45,6 +45,14 @@ So from the above example, we have 2 naive ways of handling the LSM structure -- Compaction is a time-consuming operation.
It will need to read all data from some files and write the same amount of data back to the disk. This operation takes a lot of CPU and I/O resources. Not doing compactions at all leads to high read amplification, but it does not need to write new files. Always doing full compaction reduces the read amplification, but it will need to constantly rewrite the files on the disk. +![no compaction](./lsm-tutorial/week2-00-two-extremes-1.svg) + +<p class="caption">
+No Compaction at All
+</p>
+ +![always full compaction](./lsm-tutorial/week2-00-two-extremes-2.svg) + +<p class="caption">
+Always compact when a new SST is flushed
+</p>
+ The ratio of the total data written to the disk versus the data flushed from the memtables is the write amplification. That is to say, no compaction has a write amplification ratio of 1x, because once the SSTs are flushed to the disk, they will stay there. Always doing compaction has a very high write amplification. If we do a full compaction every time we get an SST, the data written to the disk will be quadratic to the number of SSTs flushed. For example, if we flushed 100 SSTs to the disk, we will do compactions of 2 files, 3 files, ..., 100 files, where the actual total amount of data we wrote to the disk is about 5000 SSTs. The write amplification after writing 100 SSTs in this case would be 50x. A good compaction strategy can balance read amplification, write amplification, and space amplification (we will talk about it soon). In a general-purpose LSM storage engine, it is generally impossible to find a strategy that can achieve the lowest amplification in all 3 of these factors, unless there are some specific data patterns that the engine could use. The good thing about LSM is that we can theoretically analyze the amplifications of a compaction strategy, and all these things happen in the background. We can choose compaction strategies and dynamically change some parameters of them to adjust our storage engine to the optimal state. Compaction strategies are all about tradeoffs, and an LSM-based storage engine enables us to select what to trade at runtime. @@ -57,10 +65,14 @@ If the workload is like a time-series database, it is possible that the user alw Compaction strategies usually aim to control the number of sorted runs, so as to keep read amplification at a reasonable level. There are generally two categories of compaction strategies: leveled and tiered. -In leveled compaction, the user can specify a maximum number of levels, which is the number of sorted runs in the system (except L0). For example, RocksDB usually keeps 6 levels (sorted runs) in leveled compaction mode. During the compaction process, SSTs from two adjacent levels will be merged and then the produced SSTs will be put to the lower level of the two levels. Therefore, you will usually see a small sorted run merged with a large sorted run in leveled compaction. The sorted runs (levels) grow exponentially in size -- the lower level will be < some number x > of the upper level in size. +In leveled compaction, the user can specify a maximum number of levels, which is the number of sorted runs in the system (except L0). For example, RocksDB usually keeps 6 levels (sorted runs) in leveled compaction mode. During the compaction process, SSTs from two adjacent levels will be merged and then the produced SSTs will be put to the lower level of the two levels. Therefore, you will usually see a small sorted run merged with a large sorted run in leveled compaction. The sorted runs (levels) grow exponentially in size -- the lower level will be `<some number x>` times the size of the upper level. + +![leveled compaction](./lsm-tutorial/week2-00-leveled.svg) In tiered compaction, the engine will dynamically adjust the number of sorted runs by merging them or letting new SSTs be flushed as a new sorted run (a tier) to minimize write amplification. In this strategy, you will usually see the engine merge two equally-sized sorted runs. The number of tiers can be high if the compaction strategy does not choose to merge tiers, therefore making read amplification high.
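A quick aside on the write-amplification example a few paragraphs above: the numbers are easy to verify with a tiny standalone Rust sketch (illustrative only, not part of the tutorial code or this PR):

```rust
// Write amplification of the "full compaction on every flush" extreme:
// each flush writes 1 SST, and the k-th compaction then rewrites k SSTs.
fn full_compaction_write_amp(n: u64) -> f64 {
    let flushed = n; // SSTs the user's writes actually produced
    let rewritten: u64 = (2..=n).sum(); // compactions of 2, 3, ..., n files
    (flushed + rewritten) as f64 / flushed as f64
}

fn main() {
    // Prints 51.5 for 100 flushed SSTs -- in line with the "about 5000 SSTs
    // written, roughly 50x" estimate in the text.
    println!("{:.1}x", full_compaction_write_amp(100));
}
```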
In this tutorial, we will implement RocksDB's universal compaction, which is a kind of tiered compaction strategy. +![tiered compaction](./lsm-tutorial/week2-00-tiered.svg) + ## Space Amplification The most intuitive way to compute space amplification is to divide the actual space used by the LSM engine by the user space usage (i.e., database size, number of rows in the database, etc.). The engine will need to store delete tombstones, and sometimes multiple versions of the same key if compaction is not happening frequently enough, therefore causing space amplification. diff --git a/mini-lsm/src/tests/harness.rs b/mini-lsm/src/tests/harness.rs index c1a6c9c..6b4f4a9 100644 --- a/mini-lsm/src/tests/harness.rs +++ b/mini-lsm/src/tests/harness.rs @@ -5,7 +5,7 @@ use bytes::Bytes; use crate::{ iterators::StorageIterator, - lsm_storage::BlockCache, + lsm_storage::{BlockCache, LsmStorageInner}, table::{SsTable, SsTableBuilder}, }; @@ -123,3 +123,11 @@ pub fn generate_sst( } builder.build(id, block_cache, path.as_ref()).unwrap() } + + +pub fn sync(storage: &LsmStorageInner) { + storage + .force_freeze_memtable(&storage.state_lock.lock()) + .unwrap(); + storage.force_flush_next_imm_memtable().unwrap(); +} \ No newline at end of file diff --git a/mini-lsm/src/tests/week1_day6.rs b/mini-lsm/src/tests/week1_day6.rs index d196daf..6b63faa 100644 --- a/mini-lsm/src/tests/week1_day6.rs +++ b/mini-lsm/src/tests/week1_day6.rs @@ -3,7 +3,7 @@ use std::{ops::Bound, time::Duration}; use bytes::Bytes; use tempfile::tempdir; -use self::harness::check_iter_result; +use self::harness::{check_iter_result, sync}; use super::*; use crate::{ @@ -11,12 +11,6 @@ use crate::{ lsm_storage::{LsmStorageInner, LsmStorageOptions, MiniLsm}, }; -fn sync(storage: &LsmStorageInner) { - storage - .force_freeze_memtable(&storage.state_lock.lock()) - .unwrap(); - storage.force_flush_next_imm_memtable().unwrap(); -} #[test] fn test_task1_storage_scan() { diff --git a/mini-lsm/src/tests/week2_day1.rs b/mini-lsm/src/tests/week2_day1.rs index 7d70af1..b64a37b 100644 --- a/mini-lsm/src/tests/week2_day1.rs +++ b/mini-lsm/src/tests/week2_day1.rs @@ -1,18 +1,143 @@ -use std::ops::Bound; +use std::{ops::Bound, path::Path, sync::Arc}; use bytes::Bytes; use tempfile::tempdir; +use self::harness::sync; use self::harness::check_iter_result; use super::*; -use crate::lsm_storage::{LsmStorageInner, LsmStorageOptions}; +use crate::{ + iterators::{ + concat_iterator::SstConcatIterator, merge_iterator::MergeIterator, StorageIterator, + }, + lsm_storage::{LsmStorageInner, LsmStorageOptions, LsmStorageState}, + table::{SsTable, SsTableBuilder, SsTableIterator}, +}; -fn sync(storage: &LsmStorageInner) { - storage - .force_freeze_memtable(&storage.state_lock.lock()) +fn construct_merge_iterator_over_storage( + state: &LsmStorageState, +) -> MergeIterator<SsTableIterator> { + let mut iters = Vec::new(); + for t in &state.l0_sstables { + iters.push(Box::new( + SsTableIterator::create_and_seek_to_first(state.sstables.get(t).cloned().unwrap()) + .unwrap(), + )); + } + for (_, files) in &state.levels { + for f in files { + iters.push(Box::new( + SsTableIterator::create_and_seek_to_first(state.sstables.get(f).cloned().unwrap()) + .unwrap(), + )); + } + } + MergeIterator::create(iters) +} + +#[test] +fn test_task1_full_compaction() { + let dir = tempdir().unwrap(); + let storage = LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap(); + storage.put(b"0", b"v1").unwrap(); + sync(&storage); + storage.put(b"0", b"v2").unwrap(); +
storage.put(b"1", b"v2").unwrap(); + storage.put(b"2", b"v2").unwrap(); + sync(&storage); + storage.delete(b"0").unwrap(); + storage.delete(b"2").unwrap(); + sync(&storage); + assert_eq!(storage.state.read().l0_sstables.len(), 3); + let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); + check_iter_result( + &mut iter, + vec![ + (Bytes::from_static(b"0"), Bytes::from_static(b"")), + (Bytes::from_static(b"1"), Bytes::from_static(b"v2")), + (Bytes::from_static(b"2"), Bytes::from_static(b"")), + ], + ); + storage.force_full_compaction().unwrap(); + assert!(storage.state.read().l0_sstables.is_empty()); + let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); + check_iter_result( + &mut iter, + vec![(Bytes::from_static(b"1"), Bytes::from_static(b"v2"))], + ); + storage.put(b"0", b"v3").unwrap(); + storage.put(b"2", b"v3").unwrap(); + sync(&storage); + storage.delete(b"1").unwrap(); + sync(&storage); + let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); + check_iter_result( + &mut iter, + vec![ + (Bytes::from_static(b"0"), Bytes::from_static(b"v3")), + (Bytes::from_static(b"1"), Bytes::from_static(b"")), + (Bytes::from_static(b"2"), Bytes::from_static(b"v3")), + ], + ); + storage.force_full_compaction().unwrap(); + assert!(storage.state.read().l0_sstables.is_empty()); + let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); + check_iter_result( + + &mut iter, + vec![ + (Bytes::from_static(b"0"), Bytes::from_static(b"v3")), + (Bytes::from_static(b"2"), Bytes::from_static(b"v3")), + ], + ); +} + +fn generate_concat_sst( + start_key: usize, + end_key: usize, + dir: impl AsRef<Path>, + id: usize, +) -> SsTable { + let mut builder = SsTableBuilder::new(128); + for idx in start_key..end_key { + let key = format!("{:05}", idx); + builder.add(key.as_bytes(), b"test"); + } + let path = dir.as_ref().join(format!("{id}.sst")); + builder.build_for_test(path).unwrap() +} + +#[test] +fn test_task2_concat_iterator() { + let dir = tempdir().unwrap(); + let mut sstables = Vec::new(); + for i in 1..=10 { + sstables.push(Arc::new(generate_concat_sst( + i * 10, + (i + 1) * 10, + dir.path(), + i, + ))); + } + for key in 0..120 { + let iter = SstConcatIterator::create_and_seek_to_key( + sstables.clone(), + format!("{:05}", key).as_bytes(), + ) .unwrap(); - storage.force_flush_next_imm_memtable().unwrap(); + if key < 10 { + assert!(iter.is_valid()); + assert_eq!(iter.key(), b"00010"); + } else if key >= 110 { + assert!(!iter.is_valid()); + } else { + assert!(iter.is_valid()); + assert_eq!(iter.key(), format!("{:05}", key).as_bytes()); + } + } + let iter = SstConcatIterator::create_and_seek_to_first(sstables.clone()).unwrap(); + assert!(iter.is_valid()); + assert_eq!(iter.key(), b"00010"); } #[test] @@ -38,11 +163,11 @@ fn test_task3_integration() { storage.put(b"00", b"2333").unwrap(); storage.put(b"3", b"23333").unwrap(); storage.delete(b"1").unwrap(); - // sync(&storage); - // storage.force_full_compaction().unwrap(); + sync(&storage); + storage.force_full_compaction().unwrap(); - // assert!(storage.state.read().l0_sstables.is_empty()); - // assert!(!storage.state.read().levels[0].1.is_empty()); + assert!(storage.state.read().l0_sstables.is_empty()); + assert!(!storage.state.read().levels[0].1.is_empty()); check_iter_result( &mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
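As a closing note, the test pattern this diff converges on (flush through the shared `harness::sync` helper, then force a full compaction and assert on the resulting state) looks roughly like the sketch below. It reuses only identifiers that already appear in this diff and is not an additional test from the PR:

```rust
use tempfile::tempdir;

use self::harness::sync;
use crate::lsm_storage::{LsmStorageInner, LsmStorageOptions};

#[test]
fn sketch_flush_then_full_compaction() {
    let dir = tempdir().unwrap();
    let storage =
        LsmStorageInner::open(&dir, LsmStorageOptions::default_for_week1_test()).unwrap();
    storage.put(b"key", b"value").unwrap();
    // Freeze the active memtable and flush it, producing one new L0 SST.
    sync(&storage);
    assert_eq!(storage.state.read().l0_sstables.len(), 1);
    // Full compaction merges all L0 SSTs (and the current L1) into a fresh L1.
    storage.force_full_compaction().unwrap();
    assert!(storage.state.read().l0_sstables.is_empty());
    assert!(!storage.state.read().levels[0].1.is_empty());
}
```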