From a3a92359e182b1a24bdd920e161e751930df7a7a Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Thu, 25 Jan 2024 10:59:08 +0800 Subject: [PATCH] add key abstraction and prepare for MVCC (#28) * add key abstraction and prepare for MVCC Signed-off-by: Alex Chi * a little bit type exercise Signed-off-by: Alex Chi * refactor tests Signed-off-by: Alex Chi * fix clippy warnings Signed-off-by: Alex Chi * refactor starter code Signed-off-by: Alex Chi * final touch docs Signed-off-by: Alex Chi --------- Signed-off-by: Alex Chi --- mini-lsm-book/src/00-get-started.md | 2 + mini-lsm-book/src/sitemap.xml | 64 +++---- mini-lsm-book/src/week1-02-merge-iterator.md | 4 +- .../src/bin/compaction-simulator.rs | 63 ++++--- mini-lsm-starter/src/block/builder.rs | 15 +- mini-lsm-starter/src/block/iterator.rs | 15 +- mini-lsm-starter/src/iterators.rs | 6 +- .../src/iterators/concat_iterator.rs | 11 +- .../src/iterators/merge_iterator.rs | 12 +- .../src/iterators/two_merge_iterator.rs | 18 +- mini-lsm-starter/src/key.rs | 159 ++++++++++++++++++ mini-lsm-starter/src/lib.rs | 1 + mini-lsm-starter/src/lsm_iterator.rs | 6 +- mini-lsm-starter/src/mem_table.rs | 5 +- mini-lsm-starter/src/table.rs | 24 ++- mini-lsm-starter/src/table/builder.rs | 4 +- mini-lsm-starter/src/table/iterator.rs | 10 +- mini-lsm-starter/src/tests.rs | 3 +- mini-lsm-starter/src/tests/.gitkeep | 0 mini-lsm/src/block/builder.rs | 18 +- mini-lsm/src/block/iterator.rs | 33 ++-- mini-lsm/src/compact.rs | 3 +- mini-lsm/src/iterators.rs | 6 +- mini-lsm/src/iterators/concat_iterator.rs | 13 +- mini-lsm/src/iterators/merge_iterator.rs | 12 +- mini-lsm/src/iterators/two_merge_iterator.rs | 18 +- mini-lsm/src/key.rs | 159 ++++++++++++++++++ mini-lsm/src/lib.rs | 1 + mini-lsm/src/lsm_iterator.rs | 12 +- mini-lsm/src/lsm_storage.rs | 68 +++++--- mini-lsm/src/mem_table.rs | 9 +- mini-lsm/src/table.rs | 36 ++-- mini-lsm/src/table/builder.rs | 29 ++-- mini-lsm/src/table/iterator.rs | 11 +- mini-lsm/src/tests/harness.rs | 39 ++++- 
mini-lsm/src/tests/week1_day2.rs | 35 ++-- mini-lsm/src/tests/week1_day3.rs | 53 +++--- mini-lsm/src/tests/week1_day4.rs | 58 ++++--- mini-lsm/src/tests/week1_day5.rs | 22 +-- mini-lsm/src/tests/week1_day6.rs | 8 +- mini-lsm/src/tests/week1_day7.rs | 9 +- mini-lsm/src/tests/week2_day1.rs | 31 ++-- 42 files changed, 824 insertions(+), 281 deletions(-) create mode 100644 mini-lsm-starter/src/key.rs create mode 100644 mini-lsm-starter/src/tests/.gitkeep create mode 100644 mini-lsm/src/key.rs diff --git a/mini-lsm-book/src/00-get-started.md b/mini-lsm-book/src/00-get-started.md index 82f2ec1..8e7531e 100644 --- a/mini-lsm-book/src/00-get-started.md +++ b/mini-lsm-book/src/00-get-started.md @@ -21,6 +21,8 @@ code . ## Install Tools +You will need the latest stable Rust to compile this project. The minimum requirement is `1.74`. + ``` cargo x install-tools ``` diff --git a/mini-lsm-book/src/sitemap.xml b/mini-lsm-book/src/sitemap.xml index 6e6539a..3200ba1 100644 --- a/mini-lsm-book/src/sitemap.xml +++ b/mini-lsm-book/src/sitemap.xml @@ -2,130 +2,130 @@ https://skyzh.github.io/mini-lsm - 2024-01-21T13:41:44.657Z + 2024-01-25T02:56:28.231Z https://skyzh.github.io/mini-lsm/00-get-started - 2024-01-21T13:41:44.659Z + 2024-01-25T02:56:28.234Z https://skyzh.github.io/mini-lsm/00-overview - 2024-01-21T13:41:44.658Z + 2024-01-25T02:56:28.232Z https://skyzh.github.io/mini-lsm/00-preface - 2024-01-21T13:41:44.656Z + 2024-01-25T02:56:28.230Z https://skyzh.github.io/mini-lsm/00-v1 - 2024-01-21T13:41:44.677Z + 2024-01-25T02:56:28.256Z https://skyzh.github.io/mini-lsm/01-block - 2024-01-21T13:41:44.678Z + 2024-01-25T02:56:28.257Z https://skyzh.github.io/mini-lsm/02-sst - 2024-01-21T13:41:44.679Z + 2024-01-25T02:56:28.258Z https://skyzh.github.io/mini-lsm/03-memtable - 2024-01-21T13:41:44.680Z + 2024-01-25T02:56:28.259Z https://skyzh.github.io/mini-lsm/04-engine - 2024-01-21T13:41:44.681Z + 2024-01-25T02:56:28.260Z https://skyzh.github.io/mini-lsm/05-compaction - 
2024-01-21T13:41:44.682Z + 2024-01-25T02:56:28.261Z https://skyzh.github.io/mini-lsm/06-recovery - 2024-01-21T13:41:44.682Z + 2024-01-25T02:56:28.262Z https://skyzh.github.io/mini-lsm/07-bloom-filter - 2024-01-21T13:41:44.683Z + 2024-01-25T02:56:28.263Z https://skyzh.github.io/mini-lsm/08-key-compression - 2024-01-21T13:41:44.684Z + 2024-01-25T02:56:28.264Z https://skyzh.github.io/mini-lsm/09-whats-next - 2024-01-21T13:41:44.685Z + 2024-01-25T02:56:28.265Z https://skyzh.github.io/mini-lsm/week1-01-memtable - 2024-01-21T13:41:44.661Z + 2024-01-25T02:56:28.237Z https://skyzh.github.io/mini-lsm/week1-02-merge-iterator - 2024-01-21T13:41:44.662Z + 2024-01-25T02:56:28.238Z https://skyzh.github.io/mini-lsm/week1-03-block - 2024-01-21T13:41:44.663Z + 2024-01-25T02:56:28.239Z https://skyzh.github.io/mini-lsm/week1-04-sst - 2024-01-21T13:41:44.664Z + 2024-01-25T02:56:28.240Z https://skyzh.github.io/mini-lsm/week1-05-read-path - 2024-01-21T13:41:44.665Z + 2024-01-25T02:56:28.242Z https://skyzh.github.io/mini-lsm/week1-06-write-path - 2024-01-21T13:41:44.666Z + 2024-01-25T02:56:28.243Z https://skyzh.github.io/mini-lsm/week1-07-sst-optimizations - 2024-01-21T13:41:44.668Z + 2024-01-25T02:56:28.244Z https://skyzh.github.io/mini-lsm/week1-overview - 2024-01-21T13:41:44.660Z + 2024-01-25T02:56:28.235Z https://skyzh.github.io/mini-lsm/week2-01-compaction - 2024-01-21T13:41:44.669Z + 2024-01-25T02:56:28.246Z https://skyzh.github.io/mini-lsm/week2-02-simple - 2024-01-21T13:41:44.670Z + 2024-01-25T02:56:28.248Z https://skyzh.github.io/mini-lsm/week2-03-tiered - 2024-01-21T13:41:44.671Z + 2024-01-25T02:56:28.249Z https://skyzh.github.io/mini-lsm/week2-04-leveled - 2024-01-21T13:41:44.672Z + 2024-01-25T02:56:28.250Z https://skyzh.github.io/mini-lsm/week2-05-manifest - 2024-01-21T13:41:44.672Z + 2024-01-25T02:56:28.251Z https://skyzh.github.io/mini-lsm/week2-06-wal - 2024-01-21T13:41:44.673Z + 2024-01-25T02:56:28.252Z https://skyzh.github.io/mini-lsm/week2-07-snacks - 
2024-01-21T13:41:44.674Z + 2024-01-25T02:56:28.253Z https://skyzh.github.io/mini-lsm/week2-overview - 2024-01-21T13:41:44.669Z + 2024-01-25T02:56:28.245Z https://skyzh.github.io/mini-lsm/week3-overview - 2024-01-21T13:41:44.675Z + 2024-01-25T02:56:28.254Z https://skyzh.github.io/mini-lsm/week4-overview - 2024-01-21T13:41:44.676Z + 2024-01-25T02:56:28.255Z diff --git a/mini-lsm-book/src/week1-02-merge-iterator.md b/mini-lsm-book/src/week1-02-merge-iterator.md index 6ab7901..d10bb6a 100644 --- a/mini-lsm-book/src/week1-02-merge-iterator.md +++ b/mini-lsm-book/src/week1-02-merge-iterator.md @@ -94,7 +94,9 @@ let Some(mut inner_iter) = self.iters.peek_mut() { If `next` returns an error (i.e., due to disk failure, network failure, checksum error, etc.), it is no longer valid. However, when we go out of the if condition and return the error to the caller, `PeekMut`'s drop will try move the element within the heap, which causes an access to an invalid iterator. Therefore, you will need to do all error handling by yourself instead of using `?` within the scope of `PeekMut`. -We want to avoid dynamic dispatch as much as possible, and therefore we do not use `Box<dyn StorageIterator>` in the system. Instead, we prefer static dispatch using generics. +We want to avoid dynamic dispatch as much as possible, and therefore we do not use `Box<dyn StorageIterator>` in the system. Instead, we prefer static dispatch using generics. Also note that `StorageIterator` uses a generic associated type (GAT), so that it can support both `KeySlice` and `&[u8]` as the key type. We will change `KeySlice` to include the timestamp in week 3, and using a separate type for it now will make the transition smoother. + +Starting from this section, we will use `Key` to represent LSM key types and distinguish them from values in the type system. You should use the provided APIs of `Key` instead of directly accessing the inner value. We will add a timestamp to this key type in week 3, and using the key abstraction will make the transition smoother. 
For now, `KeySlice` is equivalent to `&[u8]`, `KeyVec` is equivalent to `Vec<u8>`, and `KeyBytes` is equivalent to `Bytes`. ## Task 3: LSM Iterator + Fused Iterator diff --git a/mini-lsm-starter/src/bin/compaction-simulator.rs b/mini-lsm-starter/src/bin/compaction-simulator.rs index 3e9d023..18eba53 100644 --- a/mini-lsm-starter/src/bin/compaction-simulator.rs +++ b/mini-lsm-starter/src/bin/compaction-simulator.rs @@ -4,12 +4,13 @@ use wrapper::mini_lsm_wrapper; use std::collections::HashMap; use std::sync::Arc; -use bytes::{Buf, BufMut, Bytes, BytesMut}; +use bytes::{Buf, BufMut, BytesMut}; use clap::Parser; use mini_lsm_wrapper::compact::{ LeveledCompactionController, LeveledCompactionOptions, SimpleLeveledCompactionController, SimpleLeveledCompactionOptions, TieredCompactionController, TieredCompactionOptions, }; +use mini_lsm_wrapper::key::KeyBytes; use mini_lsm_wrapper::lsm_storage::LsmStorageState; use mini_lsm_wrapper::mem_table::MemTable; use mini_lsm_wrapper::table::SsTable; @@ -135,11 +136,11 @@ impl MockStorage { "invalid file arrangement in L{}: id={}, range={:x}..={:x}; id={}, range={:x}..={:x}", level, this_file.sst_id(), - this_file.first_key().clone().get_u64(), - this_file.last_key().clone().get_u64(), + this_file.first_key().for_testing_key_ref().get_u64(), + this_file.last_key().for_testing_key_ref().get_u64(), next_file.sst_id(), - next_file.first_key().clone().get_u64(), - next_file.last_key().clone().get_u64() + next_file.first_key().for_testing_key_ref().get_u64(), + next_file.last_key().for_testing_key_ref().get_u64() ); } } @@ -184,7 +185,7 @@ impl MockStorage { } } -fn generate_random_key_range() -> (Bytes, Bytes) { +fn generate_random_key_range() -> (KeyBytes, KeyBytes) { use rand::Rng; let mut rng = rand::thread_rng(); let begin: usize = rng.gen_range(0..(1 << 31)); @@ -193,16 +194,19 @@ fn generate_random_key_range() -> (Bytes, Bytes) { let mut end_bytes = BytesMut::new(); begin_bytes.put_u64(begin as u64); end_bytes.put_u64(end as u64); - 
(begin_bytes.into(), end_bytes.into()) + ( + KeyBytes::for_testing_from_bytes_no_ts(begin_bytes.freeze()), + KeyBytes::for_testing_from_bytes_no_ts(end_bytes.freeze()), + ) } fn generate_random_split( - mut begin_bytes: Bytes, - mut end_bytes: Bytes, + begin_bytes: KeyBytes, + end_bytes: KeyBytes, split: usize, -) -> Vec<(Bytes, Bytes)> { - let begin = begin_bytes.get_u64(); - let end = end_bytes.get_u64(); +) -> Vec<(KeyBytes, KeyBytes)> { + let begin = begin_bytes.for_testing_key_ref().get_u64(); + let end = end_bytes.for_testing_key_ref().get_u64(); let len = end - begin + 1; let mut result = Vec::new(); let split = split as u64; @@ -214,7 +218,10 @@ fn generate_random_split( let mut end_bytes = BytesMut::new(); begin_bytes.put_u64(nb); end_bytes.put_u64(ne); - result.push((begin_bytes.into(), end_bytes.into())); + result.push(( + KeyBytes::for_testing_from_bytes_no_ts(begin_bytes.freeze()), + KeyBytes::for_testing_from_bytes_no_ts(end_bytes.freeze()), + )); } result } @@ -502,8 +509,14 @@ fn main() { .map(|id| format!( "{}.sst {:x}..={:x}", id, - storage.snapshot.sstables[id].first_key().clone().get_u64(), - storage.snapshot.sstables[id].last_key().clone().get_u64() + storage.snapshot.sstables[id] + .first_key() + .for_testing_key_ref() + .get_u64(), + storage.snapshot.sstables[id] + .last_key() + .for_testing_key_ref() + .get_u64() )) .collect::>() .join(", ") @@ -516,8 +529,14 @@ fn main() { .map(|id| format!( "{}.sst {:x}..={:x}", id, - storage.snapshot.sstables[id].first_key().clone().get_u64(), - storage.snapshot.sstables[id].last_key().clone().get_u64() + storage.snapshot.sstables[id] + .first_key() + .for_testing_key_ref() + .get_u64(), + storage.snapshot.sstables[id] + .last_key() + .for_testing_key_ref() + .get_u64() )) .collect::>() .join(", ") @@ -529,8 +548,14 @@ fn main() { .map(|id| format!( "{}.sst {:x}..={:x}", id, - storage.snapshot.sstables[id].first_key().clone().get_u64(), - storage.snapshot.sstables[id].last_key().clone().get_u64() + 
storage.snapshot.sstables[id] + .first_key() + .for_testing_key_ref() + .get_u64(), + storage.snapshot.sstables[id] + .last_key() + .for_testing_key_ref() + .get_u64() )) .collect::>() .join(", ") diff --git a/mini-lsm-starter/src/block/builder.rs b/mini-lsm-starter/src/block/builder.rs index d14fabe..4c83959 100644 --- a/mini-lsm-starter/src/block/builder.rs +++ b/mini-lsm-starter/src/block/builder.rs @@ -1,10 +1,21 @@ #![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod #![allow(dead_code)] // TODO(you): remove this lint after implementing this mod +use crate::key::{KeySlice, KeyVec}; + use super::Block; /// Builds a block. -pub struct BlockBuilder {} +pub struct BlockBuilder { + /// Offsets of each key-value entries. + offsets: Vec, + /// All serialized key-value pairs in the block. + data: Vec, + /// The expected block size. + block_size: usize, + /// The first key in the block + first_key: KeyVec, +} impl BlockBuilder { /// Creates a new block builder. @@ -14,7 +25,7 @@ impl BlockBuilder { /// Adds a key-value pair to the block. Returns false when the block is full. #[must_use] - pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { + pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool { unimplemented!() } diff --git a/mini-lsm-starter/src/block/iterator.rs b/mini-lsm-starter/src/block/iterator.rs index 4e69678..f823b5a 100644 --- a/mini-lsm-starter/src/block/iterator.rs +++ b/mini-lsm-starter/src/block/iterator.rs @@ -3,6 +3,8 @@ use std::sync::Arc; +use crate::key::{KeySlice, KeyVec}; + use super::Block; /// Iterates on a block. 
@@ -10,20 +12,23 @@ pub struct BlockIterator { /// The internal `Block`, wrapped by an `Arc` block: Arc, /// The current key, empty represents the iterator is invalid - key: Vec, + key: KeyVec, /// the value range from the block value_range: (usize, usize), /// Current index of the key-value pair, should be in range of [0, num_of_elements) idx: usize, + /// The first key in the block + first_key: KeyVec, } impl BlockIterator { fn new(block: Arc) -> Self { Self { block, - key: Vec::new(), + key: KeyVec::new(), value_range: (0, 0), idx: 0, + first_key: KeyVec::new(), } } @@ -33,12 +38,12 @@ impl BlockIterator { } /// Creates a block iterator and seek to the first key that >= `key`. - pub fn create_and_seek_to_key(block: Arc, key: &[u8]) -> Self { + pub fn create_and_seek_to_key(block: Arc, key: KeySlice) -> Self { unimplemented!() } /// Returns the key of the current entry. - pub fn key(&self) -> &[u8] { + pub fn key(&self) -> KeySlice { unimplemented!() } @@ -66,7 +71,7 @@ impl BlockIterator { /// Seek to the first key that >= `key`. /// Note: You should assume the key-value pairs in the block are sorted when being added by /// callers. - pub fn seek_to_key(&mut self, key: &[u8]) { + pub fn seek_to_key(&mut self, key: KeySlice) { unimplemented!() } } diff --git a/mini-lsm-starter/src/iterators.rs b/mini-lsm-starter/src/iterators.rs index 56a36a4..a0da803 100644 --- a/mini-lsm-starter/src/iterators.rs +++ b/mini-lsm-starter/src/iterators.rs @@ -3,11 +3,15 @@ pub mod merge_iterator; pub mod two_merge_iterator; pub trait StorageIterator { + type KeyType<'a>: PartialEq + Eq + PartialOrd + Ord + where + Self: 'a; + /// Get the current value. fn value(&self) -> &[u8]; /// Get the current key. - fn key(&self) -> &[u8]; + fn key(&self) -> Self::KeyType<'_>; /// Check if the current iterator is valid. 
fn is_valid(&self) -> bool; diff --git a/mini-lsm-starter/src/iterators/concat_iterator.rs b/mini-lsm-starter/src/iterators/concat_iterator.rs index 18b886e..8cef715 100644 --- a/mini-lsm-starter/src/iterators/concat_iterator.rs +++ b/mini-lsm-starter/src/iterators/concat_iterator.rs @@ -6,7 +6,10 @@ use std::sync::Arc; use anyhow::Result; use super::StorageIterator; -use crate::table::{SsTable, SsTableIterator}; +use crate::{ + key::KeySlice, + table::{SsTable, SsTableIterator}, +}; /// Concat multiple iterators ordered in key order and their key ranges do not overlap. We do not want to create the /// iterators when initializing this iterator to reduce the overhead of seeking. @@ -21,13 +24,15 @@ impl SstConcatIterator { unimplemented!() } - pub fn create_and_seek_to_key(sstables: Vec>, key: &[u8]) -> Result { + pub fn create_and_seek_to_key(sstables: Vec>, key: KeySlice) -> Result { unimplemented!() } } impl StorageIterator for SstConcatIterator { - fn key(&self) -> &[u8] { + type KeyType<'a> = KeySlice<'a>; + + fn key(&self) -> KeySlice { unimplemented!() } diff --git a/mini-lsm-starter/src/iterators/merge_iterator.rs b/mini-lsm-starter/src/iterators/merge_iterator.rs index e5d5aad..a3e911d 100644 --- a/mini-lsm-starter/src/iterators/merge_iterator.rs +++ b/mini-lsm-starter/src/iterators/merge_iterator.rs @@ -6,6 +6,8 @@ use std::collections::BinaryHeap; use anyhow::Result; +use crate::key::KeySlice; + use super::StorageIterator; struct HeapWrapper(pub usize, pub Box); @@ -21,7 +23,7 @@ impl Eq for HeapWrapper {} impl PartialOrd for HeapWrapper { #[allow(clippy::non_canonical_partial_ord_impl)] fn partial_cmp(&self, other: &Self) -> Option { - match self.1.key().cmp(other.1.key()) { + match self.1.key().cmp(&other.1.key()) { cmp::Ordering::Greater => Some(cmp::Ordering::Greater), cmp::Ordering::Less => Some(cmp::Ordering::Less), cmp::Ordering::Equal => self.0.partial_cmp(&other.0), @@ -49,8 +51,12 @@ impl MergeIterator { } } -impl StorageIterator for 
MergeIterator { - fn key(&self) -> &[u8] { +impl StorageIterator = KeySlice<'a>>> StorageIterator + for MergeIterator +{ + type KeyType<'a> = KeySlice<'a>; + + fn key(&self) -> KeySlice { unimplemented!() } diff --git a/mini-lsm-starter/src/iterators/two_merge_iterator.rs b/mini-lsm-starter/src/iterators/two_merge_iterator.rs index 43cac61..c51dc72 100644 --- a/mini-lsm-starter/src/iterators/two_merge_iterator.rs +++ b/mini-lsm-starter/src/iterators/two_merge_iterator.rs @@ -3,6 +3,8 @@ use anyhow::Result; +use crate::key::KeySlice; + use super::StorageIterator; /// Merges two iterators of different types into one. If the two iterators have the same key, only @@ -13,14 +15,24 @@ pub struct TwoMergeIterator { // Add fields as need } -impl TwoMergeIterator { +impl< + A: 'static + for<'a> StorageIterator = KeySlice<'a>>, + B: 'static + for<'a> StorageIterator = KeySlice<'a>>, + > TwoMergeIterator +{ pub fn create(a: A, b: B) -> Result { unimplemented!() } } -impl StorageIterator for TwoMergeIterator { - fn key(&self) -> &[u8] { +impl< + A: 'static + for<'a> StorageIterator = KeySlice<'a>>, + B: 'static + for<'a> StorageIterator = KeySlice<'a>>, + > StorageIterator for TwoMergeIterator +{ + type KeyType<'a> = KeySlice<'a>; + + fn key(&self) -> KeySlice { unimplemented!() } diff --git a/mini-lsm-starter/src/key.rs b/mini-lsm-starter/src/key.rs new file mode 100644 index 0000000..edf6342 --- /dev/null +++ b/mini-lsm-starter/src/key.rs @@ -0,0 +1,159 @@ +use std::fmt::Debug; + +use bytes::Bytes; + +pub struct Key>(T); + +pub type KeySlice<'a> = Key<&'a [u8]>; +pub type KeyVec = Key>; +pub type KeyBytes = Key; + +impl> Key { + pub fn into_inner(self) -> T { + self.0 + } + + pub fn len(&self) -> usize { + self.0.as_ref().len() + } + + pub fn is_empty(&self) -> bool { + self.0.as_ref().is_empty() + } +} + +impl Key> { + pub fn new() -> Self { + Self(Vec::new()) + } + + /// Create a `KeyVec` from a `Vec`. Will be removed in week 3. 
+ pub fn from_vec(key: Vec) -> Self { + Self(key) + } + + /// Clears the key and set ts to 0. + pub fn clear(&mut self) { + self.0.clear() + } + + /// Append a slice to the end of the key + pub fn append(&mut self, data: &[u8]) { + self.0.extend(data) + } + + /// Set the key from a slice without re-allocating. The signature will change in week 3. + pub fn set_from_slice(&mut self, key_slice: KeySlice) { + self.0.clear(); + self.0.extend(key_slice.0); + } + + pub fn as_key_slice(&self) -> KeySlice { + Key(self.0.as_slice()) + } + + pub fn into_key_bytes(self) -> KeyBytes { + Key(self.0.into()) + } + + /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. + pub fn raw_ref(&self) -> &[u8] { + self.0.as_ref() + } + + pub fn for_testing_key_ref(&self) -> &[u8] { + self.0.as_ref() + } + + pub fn for_testing_from_vec_no_ts(key: Vec) -> Self { + Self(key) + } +} + +impl Key { + pub fn as_key_slice(&self) -> KeySlice { + Key(&self.0) + } + + /// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3. + pub fn from_bytes(bytes: Bytes) -> KeyBytes { + Key(bytes) + } + + /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. + pub fn raw_ref(&self) -> &[u8] { + self.0.as_ref() + } + + pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes { + Key(bytes) + } + + pub fn for_testing_key_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + +impl<'a> Key<&'a [u8]> { + pub fn to_key_vec(self) -> KeyVec { + Key(self.0.to_vec()) + } + + /// Create a key slice from a slice. Will be removed in week 3. + pub fn from_slice(slice: &'a [u8]) -> Self { + Self(slice) + } + + /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. 
+ pub fn raw_ref(self) -> &'a [u8] { + self.0 + } + + pub fn for_testing_key_ref(self) -> &'a [u8] { + self.0 + } + + pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self { + Self(slice) + } +} + +impl + Debug> Debug for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl + Default> Default for Key { + fn default() -> Self { + Self(T::default()) + } +} + +impl + PartialEq> PartialEq for Key { + fn eq(&self, other: &Self) -> bool { + self.0.eq(&other.0) + } +} + +impl + Eq> Eq for Key {} + +impl + Clone> Clone for Key { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl + Copy> Copy for Key {} + +impl + PartialOrd> PartialOrd for Key { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl + Ord> Ord for Key { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.cmp(&other.0) + } +} diff --git a/mini-lsm-starter/src/lib.rs b/mini-lsm-starter/src/lib.rs index ebfb02a..afdfb65 100644 --- a/mini-lsm-starter/src/lib.rs +++ b/mini-lsm-starter/src/lib.rs @@ -2,6 +2,7 @@ pub mod block; pub mod compact; pub mod debug; pub mod iterators; +pub mod key; pub mod lsm_iterator; pub mod lsm_storage; pub mod manifest; diff --git a/mini-lsm-starter/src/lsm_iterator.rs b/mini-lsm-starter/src/lsm_iterator.rs index d656495..82842b2 100644 --- a/mini-lsm-starter/src/lsm_iterator.rs +++ b/mini-lsm-starter/src/lsm_iterator.rs @@ -22,6 +22,8 @@ impl LsmIterator { } impl StorageIterator for LsmIterator { + type KeyType<'a> = &'a [u8]; + fn is_valid(&self) -> bool { unimplemented!() } @@ -53,11 +55,13 @@ impl FusedIterator { } impl StorageIterator for FusedIterator { + type KeyType<'a> = I::KeyType<'a> where Self: 'a; + fn is_valid(&self) -> bool { unimplemented!() } - fn key(&self) -> &[u8] { + fn key(&self) -> Self::KeyType<'_> { unimplemented!() } diff --git a/mini-lsm-starter/src/mem_table.rs b/mini-lsm-starter/src/mem_table.rs index 2070707..94538a1 100644 
--- a/mini-lsm-starter/src/mem_table.rs +++ b/mini-lsm-starter/src/mem_table.rs @@ -11,6 +11,7 @@ use crossbeam_skiplist::SkipMap; use ouroboros::self_referencing; use crate::iterators::StorageIterator; +use crate::key::KeySlice; use crate::table::SsTableBuilder; use crate::wal::Wal; @@ -115,11 +116,13 @@ pub struct MemTableIterator { } impl StorageIterator for MemTableIterator { + type KeyType<'a> = KeySlice<'a>; + fn value(&self) -> &[u8] { unimplemented!() } - fn key(&self) -> &[u8] { + fn key(&self) -> KeySlice { unimplemented!() } diff --git a/mini-lsm-starter/src/table.rs b/mini-lsm-starter/src/table.rs index e5241dd..58188a5 100644 --- a/mini-lsm-starter/src/table.rs +++ b/mini-lsm-starter/src/table.rs @@ -11,10 +11,11 @@ use std::sync::Arc; use anyhow::Result; pub use builder::SsTableBuilder; -use bytes::{Buf, Bytes}; +use bytes::Buf; pub use iterator::SsTableIterator; use crate::block::Block; +use crate::key::{KeyBytes, KeySlice}; use crate::lsm_storage::BlockCache; use self::bloom::Bloom; @@ -24,9 +25,9 @@ pub struct BlockMeta { /// Offset of this data block. pub offset: usize, /// The first key of the data block. - pub first_key: Bytes, + pub first_key: KeyBytes, /// The last key of the data block. - pub last_key: Bytes, + pub last_key: KeyBytes, } impl BlockMeta { @@ -92,8 +93,8 @@ pub struct SsTable { pub(crate) block_meta_offset: usize, id: usize, block_cache: Option>, - first_key: Bytes, - last_key: Bytes, + first_key: KeyBytes, + last_key: KeyBytes, pub(crate) bloom: Option, } @@ -109,7 +110,12 @@ impl SsTable { } /// Create a mock SST with only first key + last key metadata - pub fn create_meta_only(id: usize, file_size: u64, first_key: Bytes, last_key: Bytes) -> Self { + pub fn create_meta_only( + id: usize, + file_size: u64, + first_key: KeyBytes, + last_key: KeyBytes, + ) -> Self { Self { file: FileObject(None, file_size), block_meta: vec![], @@ -135,7 +141,7 @@ impl SsTable { /// Find the block that may contain `key`. 
/// Note: You may want to make use of the `first_key` stored in `BlockMeta`. /// You may also assume the key-value pairs stored in each consecutive block are sorted. - pub fn find_block_idx(&self, key: &[u8]) -> usize { + pub fn find_block_idx(&self, key: KeySlice) -> usize { unimplemented!() } @@ -144,11 +150,11 @@ impl SsTable { self.block_meta.len() } - pub fn first_key(&self) -> &Bytes { + pub fn first_key(&self) -> &KeyBytes { &self.first_key } - pub fn last_key(&self) -> &Bytes { + pub fn last_key(&self) -> &KeyBytes { &self.last_key } diff --git a/mini-lsm-starter/src/table/builder.rs b/mini-lsm-starter/src/table/builder.rs index 33c6d38..cea3d08 100644 --- a/mini-lsm-starter/src/table/builder.rs +++ b/mini-lsm-starter/src/table/builder.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use anyhow::Result; use super::{BlockMeta, SsTable}; -use crate::{block::BlockBuilder, lsm_storage::BlockCache}; +use crate::{block::BlockBuilder, key::KeySlice, lsm_storage::BlockCache}; /// Builds an SSTable from key-value pairs. pub struct SsTableBuilder { @@ -29,7 +29,7 @@ impl SsTableBuilder { /// /// Note: You should split a new block when the current block is full.(`std::mem::replace` may /// be helpful here) - pub fn add(&mut self, key: &[u8], value: &[u8]) { + pub fn add(&mut self, key: KeySlice, value: &[u8]) { unimplemented!() } diff --git a/mini-lsm-starter/src/table/iterator.rs b/mini-lsm-starter/src/table/iterator.rs index 95a6d22..32b06a2 100644 --- a/mini-lsm-starter/src/table/iterator.rs +++ b/mini-lsm-starter/src/table/iterator.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use anyhow::Result; use super::SsTable; -use crate::{block::BlockIterator, iterators::StorageIterator}; +use crate::{block::BlockIterator, iterators::StorageIterator, key::KeySlice}; /// An iterator over the contents of an SSTable. pub struct SsTableIterator { @@ -27,21 +27,23 @@ impl SsTableIterator { } /// Create a new iterator and seek to the first key-value pair which >= `key`. 
- pub fn create_and_seek_to_key(table: Arc, key: &[u8]) -> Result { + pub fn create_and_seek_to_key(table: Arc, key: KeySlice) -> Result { unimplemented!() } /// Seek to the first key-value pair which >= `key`. /// Note: You probably want to review the handout for detailed explanation when implementing /// this function. - pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> { + pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> { unimplemented!() } } impl StorageIterator for SsTableIterator { + type KeyType<'a> = KeySlice<'a>; + /// Return the `key` that's held by the underlying block iterator. - fn key(&self) -> &[u8] { + fn key(&self) -> KeySlice { unimplemented!() } diff --git a/mini-lsm-starter/src/tests.rs b/mini-lsm-starter/src/tests.rs index 8b13789..688adfa 100644 --- a/mini-lsm-starter/src/tests.rs +++ b/mini-lsm-starter/src/tests.rs @@ -1 +1,2 @@ - +//! DO NOT MODIFY -- Mini-LSM tests modules +//! This file will be automatically rewritten by the copy-test command. diff --git a/mini-lsm-starter/src/tests/.gitkeep b/mini-lsm-starter/src/tests/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/mini-lsm/src/block/builder.rs b/mini-lsm/src/block/builder.rs index 5b13c25..4d28a75 100644 --- a/mini-lsm/src/block/builder.rs +++ b/mini-lsm/src/block/builder.rs @@ -1,5 +1,7 @@ use bytes::BufMut; +use crate::key::{KeySlice, KeyVec}; + use super::{Block, SIZEOF_U16}; /// Builds a block. @@ -11,16 +13,16 @@ pub struct BlockBuilder { /// The expected block size. 
block_size: usize, /// The first key in the block - first_key: Vec, + first_key: KeyVec, } -fn compute_overlap(first_key: &[u8], key: &[u8]) -> usize { +fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize { let mut i = 0; loop { if i >= first_key.len() || i >= key.len() { break; } - if first_key[i] != key[i] { + if first_key.raw_ref()[i] != key.raw_ref()[i] { break; } i += 1; @@ -35,7 +37,7 @@ impl BlockBuilder { offsets: Vec::new(), data: Vec::new(), block_size, - first_key: Vec::new(), + first_key: KeyVec::new(), } } @@ -46,7 +48,7 @@ impl BlockBuilder { /// Adds a key-value pair to the block. Returns false when the block is full. #[must_use] - pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { + pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool { assert!(!key.is_empty(), "key must not be empty"); if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size && !self.is_empty() @@ -55,20 +57,20 @@ impl BlockBuilder { } // Add the offset of the data into the offset array. self.offsets.push(self.data.len() as u16); - let overlap = compute_overlap(&self.first_key, key); + let overlap = compute_overlap(self.first_key.as_key_slice(), key); // Encode key overlap. self.data.put_u16(overlap as u16); // Encode key length. self.data.put_u16((key.len() - overlap) as u16); // Encode key content. - self.data.put(&key[overlap..]); + self.data.put(&key.raw_ref()[overlap..]); // Encode value length. self.data.put_u16(value.len() as u16); // Encode value content. 
self.data.put(value); if self.first_key.is_empty() { - self.first_key = key.to_vec(); + self.first_key = key.to_key_vec(); } true diff --git a/mini-lsm/src/block/iterator.rs b/mini-lsm/src/block/iterator.rs index 72de838..65b06af 100644 --- a/mini-lsm/src/block/iterator.rs +++ b/mini-lsm/src/block/iterator.rs @@ -2,7 +2,10 @@ use std::sync::Arc; use bytes::Buf; -use crate::block::SIZEOF_U16; +use crate::{ + block::SIZEOF_U16, + key::{KeySlice, KeyVec}, +}; use super::Block; @@ -11,22 +14,22 @@ pub struct BlockIterator { /// reference to the block block: Arc, /// the current key at the iterator position - key: Vec, + key: KeyVec, /// the value range from the block value_range: (usize, usize), /// the current index at the iterator position idx: usize, /// the first key in the block - first_key: Vec, + first_key: KeyVec, } impl Block { - fn get_first_key(&self) -> Vec { + fn get_first_key(&self) -> KeyVec { let mut buf = &self.data[..]; buf.get_u16(); let key_len = buf.get_u16(); let key = &buf[..key_len as usize]; - key.to_vec() + KeyVec::from_vec(key.to_vec()) } } @@ -35,7 +38,7 @@ impl BlockIterator { Self { first_key: block.get_first_key(), block, - key: Vec::new(), + key: KeyVec::new(), value_range: (0, 0), idx: 0, } @@ -49,16 +52,16 @@ impl BlockIterator { } /// Creates a block iterator and seek to the first key that >= `key`. - pub fn create_and_seek_to_key(block: Arc, key: &[u8]) -> Self { + pub fn create_and_seek_to_key(block: Arc, key: KeySlice) -> Self { let mut iter = Self::new(block); iter.seek_to_key(key); iter } /// Returns the key of the current entry. - pub fn key(&self) -> &[u8] { + pub fn key(&self) -> KeySlice { debug_assert!(!self.key.is_empty(), "invalid iterator"); - &self.key + self.key.as_key_slice() } /// Returns the value of the current entry. 
@@ -103,11 +106,11 @@ impl BlockIterator { // we don't need to manually advance it let overlap_len = entry.get_u16() as usize; let key_len = entry.get_u16() as usize; - let key = entry[..key_len].to_vec(); - entry.advance(key_len); + let key = &entry[..key_len]; self.key.clear(); - self.key.extend(&self.first_key[..overlap_len]); - self.key.extend(key); + self.key.append(&self.first_key.raw_ref()[..overlap_len]); + self.key.append(key); + entry.advance(key_len); let value_len = entry.get_u16() as usize; let value_offset_begin = offset + SIZEOF_U16 + SIZEOF_U16 + key_len + SIZEOF_U16; let value_offset_end = value_offset_begin + value_len; @@ -116,14 +119,14 @@ impl BlockIterator { } /// Seek to the first key that is >= `key`. - pub fn seek_to_key(&mut self, key: &[u8]) { + pub fn seek_to_key(&mut self, key: KeySlice) { let mut low = 0; let mut high = self.block.offsets.len(); while low < high { let mid = low + (high - low) / 2; self.seek_to(mid); assert!(self.is_valid()); - match self.key().cmp(key) { + match self.key().cmp(&key) { std::cmp::Ordering::Less => low = mid + 1, std::cmp::Ordering::Greater => high = mid, std::cmp::Ordering::Equal => return, diff --git a/mini-lsm/src/compact.rs b/mini-lsm/src/compact.rs index 5f3c7cd..2378bb2 100644 --- a/mini-lsm/src/compact.rs +++ b/mini-lsm/src/compact.rs @@ -18,6 +18,7 @@ use crate::iterators::concat_iterator::SstConcatIterator; use crate::iterators::merge_iterator::MergeIterator; use crate::iterators::two_merge_iterator::TwoMergeIterator; use crate::iterators::StorageIterator; +use crate::key::KeySlice; use crate::lsm_storage::{LsmStorageInner, LsmStorageState}; use crate::manifest::ManifestRecord; use crate::table::{SsTable, SsTableBuilder, SsTableIterator}; @@ -112,7 +113,7 @@ pub enum CompactionOptions { impl LsmStorageInner { fn compact_generate_sst_from_iter( &self, - mut iter: impl StorageIterator, + mut iter: impl for<'a> StorageIterator = KeySlice<'a>>, compact_to_bottom_level: bool, ) -> Result>> { let mut 
builder = None; diff --git a/mini-lsm/src/iterators.rs b/mini-lsm/src/iterators.rs index 56a36a4..a0da803 100644 --- a/mini-lsm/src/iterators.rs +++ b/mini-lsm/src/iterators.rs @@ -3,11 +3,15 @@ pub mod merge_iterator; pub mod two_merge_iterator; pub trait StorageIterator { + type KeyType<'a>: PartialEq + Eq + PartialOrd + Ord + where + Self: 'a; + /// Get the current value. fn value(&self) -> &[u8]; /// Get the current key. - fn key(&self) -> &[u8]; + fn key(&self) -> Self::KeyType<'_>; /// Check if the current iterator is valid. fn is_valid(&self) -> bool; diff --git a/mini-lsm/src/iterators/concat_iterator.rs b/mini-lsm/src/iterators/concat_iterator.rs index 63d7991..e6203f2 100644 --- a/mini-lsm/src/iterators/concat_iterator.rs +++ b/mini-lsm/src/iterators/concat_iterator.rs @@ -2,7 +2,10 @@ use std::sync::Arc; use anyhow::Result; -use crate::table::{SsTable, SsTableIterator}; +use crate::{ + key::KeySlice, + table::{SsTable, SsTableIterator}, +}; use super::StorageIterator; @@ -46,10 +49,10 @@ impl SstConcatIterator { Ok(iter) } - pub fn create_and_seek_to_key(sstables: Vec>, key: &[u8]) -> Result { + pub fn create_and_seek_to_key(sstables: Vec>, key: KeySlice) -> Result { Self::check_sst_valid(&sstables); let idx: usize = sstables - .partition_point(|table| table.first_key() <= key) + .partition_point(|table| table.first_key().as_key_slice() <= key) .saturating_sub(1); if idx >= sstables.len() { return Ok(Self { @@ -89,7 +92,9 @@ impl SstConcatIterator { } impl StorageIterator for SstConcatIterator { - fn key(&self) -> &[u8] { + type KeyType<'a> = KeySlice<'a>; + + fn key(&self) -> KeySlice { self.current.as_ref().unwrap().key() } diff --git a/mini-lsm/src/iterators/merge_iterator.rs b/mini-lsm/src/iterators/merge_iterator.rs index 0e5b10b..c4abc8d 100644 --- a/mini-lsm/src/iterators/merge_iterator.rs +++ b/mini-lsm/src/iterators/merge_iterator.rs @@ -4,6 +4,8 @@ use std::collections::BinaryHeap; use anyhow::Result; +use crate::key::KeySlice; + use 
super::StorageIterator; struct HeapWrapper(pub usize, pub Box); @@ -19,7 +21,7 @@ impl Eq for HeapWrapper {} impl PartialOrd for HeapWrapper { #[allow(clippy::non_canonical_partial_ord_impl)] fn partial_cmp(&self, other: &Self) -> Option { - match self.1.key().cmp(other.1.key()) { + match self.1.key().cmp(&other.1.key()) { cmp::Ordering::Greater => Some(cmp::Ordering::Greater), cmp::Ordering::Less => Some(cmp::Ordering::Less), cmp::Ordering::Equal => self.0.partial_cmp(&other.0), @@ -75,8 +77,12 @@ impl MergeIterator { } } -impl StorageIterator for MergeIterator { - fn key(&self) -> &[u8] { +impl StorageIterator = KeySlice<'a>>> StorageIterator + for MergeIterator +{ + type KeyType<'a> = KeySlice<'a>; + + fn key(&self) -> KeySlice { self.current.as_ref().unwrap().1.key() } diff --git a/mini-lsm/src/iterators/two_merge_iterator.rs b/mini-lsm/src/iterators/two_merge_iterator.rs index 4fb5941..781055a 100644 --- a/mini-lsm/src/iterators/two_merge_iterator.rs +++ b/mini-lsm/src/iterators/two_merge_iterator.rs @@ -1,5 +1,7 @@ use anyhow::Result; +use crate::key::KeySlice; + use super::StorageIterator; /// Merges two iterators of different types into one. 
If the two iterators have the same key, only @@ -10,7 +12,11 @@ pub struct TwoMergeIterator { choose_a: bool, } -impl TwoMergeIterator { +impl< + A: 'static + for<'a> StorageIterator = KeySlice<'a>>, + B: 'static + for<'a> StorageIterator = KeySlice<'a>>, + > TwoMergeIterator +{ fn choose_a(a: &A, b: &B) -> bool { if !a.is_valid() { return false; @@ -40,8 +46,14 @@ impl TwoMergeIterator { } } -impl StorageIterator for TwoMergeIterator { - fn key(&self) -> &[u8] { +impl< + A: 'static + for<'a> StorageIterator = KeySlice<'a>>, + B: 'static + for<'a> StorageIterator = KeySlice<'a>>, + > StorageIterator for TwoMergeIterator +{ + type KeyType<'a> = KeySlice<'a>; + + fn key(&self) -> KeySlice { if self.choose_a { self.a.key() } else { diff --git a/mini-lsm/src/key.rs b/mini-lsm/src/key.rs new file mode 100644 index 0000000..edf6342 --- /dev/null +++ b/mini-lsm/src/key.rs @@ -0,0 +1,159 @@ +use std::fmt::Debug; + +use bytes::Bytes; + +pub struct Key>(T); + +pub type KeySlice<'a> = Key<&'a [u8]>; +pub type KeyVec = Key>; +pub type KeyBytes = Key; + +impl> Key { + pub fn into_inner(self) -> T { + self.0 + } + + pub fn len(&self) -> usize { + self.0.as_ref().len() + } + + pub fn is_empty(&self) -> bool { + self.0.as_ref().is_empty() + } +} + +impl Key> { + pub fn new() -> Self { + Self(Vec::new()) + } + + /// Create a `KeyVec` from a `Vec`. Will be removed in week 3. + pub fn from_vec(key: Vec) -> Self { + Self(key) + } + + /// Clears the key and set ts to 0. + pub fn clear(&mut self) { + self.0.clear() + } + + /// Append a slice to the end of the key + pub fn append(&mut self, data: &[u8]) { + self.0.extend(data) + } + + /// Set the key from a slice without re-allocating. The signature will change in week 3. 
+ pub fn set_from_slice(&mut self, key_slice: KeySlice) { + self.0.clear(); + self.0.extend(key_slice.0); + } + + pub fn as_key_slice(&self) -> KeySlice { + Key(self.0.as_slice()) + } + + pub fn into_key_bytes(self) -> KeyBytes { + Key(self.0.into()) + } + + /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. + pub fn raw_ref(&self) -> &[u8] { + self.0.as_ref() + } + + pub fn for_testing_key_ref(&self) -> &[u8] { + self.0.as_ref() + } + + pub fn for_testing_from_vec_no_ts(key: Vec) -> Self { + Self(key) + } +} + +impl Key { + pub fn as_key_slice(&self) -> KeySlice { + Key(&self.0) + } + + /// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3. + pub fn from_bytes(bytes: Bytes) -> KeyBytes { + Key(bytes) + } + + /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. + pub fn raw_ref(&self) -> &[u8] { + self.0.as_ref() + } + + pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes { + Key(bytes) + } + + pub fn for_testing_key_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + +impl<'a> Key<&'a [u8]> { + pub fn to_key_vec(self) -> KeyVec { + Key(self.0.to_vec()) + } + + /// Create a key slice from a slice. Will be removed in week 3. + pub fn from_slice(slice: &'a [u8]) -> Self { + Self(slice) + } + + /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. 
+ pub fn raw_ref(self) -> &'a [u8] { + self.0 + } + + pub fn for_testing_key_ref(self) -> &'a [u8] { + self.0 + } + + pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self { + Self(slice) + } +} + +impl + Debug> Debug for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl + Default> Default for Key { + fn default() -> Self { + Self(T::default()) + } +} + +impl + PartialEq> PartialEq for Key { + fn eq(&self, other: &Self) -> bool { + self.0.eq(&other.0) + } +} + +impl + Eq> Eq for Key {} + +impl + Clone> Clone for Key { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl + Copy> Copy for Key {} + +impl + PartialOrd> PartialOrd for Key { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl + Ord> Ord for Key { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.cmp(&other.0) + } +} diff --git a/mini-lsm/src/lib.rs b/mini-lsm/src/lib.rs index ebfb02a..afdfb65 100644 --- a/mini-lsm/src/lib.rs +++ b/mini-lsm/src/lib.rs @@ -2,6 +2,7 @@ pub mod block; pub mod compact; pub mod debug; pub mod iterators; +pub mod key; pub mod lsm_iterator; pub mod lsm_storage; pub mod manifest; diff --git a/mini-lsm/src/lsm_iterator.rs b/mini-lsm/src/lsm_iterator.rs index 1a1817b..044769c 100644 --- a/mini-lsm/src/lsm_iterator.rs +++ b/mini-lsm/src/lsm_iterator.rs @@ -41,8 +41,8 @@ impl LsmIterator { } match self.end_bound.as_ref() { Bound::Unbounded => {} - Bound::Included(key) => self.is_valid = self.inner.key() <= key.as_ref(), - Bound::Excluded(key) => self.is_valid = self.inner.key() < key.as_ref(), + Bound::Included(key) => self.is_valid = self.inner.key().raw_ref() <= key.as_ref(), + Bound::Excluded(key) => self.is_valid = self.inner.key().raw_ref() < key.as_ref(), } Ok(()) } @@ -56,12 +56,14 @@ impl LsmIterator { } impl StorageIterator for LsmIterator { + type KeyType<'a> = &'a [u8]; + fn is_valid(&self) -> bool { self.is_valid } fn key(&self) -> &[u8] { 
- self.inner.key() + self.inner.key().raw_ref() } fn value(&self) -> &[u8] { @@ -97,11 +99,13 @@ impl FusedIterator { } impl StorageIterator for FusedIterator { + type KeyType<'a> = I::KeyType<'a> where Self: 'a; + fn is_valid(&self) -> bool { !self.has_errored && self.iter.is_valid() } - fn key(&self) -> &[u8] { + fn key(&self) -> Self::KeyType<'_> { if self.has_errored || !self.iter.is_valid() { panic!("invalid access to the underlying iterator"); } diff --git a/mini-lsm/src/lsm_storage.rs b/mini-lsm/src/lsm_storage.rs index cd6111f..65b7b7f 100644 --- a/mini-lsm/src/lsm_storage.rs +++ b/mini-lsm/src/lsm_storage.rs @@ -18,6 +18,7 @@ use crate::iterators::concat_iterator::SstConcatIterator; use crate::iterators::merge_iterator::MergeIterator; use crate::iterators::two_merge_iterator::TwoMergeIterator; use crate::iterators::StorageIterator; +use crate::key::KeySlice; use crate::lsm_iterator::{FusedIterator, LsmIterator}; use crate::manifest::{Manifest, ManifestRecord}; use crate::mem_table::{map_bound, MemTable}; @@ -98,23 +99,23 @@ impl LsmStorageOptions { fn range_overlap( user_begin: Bound<&[u8]>, user_end: Bound<&[u8]>, - table_begin: &[u8], - table_end: &[u8], + table_begin: KeySlice, + table_end: KeySlice, ) -> bool { match user_end { - Bound::Excluded(key) if key <= table_begin => { + Bound::Excluded(key) if key <= table_begin.raw_ref() => { return false; } - Bound::Included(key) if key < table_begin => { + Bound::Included(key) if key < table_begin.raw_ref() => { return false; } _ => {} } match user_begin { - Bound::Excluded(key) if key >= table_end => { + Bound::Excluded(key) if key >= table_end.raw_ref() => { return false; } - Bound::Included(key) if key > table_end => { + Bound::Included(key) if key > table_end.raw_ref() => { return false; } _ => {} @@ -122,8 +123,8 @@ fn range_overlap( true } -fn key_within(user_key: &[u8], table_begin: &[u8], table_end: &[u8]) -> bool { - table_begin <= user_key && user_key <= table_end +fn key_within(user_key: &[u8], 
table_begin: KeySlice, table_end: KeySlice) -> bool { + table_begin.raw_ref() <= user_key && user_key <= table_end.raw_ref() } /// The storage interface of the LSM tree. @@ -425,7 +426,11 @@ impl LsmStorageInner { let mut l0_iters = Vec::with_capacity(snapshot.l0_sstables.len()); let keep_table = |key: &[u8], table: &SsTable| { - if key_within(key, table.first_key(), table.last_key()) { + if key_within( + key, + table.first_key().as_key_slice(), + table.last_key().as_key_slice(), + ) { if let Some(bloom) = &table.bloom { if bloom.may_contain(farmhash::fingerprint32(key)) { return true; @@ -441,7 +446,8 @@ impl LsmStorageInner { let table = snapshot.sstables[table].clone(); if keep_table(key, &table) { l0_iters.push(Box::new(SsTableIterator::create_and_seek_to_key( - table, key, + table, + KeySlice::from_slice(key), )?)); } } @@ -455,13 +461,14 @@ impl LsmStorageInner { level_ssts.push(table); } } - let level_iter = SstConcatIterator::create_and_seek_to_key(level_ssts, key)?; + let level_iter = + SstConcatIterator::create_and_seek_to_key(level_ssts, KeySlice::from_slice(key))?; level_iters.push(Box::new(level_iter)); } let iter = TwoMergeIterator::create(l0_iter, MergeIterator::create(level_iters))?; - if iter.is_valid() && iter.key() == key && !iter.value().is_empty() { + if iter.is_valid() && iter.key().raw_ref() == key && !iter.value().is_empty() { return Ok(Some(Bytes::copy_from_slice(iter.value()))); } Ok(None) @@ -653,12 +660,22 @@ impl LsmStorageInner { let mut table_iters = Vec::with_capacity(snapshot.l0_sstables.len()); for table_id in snapshot.l0_sstables.iter() { let table = snapshot.sstables[table_id].clone(); - if range_overlap(lower, upper, table.first_key(), table.last_key()) { + if range_overlap( + lower, + upper, + table.first_key().as_key_slice(), + table.last_key().as_key_slice(), + ) { let iter = match lower { - Bound::Included(key) => SsTableIterator::create_and_seek_to_key(table, key)?, + Bound::Included(key) => { + 
SsTableIterator::create_and_seek_to_key(table, KeySlice::from_slice(key))? + } Bound::Excluded(key) => { - let mut iter = SsTableIterator::create_and_seek_to_key(table, key)?; - if iter.is_valid() && iter.key() == key { + let mut iter = SsTableIterator::create_and_seek_to_key( + table, + KeySlice::from_slice(key), + )?; + if iter.is_valid() && iter.key().raw_ref() == key { iter.next()?; } iter @@ -676,16 +693,27 @@ impl LsmStorageInner { let mut level_ssts = Vec::with_capacity(level_sst_ids.len()); for table in level_sst_ids { let table = snapshot.sstables[table].clone(); - if range_overlap(lower, upper, table.first_key(), table.last_key()) { + if range_overlap( + lower, + upper, + table.first_key().as_key_slice(), + table.last_key().as_key_slice(), + ) { level_ssts.push(table); } } let level_iter = match lower { - Bound::Included(key) => SstConcatIterator::create_and_seek_to_key(level_ssts, key)?, + Bound::Included(key) => SstConcatIterator::create_and_seek_to_key( + level_ssts, + KeySlice::from_slice(key), + )?, Bound::Excluded(key) => { - let mut iter = SstConcatIterator::create_and_seek_to_key(level_ssts, key)?; - if iter.is_valid() && iter.key() == key { + let mut iter = SstConcatIterator::create_and_seek_to_key( + level_ssts, + KeySlice::from_slice(key), + )?; + if iter.is_valid() && iter.key().raw_ref() == key { iter.next()?; } iter diff --git a/mini-lsm/src/mem_table.rs b/mini-lsm/src/mem_table.rs index 688380b..d6d27de 100644 --- a/mini-lsm/src/mem_table.rs +++ b/mini-lsm/src/mem_table.rs @@ -10,6 +10,7 @@ use crossbeam_skiplist::SkipMap; use ouroboros::self_referencing; use crate::iterators::StorageIterator; +use crate::key::KeySlice; use crate::table::SsTableBuilder; use crate::wal::Wal; @@ -110,7 +111,7 @@ impl MemTable { /// Flush the mem-table to SSTable. Implement in week 1 day 6. 
pub fn flush(&self, builder: &mut SsTableBuilder) -> Result<()> { for entry in self.map.iter() { - builder.add(&entry.key()[..], &entry.value()[..]); + builder.add(KeySlice::from_slice(&entry.key()[..]), &entry.value()[..]); } Ok(()) } @@ -158,12 +159,14 @@ impl MemTableIterator { } impl StorageIterator for MemTableIterator { + type KeyType<'a> = KeySlice<'a>; + fn value(&self) -> &[u8] { &self.borrow_item().1[..] } - fn key(&self) -> &[u8] { - &self.borrow_item().0[..] + fn key(&self) -> KeySlice { + KeySlice::from_slice(&self.borrow_item().0[..]) } fn is_valid(&self) -> bool { diff --git a/mini-lsm/src/table.rs b/mini-lsm/src/table.rs index fecb101..99fa818 100644 --- a/mini-lsm/src/table.rs +++ b/mini-lsm/src/table.rs @@ -8,10 +8,11 @@ use std::sync::Arc; use anyhow::{anyhow, Result}; pub use builder::SsTableBuilder; -use bytes::{Buf, BufMut, Bytes}; +use bytes::{Buf, BufMut}; pub use iterator::SsTableIterator; use crate::block::Block; +use crate::key::{KeyBytes, KeySlice}; use crate::lsm_storage::BlockCache; use self::bloom::Bloom; @@ -21,9 +22,9 @@ pub struct BlockMeta { /// Offset of this data block. pub offset: usize, /// The first key of the data block. - pub first_key: Bytes, + pub first_key: KeyBytes, /// The last key of the data block. 
- pub last_key: Bytes, + pub last_key: KeyBytes, } impl BlockMeta { @@ -49,9 +50,9 @@ impl BlockMeta { for meta in block_meta { buf.put_u32(meta.offset as u32); buf.put_u16(meta.first_key.len() as u16); - buf.put_slice(&meta.first_key); + buf.put_slice(meta.first_key.raw_ref()); buf.put_u16(meta.last_key.len() as u16); - buf.put_slice(&meta.last_key); + buf.put_slice(meta.last_key.raw_ref()); } assert_eq!(estimated_size, buf.len() - original_len); } @@ -62,9 +63,9 @@ impl BlockMeta { while buf.has_remaining() { let offset = buf.get_u32() as usize; let first_key_len = buf.get_u16() as usize; - let first_key = buf.copy_to_bytes(first_key_len); - let last_key_len = buf.get_u16() as usize; - let last_key = buf.copy_to_bytes(last_key_len); + let first_key = KeyBytes::from_bytes(buf.copy_to_bytes(first_key_len)); + let last_key_len: usize = buf.get_u16() as usize; + let last_key = KeyBytes::from_bytes(buf.copy_to_bytes(last_key_len)); block_meta.push(BlockMeta { offset, first_key, @@ -120,8 +121,8 @@ pub struct SsTable { pub(crate) block_meta_offset: usize, id: usize, block_cache: Option>, - first_key: Bytes, - last_key: Bytes, + first_key: KeyBytes, + last_key: KeyBytes, pub(crate) bloom: Option, } impl SsTable { @@ -154,7 +155,12 @@ impl SsTable { } /// Create a mock SST with only first key + last key metadata - pub fn create_meta_only(id: usize, file_size: u64, first_key: Bytes, last_key: Bytes) -> Self { + pub fn create_meta_only( + id: usize, + file_size: u64, + first_key: KeyBytes, + last_key: KeyBytes, + ) -> Self { Self { file: FileObject(None, file_size), block_meta: vec![], @@ -193,9 +199,9 @@ impl SsTable { } /// Find the block that may contain `key`. 
- pub fn find_block_idx(&self, key: &[u8]) -> usize { + pub fn find_block_idx(&self, key: KeySlice) -> usize { self.block_meta - .partition_point(|meta| meta.first_key <= key) + .partition_point(|meta| meta.first_key.as_key_slice() <= key) .saturating_sub(1) } @@ -204,11 +210,11 @@ impl SsTable { self.block_meta.len() } - pub fn first_key(&self) -> &Bytes { + pub fn first_key(&self) -> &KeyBytes { &self.first_key } - pub fn last_key(&self) -> &Bytes { + pub fn last_key(&self) -> &KeyBytes { &self.last_key } diff --git a/mini-lsm/src/table/builder.rs b/mini-lsm/src/table/builder.rs index 718c3c8..a8753d1 100644 --- a/mini-lsm/src/table/builder.rs +++ b/mini-lsm/src/table/builder.rs @@ -7,13 +7,14 @@ use bytes::BufMut; use super::bloom::Bloom; use super::{BlockMeta, FileObject, SsTable}; use crate::block::BlockBuilder; +use crate::key::{KeySlice, KeyVec}; use crate::lsm_storage::BlockCache; /// Builds an SSTable from key-value pairs. pub struct SsTableBuilder { builder: BlockBuilder, - first_key: Vec, - last_key: Vec, + first_key: KeyVec, + last_key: KeyVec, data: Vec, pub(crate) meta: Vec, block_size: usize, @@ -26,8 +27,8 @@ impl SsTableBuilder { Self { data: Vec::new(), meta: Vec::new(), - first_key: Vec::new(), - last_key: Vec::new(), + first_key: KeyVec::new(), + last_key: KeyVec::new(), block_size, builder: BlockBuilder::new(block_size), key_hashes: Vec::new(), @@ -35,17 +36,15 @@ impl SsTableBuilder { } /// Adds a key-value pair to SSTable - pub fn add(&mut self, key: &[u8], value: &[u8]) { + pub fn add(&mut self, key: KeySlice, value: &[u8]) { if self.first_key.is_empty() { - self.first_key.clear(); - self.first_key.extend(key); + self.first_key.set_from_slice(key); } - self.key_hashes.push(farmhash::fingerprint32(key)); + self.key_hashes.push(farmhash::fingerprint32(key.raw_ref())); if self.builder.add(key, value) { - self.last_key.clear(); - self.last_key.extend(key); + self.last_key.set_from_slice(key); return; } @@ -54,10 +53,8 @@ impl SsTableBuilder { // 
add the key-value pair to the next block assert!(self.builder.add(key, value)); - self.first_key.clear(); - self.first_key.extend(key); - self.last_key.clear(); - self.last_key.extend(key); + self.first_key.set_from_slice(key); + self.last_key.set_from_slice(key); } /// Get the estimated size of the SSTable. @@ -70,8 +67,8 @@ impl SsTableBuilder { let encoded_block = builder.build().encode(); self.meta.push(BlockMeta { offset: self.data.len(), - first_key: std::mem::take(&mut self.first_key).into(), - last_key: std::mem::take(&mut self.last_key).into(), + first_key: std::mem::take(&mut self.first_key).into_key_bytes(), + last_key: std::mem::take(&mut self.last_key).into_key_bytes(), }); self.data.extend(encoded_block); } diff --git a/mini-lsm/src/table/iterator.rs b/mini-lsm/src/table/iterator.rs index 08b135f..522f1a0 100644 --- a/mini-lsm/src/table/iterator.rs +++ b/mini-lsm/src/table/iterator.rs @@ -5,6 +5,7 @@ use anyhow::Result; use super::SsTable; use crate::block::BlockIterator; use crate::iterators::StorageIterator; +use crate::key::KeySlice; /// An iterator over the contents of an SSTable. pub struct SsTableIterator { @@ -40,7 +41,7 @@ impl SsTableIterator { Ok(()) } - fn seek_to_key_inner(table: &Arc, key: &[u8]) -> Result<(usize, BlockIterator)> { + fn seek_to_key_inner(table: &Arc, key: KeySlice) -> Result<(usize, BlockIterator)> { let mut blk_idx = table.find_block_idx(key); let mut blk_iter = BlockIterator::create_and_seek_to_key(table.read_block_cached(blk_idx)?, key); @@ -55,7 +56,7 @@ impl SsTableIterator { } /// Create a new iterator and seek to the first key-value pair which >= `key`. - pub fn create_and_seek_to_key(table: Arc, key: &[u8]) -> Result { + pub fn create_and_seek_to_key(table: Arc, key: KeySlice) -> Result { let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?; let iter = Self { blk_iter, @@ -66,7 +67,7 @@ impl SsTableIterator { } /// Seek to the first key-value pair which >= `key`. 
- pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> { + pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> { let (blk_idx, blk_iter) = Self::seek_to_key_inner(&self.table, key)?; self.blk_iter = blk_iter; self.blk_idx = blk_idx; @@ -75,11 +76,13 @@ impl SsTableIterator { } impl StorageIterator for SsTableIterator { + type KeyType<'a> = KeySlice<'a>; + fn value(&self) -> &[u8] { self.blk_iter.value() } - fn key(&self) -> &[u8] { + fn key(&self) -> KeySlice { self.blk_iter.key() } diff --git a/mini-lsm/src/tests/harness.rs b/mini-lsm/src/tests/harness.rs index 8ee0d44..e87ba69 100644 --- a/mini-lsm/src/tests/harness.rs +++ b/mini-lsm/src/tests/harness.rs @@ -5,6 +5,7 @@ use bytes::Bytes; use crate::{ iterators::StorageIterator, + key::KeySlice, lsm_storage::{BlockCache, LsmStorageInner}, table::{SsTable, SsTableBuilder}, }; @@ -35,6 +36,8 @@ impl MockIterator { } impl StorageIterator for MockIterator { + type KeyType<'a> = KeySlice<'a>; + fn next(&mut self) -> Result<()> { if self.index < self.data.len() { self.index += 1; @@ -47,13 +50,13 @@ impl StorageIterator for MockIterator { Ok(()) } - fn key(&self) -> &[u8] { + fn key(&self) -> KeySlice { if let Some(error_when) = self.error_when { if self.index >= error_when { panic!("invalid access after next returns an error!"); } } - self.data[self.index].0.as_ref() + KeySlice::for_testing_from_slice_no_ts(self.data[self.index].0.as_ref()) } fn value(&self) -> &[u8] { @@ -79,7 +82,35 @@ pub fn as_bytes(x: &[u8]) -> Bytes { Bytes::copy_from_slice(x) } -pub fn check_iter_result(iter: &mut impl StorageIterator, expected: Vec<(Bytes, Bytes)>) { +pub fn check_iter_result_by_key(iter: &mut I, expected: Vec<(Bytes, Bytes)>) +where + I: for<'a> StorageIterator = KeySlice<'a>>, +{ + for (k, v) in expected { + assert!(iter.is_valid()); + assert_eq!( + k, + iter.key().for_testing_key_ref(), + "expected key: {:?}, actual key: {:?}", + k, + as_bytes(iter.key().for_testing_key_ref()), + ); + assert_eq!( + v, + 
iter.value(), + "expected value: {:?}, actual value: {:?}", + v, + as_bytes(iter.value()), + ); + iter.next().unwrap(); + } + assert!(!iter.is_valid()); +} + +pub fn check_lsm_iter_result_by_key(iter: &mut I, expected: Vec<(Bytes, Bytes)>) +where + I: for<'a> StorageIterator = &'a [u8]>, +{ for (k, v) in expected { assert!(iter.is_valid()); assert_eq!( @@ -119,7 +150,7 @@ pub fn generate_sst( ) -> SsTable { let mut builder = SsTableBuilder::new(128); for (key, value) in data { - builder.add(&key[..], &value[..]); + builder.add(KeySlice::for_testing_from_slice_no_ts(&key[..]), &value[..]); } builder.build(id, block_cache, path.as_ref()).unwrap() } diff --git a/mini-lsm/src/tests/week1_day2.rs b/mini-lsm/src/tests/week1_day2.rs index 9de8326..b8c36cb 100644 --- a/mini-lsm/src/tests/week1_day2.rs +++ b/mini-lsm/src/tests/week1_day2.rs @@ -8,9 +8,10 @@ use crate::{ lsm_iterator::FusedIterator, lsm_storage::{LsmStorageInner, LsmStorageOptions}, mem_table::MemTable, + tests::harness::check_lsm_iter_result_by_key, }; -use super::harness::{check_iter_result, expect_iter_error, MockIterator}; +use super::harness::{check_iter_result_by_key, expect_iter_error, MockIterator}; #[test] fn test_task1_memtable_iter() { @@ -22,15 +23,15 @@ fn test_task1_memtable_iter() { { let mut iter = memtable.scan(Bound::Unbounded, Bound::Unbounded); - assert_eq!(iter.key(), b"key1"); + assert_eq!(iter.key().for_testing_key_ref(), b"key1"); assert_eq!(iter.value(), b"value1"); assert!(iter.is_valid()); iter.next().unwrap(); - assert_eq!(iter.key(), b"key2"); + assert_eq!(iter.key().for_testing_key_ref(), b"key2"); assert_eq!(iter.value(), b"value2"); assert!(iter.is_valid()); iter.next().unwrap(); - assert_eq!(iter.key(), b"key3"); + assert_eq!(iter.key().for_testing_key_ref(), b"key3"); assert_eq!(iter.value(), b"value3"); assert!(iter.is_valid()); iter.next().unwrap(); @@ -39,11 +40,11 @@ fn test_task1_memtable_iter() { { let mut iter = memtable.scan(Bound::Included(b"key1"), 
Bound::Included(b"key2")); - assert_eq!(iter.key(), b"key1"); + assert_eq!(iter.key().for_testing_key_ref(), b"key1"); assert_eq!(iter.value(), b"value1"); assert!(iter.is_valid()); iter.next().unwrap(); - assert_eq!(iter.key(), b"key2"); + assert_eq!(iter.key().for_testing_key_ref(), b"key2"); assert_eq!(iter.value(), b"value2"); assert!(iter.is_valid()); iter.next().unwrap(); @@ -52,7 +53,7 @@ fn test_task1_memtable_iter() { { let mut iter = memtable.scan(Bound::Excluded(b"key1"), Bound::Excluded(b"key3")); - assert_eq!(iter.key(), b"key2"); + assert_eq!(iter.key().for_testing_key_ref(), b"key2"); assert_eq!(iter.value(), b"value2"); assert!(iter.is_valid()); iter.next().unwrap(); @@ -104,7 +105,7 @@ fn test_task2_merge_1() { Box::new(i3.clone()), ]); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("a"), Bytes::from("1.1")), @@ -117,7 +118,7 @@ fn test_task2_merge_1() { let mut iter = MergeIterator::create(vec![Box::new(i3), Box::new(i1), Box::new(i2)]); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("a"), Bytes::from("1.1")), @@ -169,7 +170,7 @@ fn test_task2_merge_2() { Box::new(i3.clone()), Box::new(i4.clone()), ]); - check_iter_result(&mut iter, result.clone()); + check_iter_result_by_key(&mut iter, result.clone()); let mut iter = MergeIterator::create(vec![ Box::new(i2.clone()), @@ -177,17 +178,17 @@ fn test_task2_merge_2() { Box::new(i3.clone()), Box::new(i1.clone()), ]); - check_iter_result(&mut iter, result.clone()); + check_iter_result_by_key(&mut iter, result.clone()); let mut iter = MergeIterator::create(vec![Box::new(i4), Box::new(i3), Box::new(i2), Box::new(i1)]); - check_iter_result(&mut iter, result); + check_iter_result_by_key(&mut iter, result); } #[test] fn test_task2_merge_empty() { let mut iter = MergeIterator::::create(vec![]); - check_iter_result(&mut iter, vec![]); + check_iter_result_by_key(&mut iter, vec![]); let i1 = MockIterator::new(vec![ (Bytes::from("a"), 
Bytes::from("1.1")), @@ -196,7 +197,7 @@ fn test_task2_merge_empty() { ]); let i2 = MockIterator::new(vec![]); let mut iter = MergeIterator::::create(vec![Box::new(i1), Box::new(i2)]); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("a"), Bytes::from("1.1")), @@ -209,7 +210,7 @@ fn test_task2_merge_empty() { #[test] fn test_task2_merge_error() { let mut iter = MergeIterator::::create(vec![]); - check_iter_result(&mut iter, vec![]); + check_iter_result_by_key(&mut iter, vec![]); let i1 = MockIterator::new(vec![ (Bytes::from("a"), Bytes::from("1.1")), @@ -276,7 +277,7 @@ fn test_task4_integration() { storage.put(b"3", b"233333").unwrap(); { let mut iter = storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(); - check_iter_result( + check_lsm_iter_result_by_key( &mut iter, vec![ (Bytes::from_static(b"1"), Bytes::from_static(b"233333")), @@ -294,7 +295,7 @@ fn test_task4_integration() { let mut iter = storage .scan(Bound::Included(b"2"), Bound::Included(b"3")) .unwrap(); - check_iter_result( + check_lsm_iter_result_by_key( &mut iter, vec![(Bytes::from_static(b"3"), Bytes::from_static(b"233333"))], ); diff --git a/mini-lsm/src/tests/week1_day3.rs b/mini-lsm/src/tests/week1_day3.rs index 850f969..91deea7 100644 --- a/mini-lsm/src/tests/week1_day3.rs +++ b/mini-lsm/src/tests/week1_day3.rs @@ -2,39 +2,48 @@ use std::sync::Arc; use bytes::Bytes; -use crate::block::{Block, BlockBuilder, BlockIterator}; +use crate::{ + block::{Block, BlockBuilder, BlockIterator}, + key::{KeySlice, KeyVec}, +}; #[test] fn test_block_build_single_key() { let mut builder = BlockBuilder::new(16); - assert!(builder.add(b"233", b"233333")); + assert!(builder.add(KeySlice::for_testing_from_slice_no_ts(b"233"), b"233333")); builder.build(); } #[test] fn test_block_build_full() { let mut builder = BlockBuilder::new(16); - assert!(builder.add(b"11", b"11")); - assert!(!builder.add(b"22", b"22")); + assert!(builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), 
b"11")); + assert!(!builder.add(KeySlice::for_testing_from_slice_no_ts(b"22"), b"22")); builder.build(); } #[test] fn test_block_build_large_1() { let mut builder = BlockBuilder::new(16); - assert!(builder.add(b"11", &b"1".repeat(100))); + assert!(builder.add( + KeySlice::for_testing_from_slice_no_ts(b"11"), + &b"1".repeat(100) + )); builder.build(); } #[test] fn test_block_build_large_2() { let mut builder = BlockBuilder::new(16); - assert!(builder.add(b"11", b"1")); - assert!(!builder.add(b"11", &b"1".repeat(100))); + assert!(builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), b"1")); + assert!(!builder.add( + KeySlice::for_testing_from_slice_no_ts(b"11"), + &b"1".repeat(100) + )); } -fn key_of(idx: usize) -> Vec { - format!("key_{:03}", idx * 5).into_bytes() +fn key_of(idx: usize) -> KeyVec { + KeyVec::for_testing_from_vec_no_ts(format!("key_{:03}", idx * 5).into_bytes()) } fn value_of(idx: usize) -> Vec { @@ -50,7 +59,7 @@ fn generate_block() -> Block { for idx in 0..num_of_keys() { let key = key_of(idx); let value = value_of(idx); - assert!(builder.add(&key[..], &value[..])); + assert!(builder.add(key.as_key_slice(), &value[..])); } builder.build() } @@ -88,11 +97,11 @@ fn test_block_iterator() { let key = iter.key(); let value = iter.value(); assert_eq!( - key, - key_of(i), + key.for_testing_key_ref(), + key_of(i).for_testing_key_ref(), "expected key: {:?}, actual key: {:?}", - as_bytes(&key_of(i)), - as_bytes(key) + as_bytes(key_of(i).for_testing_key_ref()), + as_bytes(key.for_testing_key_ref()) ); assert_eq!( value, @@ -110,17 +119,17 @@ fn test_block_iterator() { #[test] fn test_block_seek_key() { let block = Arc::new(generate_block()); - let mut iter = BlockIterator::create_and_seek_to_key(block, &key_of(0)); + let mut iter = BlockIterator::create_and_seek_to_key(block, key_of(0).as_key_slice()); for offset in 1..=5 { for i in 0..num_of_keys() { let key = iter.key(); let value = iter.value(); assert_eq!( - key, - key_of(i), + 
key.for_testing_key_ref(), + key_of(i).for_testing_key_ref(), "expected key: {:?}, actual key: {:?}", - as_bytes(&key_of(i)), - as_bytes(key) + as_bytes(key_of(i).for_testing_key_ref()), + as_bytes(key.for_testing_key_ref()) ); assert_eq!( value, @@ -129,8 +138,10 @@ fn test_block_seek_key() { as_bytes(&value_of(i)), as_bytes(value) ); - iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes()); + iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts( + &format!("key_{:03}", i * 5 + offset).into_bytes(), + )); } - iter.seek_to_key(b"k"); + iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts(b"k")); } } diff --git a/mini-lsm/src/tests/week1_day4.rs b/mini-lsm/src/tests/week1_day4.rs index f62765e..7f0cfd3 100644 --- a/mini-lsm/src/tests/week1_day4.rs +++ b/mini-lsm/src/tests/week1_day4.rs @@ -4,12 +4,13 @@ use bytes::Bytes; use tempfile::{tempdir, TempDir}; use crate::iterators::StorageIterator; +use crate::key::{KeySlice, KeyVec}; use crate::table::{SsTable, SsTableBuilder, SsTableIterator}; #[test] fn test_sst_build_single_key() { let mut builder = SsTableBuilder::new(16); - builder.add(b"233", b"233333"); + builder.add(KeySlice::for_testing_from_slice_no_ts(b"233"), b"233333"); let dir = tempdir().unwrap(); builder.build_for_test(dir.path().join("1.sst")).unwrap(); } @@ -17,19 +18,19 @@ fn test_sst_build_single_key() { #[test] fn test_sst_build_two_blocks() { let mut builder = SsTableBuilder::new(16); - builder.add(b"11", b"11"); - builder.add(b"22", b"22"); - builder.add(b"33", b"11"); - builder.add(b"44", b"22"); - builder.add(b"55", b"11"); - builder.add(b"66", b"22"); + builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), b"11"); + builder.add(KeySlice::for_testing_from_slice_no_ts(b"22"), b"22"); + builder.add(KeySlice::for_testing_from_slice_no_ts(b"33"), b"11"); + builder.add(KeySlice::for_testing_from_slice_no_ts(b"44"), b"22"); + builder.add(KeySlice::for_testing_from_slice_no_ts(b"55"), b"11"); + 
builder.add(KeySlice::for_testing_from_slice_no_ts(b"66"), b"22"); assert!(builder.meta.len() >= 2); let dir = tempdir().unwrap(); builder.build_for_test(dir.path().join("1.sst")).unwrap(); } -fn key_of(idx: usize) -> Vec { - format!("key_{:03}", idx * 5).into_bytes() +fn key_of(idx: usize) -> KeyVec { + KeyVec::for_testing_from_vec_no_ts(format!("key_{:03}", idx * 5).into_bytes()) } fn value_of(idx: usize) -> Vec { @@ -45,7 +46,7 @@ fn generate_sst() -> (TempDir, SsTable) { for idx in 0..num_of_keys() { let key = key_of(idx); let value = value_of(idx); - builder.add(&key[..], &value[..]); + builder.add(key.as_key_slice(), &value[..]); } let dir = tempdir().unwrap(); let path = dir.path().join("1.sst"); @@ -63,8 +64,14 @@ fn test_sst_decode() { let meta = sst.block_meta.clone(); let new_sst = SsTable::open_for_test(sst.file).unwrap(); assert_eq!(new_sst.block_meta, meta); - assert_eq!(new_sst.first_key(), &key_of(0)); - assert_eq!(new_sst.last_key(), &key_of(num_of_keys() - 1)); + assert_eq!( + new_sst.first_key().for_testing_key_ref(), + key_of(0).for_testing_key_ref() + ); + assert_eq!( + new_sst.last_key().for_testing_key_ref(), + key_of(num_of_keys() - 1).for_testing_key_ref() + ); } fn as_bytes(x: &[u8]) -> Bytes { @@ -81,11 +88,11 @@ fn test_sst_iterator() { let key = iter.key(); let value = iter.value(); assert_eq!( - key, - key_of(i), + key.for_testing_key_ref(), + key_of(i).for_testing_key_ref(), "expected key: {:?}, actual key: {:?}", - as_bytes(&key_of(i)), - as_bytes(key) + as_bytes(key_of(i).for_testing_key_ref()), + as_bytes(key.for_testing_key_ref()) ); assert_eq!( value, @@ -104,17 +111,17 @@ fn test_sst_iterator() { fn test_sst_seek_key() { let (_dir, sst) = generate_sst(); let sst = Arc::new(sst); - let mut iter = SsTableIterator::create_and_seek_to_key(sst, &key_of(0)).unwrap(); + let mut iter = SsTableIterator::create_and_seek_to_key(sst, key_of(0).as_key_slice()).unwrap(); for offset in 1..=5 { for i in 0..num_of_keys() { let key = iter.key(); 
let value = iter.value(); assert_eq!( - key, - key_of(i), + key.for_testing_key_ref(), + key_of(i).for_testing_key_ref(), "expected key: {:?}, actual key: {:?}", - as_bytes(&key_of(i)), - as_bytes(key) + as_bytes(key_of(i).for_testing_key_ref()), + as_bytes(key.for_testing_key_ref()) ); assert_eq!( value, @@ -123,9 +130,12 @@ fn test_sst_seek_key() { as_bytes(&value_of(i)), as_bytes(value) ); - iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes()) - .unwrap(); + iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts( + &format!("key_{:03}", i * 5 + offset).into_bytes(), + )) + .unwrap(); } - iter.seek_to_key(b"k").unwrap(); + iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts(b"k")) + .unwrap(); } } diff --git a/mini-lsm/src/tests/week1_day5.rs b/mini-lsm/src/tests/week1_day5.rs index 9f91f76..6e7dc40 100644 --- a/mini-lsm/src/tests/week1_day5.rs +++ b/mini-lsm/src/tests/week1_day5.rs @@ -1,7 +1,7 @@ use std::ops::Bound; -use self::harness::generate_sst; -use self::harness::{check_iter_result, MockIterator}; +use self::harness::{check_iter_result_by_key, MockIterator}; +use self::harness::{check_lsm_iter_result_by_key, generate_sst}; use bytes::Bytes; use tempfile::tempdir; @@ -25,7 +25,7 @@ fn test_task1_merge_1() { (Bytes::from("d"), Bytes::from("4.2")), ]); let mut iter = TwoMergeIterator::create(i1, i2).unwrap(); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("a"), Bytes::from("1.1")), @@ -50,7 +50,7 @@ fn test_task1_merge_2() { (Bytes::from("d"), Bytes::from("4.2")), ]); let mut iter = TwoMergeIterator::create(i1, i2).unwrap(); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("a"), Bytes::from("1.2")), @@ -74,7 +74,7 @@ fn test_task1_merge_3() { (Bytes::from("d"), Bytes::from("4.2")), ]); let mut iter = TwoMergeIterator::create(i1, i2).unwrap(); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("a"), Bytes::from("1.1")), @@ -94,7 +94,7 @@ fn 
test_task1_merge_4() { (Bytes::from("d"), Bytes::from("4.2")), ]); let mut iter = TwoMergeIterator::create(i1, i2).unwrap(); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("b"), Bytes::from("2.2")), @@ -109,7 +109,7 @@ fn test_task1_merge_4() { (Bytes::from("d"), Bytes::from("4.2")), ]); let mut iter = TwoMergeIterator::create(i1, i2).unwrap(); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from("b"), Bytes::from("2.2")), @@ -124,7 +124,7 @@ fn test_task1_merge_5() { let i2 = MockIterator::new(vec![]); let i1 = MockIterator::new(vec![]); let mut iter = TwoMergeIterator::create(i1, i2).unwrap(); - check_iter_result(&mut iter, vec![]) + check_iter_result_by_key(&mut iter, vec![]) } #[test] @@ -164,7 +164,7 @@ fn test_task2_storage_scan() { snapshot.sstables.insert(sst1.sst_id(), sst1.into()); *state = snapshot.into(); } - check_iter_result( + check_lsm_iter_result_by_key( &mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(), vec![ (Bytes::from("0"), Bytes::from("2333333")), @@ -173,13 +173,13 @@ fn test_task2_storage_scan() { (Bytes::from("3"), Bytes::from("23333")), ], ); - check_iter_result( + check_lsm_iter_result_by_key( &mut storage .scan(Bound::Included(b"1"), Bound::Included(b"2")) .unwrap(), vec![(Bytes::from("2"), Bytes::from("2333"))], ); - check_iter_result( + check_lsm_iter_result_by_key( &mut storage .scan(Bound::Excluded(b"1"), Bound::Excluded(b"3")) .unwrap(), diff --git a/mini-lsm/src/tests/week1_day6.rs b/mini-lsm/src/tests/week1_day6.rs index 0e8758a..969cd2f 100644 --- a/mini-lsm/src/tests/week1_day6.rs +++ b/mini-lsm/src/tests/week1_day6.rs @@ -3,7 +3,7 @@ use std::{ops::Bound, time::Duration}; use bytes::Bytes; use tempfile::tempdir; -use self::harness::{check_iter_result, sync}; +use self::harness::{check_lsm_iter_result_by_key, sync}; use super::*; use crate::{ @@ -41,7 +41,7 @@ fn test_task1_storage_scan() { assert_eq!(state.imm_memtables.len(), 2); } - check_iter_result( + 
check_lsm_iter_result_by_key( &mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(), vec![ (Bytes::from("0"), Bytes::from("2333333")), @@ -50,13 +50,13 @@ fn test_task1_storage_scan() { (Bytes::from("3"), Bytes::from("23333")), ], ); - check_iter_result( + check_lsm_iter_result_by_key( &mut storage .scan(Bound::Included(b"1"), Bound::Included(b"2")) .unwrap(), vec![(Bytes::from("2"), Bytes::from("2333"))], ); - check_iter_result( + check_lsm_iter_result_by_key( &mut storage .scan(Bound::Excluded(b"1"), Bound::Excluded(b"3")) .unwrap(), diff --git a/mini-lsm/src/tests/week1_day7.rs b/mini-lsm/src/tests/week1_day7.rs index 9bd1e64..ef29b6f 100644 --- a/mini-lsm/src/tests/week1_day7.rs +++ b/mini-lsm/src/tests/week1_day7.rs @@ -1,6 +1,9 @@ use tempfile::tempdir; -use crate::table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder}; +use crate::{ + key::KeySlice, + table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder}, +}; fn key_of(idx: usize) -> Vec { format!("key_{:010}", idx * 5).into_bytes() @@ -49,7 +52,7 @@ fn test_task2_sst_decode() { for idx in 0..num_of_keys() { let key = key_of(idx); let value = value_of(idx); - builder.add(&key[..], &value[..]); + builder.add(KeySlice::for_testing_from_slice_no_ts(&key[..]), &value[..]); } let dir = tempdir().unwrap(); let path = dir.path().join("1.sst"); @@ -67,7 +70,7 @@ fn test_task3_block_key_compression() { for idx in 0..num_of_keys() { let key = key_of(idx); let value = value_of(idx); - builder.add(&key[..], &value[..]); + builder.add(KeySlice::for_testing_from_slice_no_ts(&key[..]), &value[..]); } let dir = tempdir().unwrap(); let path = dir.path().join("1.sst"); diff --git a/mini-lsm/src/tests/week2_day1.rs b/mini-lsm/src/tests/week2_day1.rs index b64a37b..6820569 100644 --- a/mini-lsm/src/tests/week2_day1.rs +++ b/mini-lsm/src/tests/week2_day1.rs @@ -1,16 +1,15 @@ use std::{ops::Bound, path::Path, sync::Arc}; +use self::harness::{check_iter_result_by_key, check_lsm_iter_result_by_key, sync}; use 
bytes::Bytes; use tempfile::tempdir; -use week2_day1::harness::sync; - -use self::harness::check_iter_result; use super::*; use crate::{ iterators::{ concat_iterator::SstConcatIterator, merge_iterator::MergeIterator, StorageIterator, }, + key::KeySlice, lsm_storage::{LsmStorageInner, LsmStorageOptions, LsmStorageState}, table::{SsTable, SsTableBuilder, SsTableIterator}, }; @@ -51,7 +50,7 @@ fn test_task1_full_compaction() { sync(&storage); assert_eq!(storage.state.read().l0_sstables.len(), 3); let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from_static(b"0"), Bytes::from_static(b"")), @@ -62,7 +61,7 @@ fn test_task1_full_compaction() { storage.force_full_compaction().unwrap(); assert!(storage.state.read().l0_sstables.is_empty()); let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![(Bytes::from_static(b"1"), Bytes::from_static(b"v2"))], ); @@ -72,7 +71,7 @@ fn test_task1_full_compaction() { storage.delete(b"1").unwrap(); sync(&storage); let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from_static(b"0"), Bytes::from_static(b"v3")), @@ -83,7 +82,7 @@ fn test_task1_full_compaction() { storage.force_full_compaction().unwrap(); assert!(storage.state.read().l0_sstables.is_empty()); let mut iter = construct_merge_iterator_over_storage(&storage.state.read()); - check_iter_result( + check_iter_result_by_key( &mut iter, vec![ (Bytes::from_static(b"0"), Bytes::from_static(b"v3")), @@ -101,7 +100,10 @@ fn generate_concat_sst( let mut builder = SsTableBuilder::new(128); for idx in start_key..end_key { let key = format!("{:05}", idx); - builder.add(key.as_bytes(), b"test"); + builder.add( + KeySlice::for_testing_from_slice_no_ts(key.as_bytes()), + b"test", + ); } let path = 
dir.as_ref().join(format!("{id}.sst")); builder.build_for_test(path).unwrap() @@ -122,22 +124,25 @@ fn test_task2_concat_iterator() { for key in 0..120 { let iter = SstConcatIterator::create_and_seek_to_key( sstables.clone(), - format!("{:05}", key).as_bytes(), + KeySlice::for_testing_from_slice_no_ts(format!("{:05}", key).as_bytes()), ) .unwrap(); if key < 10 { assert!(iter.is_valid()); - assert_eq!(iter.key(), b"00010"); + assert_eq!(iter.key().for_testing_key_ref(), b"00010"); } else if key >= 110 { assert!(!iter.is_valid()); } else { assert!(iter.is_valid()); - assert_eq!(iter.key(), format!("{:05}", key).as_bytes()); + assert_eq!( + iter.key().for_testing_key_ref(), + format!("{:05}", key).as_bytes() + ); } } let iter = SstConcatIterator::create_and_seek_to_first(sstables.clone()).unwrap(); assert!(iter.is_valid()); - assert_eq!(iter.key(), b"00010"); + assert_eq!(iter.key().for_testing_key_ref(), b"00010"); } #[test] @@ -169,7 +174,7 @@ fn test_task3_integration() { assert!(storage.state.read().l0_sstables.is_empty()); assert!(!storage.state.read().levels[0].1.is_empty()); - check_iter_result( + check_lsm_iter_result_by_key( &mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(), vec![ (Bytes::from("0"), Bytes::from("2333333")),