add key abstraction and prepare for MVCC (#28)
* add key abstraction and prepare for MVCC Signed-off-by: Alex Chi <iskyzh@gmail.com> * a little bit type exercise Signed-off-by: Alex Chi <iskyzh@gmail.com> * refactor tests Signed-off-by: Alex Chi <iskyzh@gmail.com> * fix clippy warnings Signed-off-by: Alex Chi <iskyzh@gmail.com> * refactor starter code Signed-off-by: Alex Chi <iskyzh@gmail.com> * final touch docs Signed-off-by: Alex Chi <iskyzh@gmail.com> --------- Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
@@ -21,6 +21,8 @@ code .
|
||||
|
||||
## Install Tools
|
||||
|
||||
You will need the latest stable Rust to compile this project. The minimum requirement is `1.74`.
|
||||
|
||||
```
|
||||
cargo x install-tools
|
||||
```
|
||||
|
||||
@@ -2,130 +2,130 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm</loc>
|
||||
<lastmod>2024-01-21T13:41:44.657Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.231Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/00-get-started</loc>
|
||||
<lastmod>2024-01-21T13:41:44.659Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.234Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/00-overview</loc>
|
||||
<lastmod>2024-01-21T13:41:44.658Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.232Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/00-preface</loc>
|
||||
<lastmod>2024-01-21T13:41:44.656Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.230Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/00-v1</loc>
|
||||
<lastmod>2024-01-21T13:41:44.677Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.256Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/01-block</loc>
|
||||
<lastmod>2024-01-21T13:41:44.678Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.257Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/02-sst</loc>
|
||||
<lastmod>2024-01-21T13:41:44.679Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.258Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/03-memtable</loc>
|
||||
<lastmod>2024-01-21T13:41:44.680Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.259Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/04-engine</loc>
|
||||
<lastmod>2024-01-21T13:41:44.681Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.260Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/05-compaction</loc>
|
||||
<lastmod>2024-01-21T13:41:44.682Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.261Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/06-recovery</loc>
|
||||
<lastmod>2024-01-21T13:41:44.682Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.262Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/07-bloom-filter</loc>
|
||||
<lastmod>2024-01-21T13:41:44.683Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.263Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/08-key-compression</loc>
|
||||
<lastmod>2024-01-21T13:41:44.684Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.264Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/09-whats-next</loc>
|
||||
<lastmod>2024-01-21T13:41:44.685Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.265Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-01-memtable</loc>
|
||||
<lastmod>2024-01-21T13:41:44.661Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.237Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-02-merge-iterator</loc>
|
||||
<lastmod>2024-01-21T13:41:44.662Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.238Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-03-block</loc>
|
||||
<lastmod>2024-01-21T13:41:44.663Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.239Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-04-sst</loc>
|
||||
<lastmod>2024-01-21T13:41:44.664Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.240Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-05-read-path</loc>
|
||||
<lastmod>2024-01-21T13:41:44.665Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.242Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-06-write-path</loc>
|
||||
<lastmod>2024-01-21T13:41:44.666Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.243Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-07-sst-optimizations</loc>
|
||||
<lastmod>2024-01-21T13:41:44.668Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.244Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week1-overview</loc>
|
||||
<lastmod>2024-01-21T13:41:44.660Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.235Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-01-compaction</loc>
|
||||
<lastmod>2024-01-21T13:41:44.669Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.246Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-02-simple</loc>
|
||||
<lastmod>2024-01-21T13:41:44.670Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.248Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-03-tiered</loc>
|
||||
<lastmod>2024-01-21T13:41:44.671Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.249Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-04-leveled</loc>
|
||||
<lastmod>2024-01-21T13:41:44.672Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.250Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-05-manifest</loc>
|
||||
<lastmod>2024-01-21T13:41:44.672Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.251Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-06-wal</loc>
|
||||
<lastmod>2024-01-21T13:41:44.673Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.252Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-07-snacks</loc>
|
||||
<lastmod>2024-01-21T13:41:44.674Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.253Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week2-overview</loc>
|
||||
<lastmod>2024-01-21T13:41:44.669Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.245Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week3-overview</loc>
|
||||
<lastmod>2024-01-21T13:41:44.675Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.254Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://skyzh.github.io/mini-lsm/week4-overview</loc>
|
||||
<lastmod>2024-01-21T13:41:44.676Z</lastmod>
|
||||
<lastmod>2024-01-25T02:56:28.255Z</lastmod>
|
||||
</url>
|
||||
</urlset>
|
||||
|
||||
@@ -94,7 +94,9 @@ let Some(mut inner_iter) = self.iters.peek_mut() {
|
||||
|
||||
If `next` returns an error (e.g., due to disk failure, network failure, checksum error, etc.), the iterator is no longer valid. However, when we go out of the if condition and return the error to the caller, `PeekMut`'s drop will try to move the element within the heap, which causes an access to an invalid iterator. Therefore, you will need to do all error handling by yourself instead of using `?` within the scope of `PeekMut`.
|
||||
|
||||
We want to avoid dynamic dispatch as much as possible, and therefore we do not use `Box<dyn StorageIterator>` in the system. Instead, we prefer static dispatch using generics.
|
||||
We want to avoid dynamic dispatch as much as possible, and therefore we do not use `Box<dyn StorageIterator>` in the system. Instead, we prefer static dispatch using generics. Also note that `StorageIterator` uses a generic associated type (GAT), so that it can support both `KeySlice` and `&[u8]` as the key type. We will change `KeySlice` to include the timestamp in week 3, and using a separate type for it now will make the transition smoother.
|
||||
|
||||
Starting from this section, we will use `Key<T>` to represent LSM key types and distinguish them from values in the type system. You should use the provided APIs of `Key<T>` instead of directly accessing the inner value. We will add a timestamp to this key type in week 3, and using the key abstraction will make the transition smoother. For now, `KeySlice` is equivalent to `&[u8]`, `KeyVec` is equivalent to `Vec<u8>`, and `KeyBytes` is equivalent to `Bytes`.
|
||||
|
||||
## Task 3: LSM Iterator + Fused Iterator
|
||||
|
||||
|
||||
@@ -4,12 +4,13 @@ use wrapper::mini_lsm_wrapper;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use bytes::{Buf, BufMut, BytesMut};
|
||||
use clap::Parser;
|
||||
use mini_lsm_wrapper::compact::{
|
||||
LeveledCompactionController, LeveledCompactionOptions, SimpleLeveledCompactionController,
|
||||
SimpleLeveledCompactionOptions, TieredCompactionController, TieredCompactionOptions,
|
||||
};
|
||||
use mini_lsm_wrapper::key::KeyBytes;
|
||||
use mini_lsm_wrapper::lsm_storage::LsmStorageState;
|
||||
use mini_lsm_wrapper::mem_table::MemTable;
|
||||
use mini_lsm_wrapper::table::SsTable;
|
||||
@@ -135,11 +136,11 @@ impl MockStorage {
|
||||
"invalid file arrangement in L{}: id={}, range={:x}..={:x}; id={}, range={:x}..={:x}",
|
||||
level,
|
||||
this_file.sst_id(),
|
||||
this_file.first_key().clone().get_u64(),
|
||||
this_file.last_key().clone().get_u64(),
|
||||
this_file.first_key().for_testing_key_ref().get_u64(),
|
||||
this_file.last_key().for_testing_key_ref().get_u64(),
|
||||
next_file.sst_id(),
|
||||
next_file.first_key().clone().get_u64(),
|
||||
next_file.last_key().clone().get_u64()
|
||||
next_file.first_key().for_testing_key_ref().get_u64(),
|
||||
next_file.last_key().for_testing_key_ref().get_u64()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -184,7 +185,7 @@ impl MockStorage {
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_random_key_range() -> (Bytes, Bytes) {
|
||||
fn generate_random_key_range() -> (KeyBytes, KeyBytes) {
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
let begin: usize = rng.gen_range(0..(1 << 31));
|
||||
@@ -193,16 +194,19 @@ fn generate_random_key_range() -> (Bytes, Bytes) {
|
||||
let mut end_bytes = BytesMut::new();
|
||||
begin_bytes.put_u64(begin as u64);
|
||||
end_bytes.put_u64(end as u64);
|
||||
(begin_bytes.into(), end_bytes.into())
|
||||
(
|
||||
KeyBytes::for_testing_from_bytes_no_ts(begin_bytes.freeze()),
|
||||
KeyBytes::for_testing_from_bytes_no_ts(end_bytes.freeze()),
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_random_split(
|
||||
mut begin_bytes: Bytes,
|
||||
mut end_bytes: Bytes,
|
||||
begin_bytes: KeyBytes,
|
||||
end_bytes: KeyBytes,
|
||||
split: usize,
|
||||
) -> Vec<(Bytes, Bytes)> {
|
||||
let begin = begin_bytes.get_u64();
|
||||
let end = end_bytes.get_u64();
|
||||
) -> Vec<(KeyBytes, KeyBytes)> {
|
||||
let begin = begin_bytes.for_testing_key_ref().get_u64();
|
||||
let end = end_bytes.for_testing_key_ref().get_u64();
|
||||
let len = end - begin + 1;
|
||||
let mut result = Vec::new();
|
||||
let split = split as u64;
|
||||
@@ -214,7 +218,10 @@ fn generate_random_split(
|
||||
let mut end_bytes = BytesMut::new();
|
||||
begin_bytes.put_u64(nb);
|
||||
end_bytes.put_u64(ne);
|
||||
result.push((begin_bytes.into(), end_bytes.into()));
|
||||
result.push((
|
||||
KeyBytes::for_testing_from_bytes_no_ts(begin_bytes.freeze()),
|
||||
KeyBytes::for_testing_from_bytes_no_ts(end_bytes.freeze()),
|
||||
));
|
||||
}
|
||||
result
|
||||
}
|
||||
@@ -502,8 +509,14 @@ fn main() {
|
||||
.map(|id| format!(
|
||||
"{}.sst {:x}..={:x}",
|
||||
id,
|
||||
storage.snapshot.sstables[id].first_key().clone().get_u64(),
|
||||
storage.snapshot.sstables[id].last_key().clone().get_u64()
|
||||
storage.snapshot.sstables[id]
|
||||
.first_key()
|
||||
.for_testing_key_ref()
|
||||
.get_u64(),
|
||||
storage.snapshot.sstables[id]
|
||||
.last_key()
|
||||
.for_testing_key_ref()
|
||||
.get_u64()
|
||||
))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
@@ -516,8 +529,14 @@ fn main() {
|
||||
.map(|id| format!(
|
||||
"{}.sst {:x}..={:x}",
|
||||
id,
|
||||
storage.snapshot.sstables[id].first_key().clone().get_u64(),
|
||||
storage.snapshot.sstables[id].last_key().clone().get_u64()
|
||||
storage.snapshot.sstables[id]
|
||||
.first_key()
|
||||
.for_testing_key_ref()
|
||||
.get_u64(),
|
||||
storage.snapshot.sstables[id]
|
||||
.last_key()
|
||||
.for_testing_key_ref()
|
||||
.get_u64()
|
||||
))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
@@ -529,8 +548,14 @@ fn main() {
|
||||
.map(|id| format!(
|
||||
"{}.sst {:x}..={:x}",
|
||||
id,
|
||||
storage.snapshot.sstables[id].first_key().clone().get_u64(),
|
||||
storage.snapshot.sstables[id].last_key().clone().get_u64()
|
||||
storage.snapshot.sstables[id]
|
||||
.first_key()
|
||||
.for_testing_key_ref()
|
||||
.get_u64(),
|
||||
storage.snapshot.sstables[id]
|
||||
.last_key()
|
||||
.for_testing_key_ref()
|
||||
.get_u64()
|
||||
))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
|
||||
@@ -1,10 +1,21 @@
|
||||
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
|
||||
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
|
||||
|
||||
use crate::key::{KeySlice, KeyVec};
|
||||
|
||||
use super::Block;
|
||||
|
||||
/// Builds a block.
|
||||
pub struct BlockBuilder {}
|
||||
pub struct BlockBuilder {
|
||||
/// Offsets of each key-value entry.
|
||||
offsets: Vec<u16>,
|
||||
/// All serialized key-value pairs in the block.
|
||||
data: Vec<u8>,
|
||||
/// The expected block size.
|
||||
block_size: usize,
|
||||
/// The first key in the block
|
||||
first_key: KeyVec,
|
||||
}
|
||||
|
||||
impl BlockBuilder {
|
||||
/// Creates a new block builder.
|
||||
@@ -14,7 +25,7 @@ impl BlockBuilder {
|
||||
|
||||
/// Adds a key-value pair to the block. Returns false when the block is full.
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
||||
pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::key::{KeySlice, KeyVec};
|
||||
|
||||
use super::Block;
|
||||
|
||||
/// Iterates on a block.
|
||||
@@ -10,20 +12,23 @@ pub struct BlockIterator {
|
||||
/// The internal `Block`, wrapped by an `Arc`
|
||||
block: Arc<Block>,
|
||||
/// The current key, empty represents the iterator is invalid
|
||||
key: Vec<u8>,
|
||||
key: KeyVec,
|
||||
/// the value range from the block
|
||||
value_range: (usize, usize),
|
||||
/// Current index of the key-value pair, should be in range of [0, num_of_elements)
|
||||
idx: usize,
|
||||
/// The first key in the block
|
||||
first_key: KeyVec,
|
||||
}
|
||||
|
||||
impl BlockIterator {
|
||||
fn new(block: Arc<Block>) -> Self {
|
||||
Self {
|
||||
block,
|
||||
key: Vec::new(),
|
||||
key: KeyVec::new(),
|
||||
value_range: (0, 0),
|
||||
idx: 0,
|
||||
first_key: KeyVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,12 +38,12 @@ impl BlockIterator {
|
||||
}
|
||||
|
||||
/// Creates a block iterator and seek to the first key that >= `key`.
|
||||
pub fn create_and_seek_to_key(block: Arc<Block>, key: &[u8]) -> Self {
|
||||
pub fn create_and_seek_to_key(block: Arc<Block>, key: KeySlice) -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Returns the key of the current entry.
|
||||
pub fn key(&self) -> &[u8] {
|
||||
pub fn key(&self) -> KeySlice {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -66,7 +71,7 @@ impl BlockIterator {
|
||||
/// Seek to the first key that >= `key`.
|
||||
/// Note: You should assume the key-value pairs in the block are sorted when being added by
|
||||
/// callers.
|
||||
pub fn seek_to_key(&mut self, key: &[u8]) {
|
||||
pub fn seek_to_key(&mut self, key: KeySlice) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,11 +3,15 @@ pub mod merge_iterator;
|
||||
pub mod two_merge_iterator;
|
||||
|
||||
pub trait StorageIterator {
|
||||
type KeyType<'a>: PartialEq + Eq + PartialOrd + Ord
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Get the current value.
|
||||
fn value(&self) -> &[u8];
|
||||
|
||||
/// Get the current key.
|
||||
fn key(&self) -> &[u8];
|
||||
fn key(&self) -> Self::KeyType<'_>;
|
||||
|
||||
/// Check if the current iterator is valid.
|
||||
fn is_valid(&self) -> bool;
|
||||
|
||||
@@ -6,7 +6,10 @@ use std::sync::Arc;
|
||||
use anyhow::Result;
|
||||
|
||||
use super::StorageIterator;
|
||||
use crate::table::{SsTable, SsTableIterator};
|
||||
use crate::{
|
||||
key::KeySlice,
|
||||
table::{SsTable, SsTableIterator},
|
||||
};
|
||||
|
||||
/// Concat multiple iterators ordered in key order and their key ranges do not overlap. We do not want to create the
|
||||
/// iterators when initializing this iterator to reduce the overhead of seeking.
|
||||
@@ -21,13 +24,15 @@ impl SstConcatIterator {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn create_and_seek_to_key(sstables: Vec<Arc<SsTable>>, key: &[u8]) -> Result<Self> {
|
||||
pub fn create_and_seek_to_key(sstables: Vec<Arc<SsTable>>, key: KeySlice) -> Result<Self> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageIterator for SstConcatIterator {
|
||||
fn key(&self) -> &[u8] {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn key(&self) -> KeySlice {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@ use std::collections::BinaryHeap;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::key::KeySlice;
|
||||
|
||||
use super::StorageIterator;
|
||||
|
||||
struct HeapWrapper<I: StorageIterator>(pub usize, pub Box<I>);
|
||||
@@ -21,7 +23,7 @@ impl<I: StorageIterator> Eq for HeapWrapper<I> {}
|
||||
impl<I: StorageIterator> PartialOrd for HeapWrapper<I> {
|
||||
#[allow(clippy::non_canonical_partial_ord_impl)]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
|
||||
match self.1.key().cmp(other.1.key()) {
|
||||
match self.1.key().cmp(&other.1.key()) {
|
||||
cmp::Ordering::Greater => Some(cmp::Ordering::Greater),
|
||||
cmp::Ordering::Less => Some(cmp::Ordering::Less),
|
||||
cmp::Ordering::Equal => self.0.partial_cmp(&other.0),
|
||||
@@ -49,8 +51,12 @@ impl<I: StorageIterator> MergeIterator<I> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: StorageIterator> StorageIterator for MergeIterator<I> {
|
||||
fn key(&self) -> &[u8] {
|
||||
impl<I: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>> StorageIterator
|
||||
for MergeIterator<I>
|
||||
{
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn key(&self) -> KeySlice {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::key::KeySlice;
|
||||
|
||||
use super::StorageIterator;
|
||||
|
||||
/// Merges two iterators of different types into one. If the two iterators have the same key, only
|
||||
@@ -13,14 +15,24 @@ pub struct TwoMergeIterator<A: StorageIterator, B: StorageIterator> {
|
||||
// Add fields as need
|
||||
}
|
||||
|
||||
impl<A: StorageIterator, B: StorageIterator> TwoMergeIterator<A, B> {
|
||||
impl<
|
||||
A: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
B: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
> TwoMergeIterator<A, B>
|
||||
{
|
||||
pub fn create(a: A, b: B) -> Result<Self> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl<A: StorageIterator, B: StorageIterator> StorageIterator for TwoMergeIterator<A, B> {
|
||||
fn key(&self) -> &[u8] {
|
||||
impl<
|
||||
A: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
B: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
> StorageIterator for TwoMergeIterator<A, B>
|
||||
{
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn key(&self) -> KeySlice {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
159
mini-lsm-starter/src/key.rs
Normal file
159
mini-lsm-starter/src/key.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
use std::fmt::Debug;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
/// Newtype wrapper around a byte container `T` that marks the bytes as an LSM key.
///
/// The inner value is private to this module; use the methods on `Key<T>` rather than
/// touching the wrapped bytes directly.
pub struct Key<T: AsRef<[u8]>>(T);
|
||||
|
||||
/// A key that borrows its bytes from a `&'a [u8]`.
pub type KeySlice<'a> = Key<&'a [u8]>;
|
||||
/// A key that owns its bytes in a `Vec<u8>`.
pub type KeyVec = Key<Vec<u8>>;
|
||||
/// A key that stores its bytes in a `bytes::Bytes` buffer.
pub type KeyBytes = Key<Bytes>;
|
||||
|
||||
impl<T: AsRef<[u8]>> Key<T> {
|
||||
pub fn into_inner(self) -> T {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.0.as_ref().len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.as_ref().is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl Key<Vec<u8>> {
|
||||
pub fn new() -> Self {
|
||||
Self(Vec::new())
|
||||
}
|
||||
|
||||
/// Create a `KeyVec` from a `Vec<u8>`. Will be removed in week 3.
|
||||
pub fn from_vec(key: Vec<u8>) -> Self {
|
||||
Self(key)
|
||||
}
|
||||
|
||||
/// Clears the key and set ts to 0.
|
||||
pub fn clear(&mut self) {
|
||||
self.0.clear()
|
||||
}
|
||||
|
||||
/// Append a slice to the end of the key
|
||||
pub fn append(&mut self, data: &[u8]) {
|
||||
self.0.extend(data)
|
||||
}
|
||||
|
||||
/// Set the key from a slice without re-allocating. The signature will change in week 3.
|
||||
pub fn set_from_slice(&mut self, key_slice: KeySlice) {
|
||||
self.0.clear();
|
||||
self.0.extend(key_slice.0);
|
||||
}
|
||||
|
||||
pub fn as_key_slice(&self) -> KeySlice {
|
||||
Key(self.0.as_slice())
|
||||
}
|
||||
|
||||
pub fn into_key_bytes(self) -> KeyBytes {
|
||||
Key(self.0.into())
|
||||
}
|
||||
|
||||
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self {
|
||||
Self(key)
|
||||
}
|
||||
}
|
||||
|
||||
impl Key<Bytes> {
|
||||
pub fn as_key_slice(&self) -> KeySlice {
|
||||
Key(&self.0)
|
||||
}
|
||||
|
||||
/// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
|
||||
pub fn from_bytes(bytes: Bytes) -> KeyBytes {
|
||||
Key(bytes)
|
||||
}
|
||||
|
||||
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes {
|
||||
Key(bytes)
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Key<&'a [u8]> {
|
||||
pub fn to_key_vec(self) -> KeyVec {
|
||||
Key(self.0.to_vec())
|
||||
}
|
||||
|
||||
/// Create a key slice from a slice. Will be removed in week 3.
|
||||
pub fn from_slice(slice: &'a [u8]) -> Self {
|
||||
Self(slice)
|
||||
}
|
||||
|
||||
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(self) -> &'a [u8] {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(self) -> &'a [u8] {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self {
|
||||
Self(slice)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + Debug> Debug for Key<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + Default> Default for Key<T> {
|
||||
fn default() -> Self {
|
||||
Self(T::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + PartialEq> PartialEq for Key<T> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.eq(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Marker impl: a key is `Eq` whenever its wrapped container is `Eq`.
impl<T: AsRef<[u8]> + Eq> Eq for Key<T> {}
|
||||
|
||||
impl<T: AsRef<[u8]> + Clone> Clone for Key<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self(self.0.clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Marker impl: a key is `Copy` whenever its wrapped container is `Copy` (e.g. `KeySlice`, which wraps `&[u8]`).
impl<T: AsRef<[u8]> + Copy> Copy for Key<T> {}
|
||||
|
||||
impl<T: AsRef<[u8]> + PartialOrd> PartialOrd for Key<T> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
self.0.partial_cmp(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + Ord> Ord for Key<T> {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.0.cmp(&other.0)
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ pub mod block;
|
||||
pub mod compact;
|
||||
pub mod debug;
|
||||
pub mod iterators;
|
||||
pub mod key;
|
||||
pub mod lsm_iterator;
|
||||
pub mod lsm_storage;
|
||||
pub mod manifest;
|
||||
|
||||
@@ -22,6 +22,8 @@ impl LsmIterator {
|
||||
}
|
||||
|
||||
impl StorageIterator for LsmIterator {
|
||||
type KeyType<'a> = &'a [u8];
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
@@ -53,11 +55,13 @@ impl<I: StorageIterator> FusedIterator<I> {
|
||||
}
|
||||
|
||||
impl<I: StorageIterator> StorageIterator for FusedIterator<I> {
|
||||
type KeyType<'a> = I::KeyType<'a> where Self: 'a;
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
fn key(&self) -> Self::KeyType<'_> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ use crossbeam_skiplist::SkipMap;
|
||||
use ouroboros::self_referencing;
|
||||
|
||||
use crate::iterators::StorageIterator;
|
||||
use crate::key::KeySlice;
|
||||
use crate::table::SsTableBuilder;
|
||||
use crate::wal::Wal;
|
||||
|
||||
@@ -115,11 +116,13 @@ pub struct MemTableIterator {
|
||||
}
|
||||
|
||||
impl StorageIterator for MemTableIterator {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
fn key(&self) -> KeySlice {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -11,10 +11,11 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
pub use builder::SsTableBuilder;
|
||||
use bytes::{Buf, Bytes};
|
||||
use bytes::Buf;
|
||||
pub use iterator::SsTableIterator;
|
||||
|
||||
use crate::block::Block;
|
||||
use crate::key::{KeyBytes, KeySlice};
|
||||
use crate::lsm_storage::BlockCache;
|
||||
|
||||
use self::bloom::Bloom;
|
||||
@@ -24,9 +25,9 @@ pub struct BlockMeta {
|
||||
/// Offset of this data block.
|
||||
pub offset: usize,
|
||||
/// The first key of the data block.
|
||||
pub first_key: Bytes,
|
||||
pub first_key: KeyBytes,
|
||||
/// The last key of the data block.
|
||||
pub last_key: Bytes,
|
||||
pub last_key: KeyBytes,
|
||||
}
|
||||
|
||||
impl BlockMeta {
|
||||
@@ -92,8 +93,8 @@ pub struct SsTable {
|
||||
pub(crate) block_meta_offset: usize,
|
||||
id: usize,
|
||||
block_cache: Option<Arc<BlockCache>>,
|
||||
first_key: Bytes,
|
||||
last_key: Bytes,
|
||||
first_key: KeyBytes,
|
||||
last_key: KeyBytes,
|
||||
pub(crate) bloom: Option<Bloom>,
|
||||
}
|
||||
|
||||
@@ -109,7 +110,12 @@ impl SsTable {
|
||||
}
|
||||
|
||||
/// Create a mock SST with only first key + last key metadata
|
||||
pub fn create_meta_only(id: usize, file_size: u64, first_key: Bytes, last_key: Bytes) -> Self {
|
||||
pub fn create_meta_only(
|
||||
id: usize,
|
||||
file_size: u64,
|
||||
first_key: KeyBytes,
|
||||
last_key: KeyBytes,
|
||||
) -> Self {
|
||||
Self {
|
||||
file: FileObject(None, file_size),
|
||||
block_meta: vec![],
|
||||
@@ -135,7 +141,7 @@ impl SsTable {
|
||||
/// Find the block that may contain `key`.
|
||||
/// Note: You may want to make use of the `first_key` stored in `BlockMeta`.
|
||||
/// You may also assume the key-value pairs stored in each consecutive block are sorted.
|
||||
pub fn find_block_idx(&self, key: &[u8]) -> usize {
|
||||
pub fn find_block_idx(&self, key: KeySlice) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -144,11 +150,11 @@ impl SsTable {
|
||||
self.block_meta.len()
|
||||
}
|
||||
|
||||
pub fn first_key(&self) -> &Bytes {
|
||||
pub fn first_key(&self) -> &KeyBytes {
|
||||
&self.first_key
|
||||
}
|
||||
|
||||
pub fn last_key(&self) -> &Bytes {
|
||||
pub fn last_key(&self) -> &KeyBytes {
|
||||
&self.last_key
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::sync::Arc;
|
||||
use anyhow::Result;
|
||||
|
||||
use super::{BlockMeta, SsTable};
|
||||
use crate::{block::BlockBuilder, lsm_storage::BlockCache};
|
||||
use crate::{block::BlockBuilder, key::KeySlice, lsm_storage::BlockCache};
|
||||
|
||||
/// Builds an SSTable from key-value pairs.
|
||||
pub struct SsTableBuilder {
|
||||
@@ -29,7 +29,7 @@ impl SsTableBuilder {
|
||||
///
|
||||
/// Note: You should split a new block when the current block is full.(`std::mem::replace` may
|
||||
/// be helpful here)
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) {
|
||||
pub fn add(&mut self, key: KeySlice, value: &[u8]) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
||||
use anyhow::Result;
|
||||
|
||||
use super::SsTable;
|
||||
use crate::{block::BlockIterator, iterators::StorageIterator};
|
||||
use crate::{block::BlockIterator, iterators::StorageIterator, key::KeySlice};
|
||||
|
||||
/// An iterator over the contents of an SSTable.
|
||||
pub struct SsTableIterator {
|
||||
@@ -27,21 +27,23 @@ impl SsTableIterator {
|
||||
}
|
||||
|
||||
/// Create a new iterator and seek to the first key-value pair which >= `key`.
|
||||
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: &[u8]) -> Result<Self> {
|
||||
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: KeySlice) -> Result<Self> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Seek to the first key-value pair which >= `key`.
|
||||
/// Note: You probably want to review the handout for detailed explanation when implementing
|
||||
/// this function.
|
||||
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
|
||||
pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageIterator for SsTableIterator {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
/// Return the `key` that's held by the underlying block iterator.
|
||||
fn key(&self) -> &[u8] {
|
||||
fn key(&self) -> KeySlice {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
|
||||
//! DO NOT MODIFY -- Mini-LSM tests modules
|
||||
//! This file will be automatically rewritten by the copy-test command.
|
||||
|
||||
0
mini-lsm-starter/src/tests/.gitkeep
Normal file
0
mini-lsm-starter/src/tests/.gitkeep
Normal file
@@ -1,5 +1,7 @@
|
||||
use bytes::BufMut;
|
||||
|
||||
use crate::key::{KeySlice, KeyVec};
|
||||
|
||||
use super::{Block, SIZEOF_U16};
|
||||
|
||||
/// Builds a block.
|
||||
@@ -11,16 +13,16 @@ pub struct BlockBuilder {
|
||||
/// The expected block size.
|
||||
block_size: usize,
|
||||
/// The first key in the block
|
||||
first_key: Vec<u8>,
|
||||
first_key: KeyVec,
|
||||
}
|
||||
|
||||
fn compute_overlap(first_key: &[u8], key: &[u8]) -> usize {
|
||||
fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize {
|
||||
let mut i = 0;
|
||||
loop {
|
||||
if i >= first_key.len() || i >= key.len() {
|
||||
break;
|
||||
}
|
||||
if first_key[i] != key[i] {
|
||||
if first_key.raw_ref()[i] != key.raw_ref()[i] {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
@@ -35,7 +37,7 @@ impl BlockBuilder {
|
||||
offsets: Vec::new(),
|
||||
data: Vec::new(),
|
||||
block_size,
|
||||
first_key: Vec::new(),
|
||||
first_key: KeyVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,7 +48,7 @@ impl BlockBuilder {
|
||||
|
||||
/// Adds a key-value pair to the block. Returns false when the block is full.
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
||||
pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
|
||||
assert!(!key.is_empty(), "key must not be empty");
|
||||
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size
|
||||
&& !self.is_empty()
|
||||
@@ -55,20 +57,20 @@ impl BlockBuilder {
|
||||
}
|
||||
// Add the offset of the data into the offset array.
|
||||
self.offsets.push(self.data.len() as u16);
|
||||
let overlap = compute_overlap(&self.first_key, key);
|
||||
let overlap = compute_overlap(self.first_key.as_key_slice(), key);
|
||||
// Encode key overlap.
|
||||
self.data.put_u16(overlap as u16);
|
||||
// Encode key length.
|
||||
self.data.put_u16((key.len() - overlap) as u16);
|
||||
// Encode key content.
|
||||
self.data.put(&key[overlap..]);
|
||||
self.data.put(&key.raw_ref()[overlap..]);
|
||||
// Encode value length.
|
||||
self.data.put_u16(value.len() as u16);
|
||||
// Encode value content.
|
||||
self.data.put(value);
|
||||
|
||||
if self.first_key.is_empty() {
|
||||
self.first_key = key.to_vec();
|
||||
self.first_key = key.to_key_vec();
|
||||
}
|
||||
|
||||
true
|
||||
|
||||
@@ -2,7 +2,10 @@ use std::sync::Arc;
|
||||
|
||||
use bytes::Buf;
|
||||
|
||||
use crate::block::SIZEOF_U16;
|
||||
use crate::{
|
||||
block::SIZEOF_U16,
|
||||
key::{KeySlice, KeyVec},
|
||||
};
|
||||
|
||||
use super::Block;
|
||||
|
||||
@@ -11,22 +14,22 @@ pub struct BlockIterator {
|
||||
/// reference to the block
|
||||
block: Arc<Block>,
|
||||
/// the current key at the iterator position
|
||||
key: Vec<u8>,
|
||||
key: KeyVec,
|
||||
/// the value range from the block
|
||||
value_range: (usize, usize),
|
||||
/// the current index at the iterator position
|
||||
idx: usize,
|
||||
/// the first key in the block
|
||||
first_key: Vec<u8>,
|
||||
first_key: KeyVec,
|
||||
}
|
||||
|
||||
impl Block {
|
||||
fn get_first_key(&self) -> Vec<u8> {
|
||||
fn get_first_key(&self) -> KeyVec {
|
||||
let mut buf = &self.data[..];
|
||||
buf.get_u16();
|
||||
let key_len = buf.get_u16();
|
||||
let key = &buf[..key_len as usize];
|
||||
key.to_vec()
|
||||
KeyVec::from_vec(key.to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +38,7 @@ impl BlockIterator {
|
||||
Self {
|
||||
first_key: block.get_first_key(),
|
||||
block,
|
||||
key: Vec::new(),
|
||||
key: KeyVec::new(),
|
||||
value_range: (0, 0),
|
||||
idx: 0,
|
||||
}
|
||||
@@ -49,16 +52,16 @@ impl BlockIterator {
|
||||
}
|
||||
|
||||
/// Creates a block iterator and seek to the first key that >= `key`.
|
||||
pub fn create_and_seek_to_key(block: Arc<Block>, key: &[u8]) -> Self {
|
||||
pub fn create_and_seek_to_key(block: Arc<Block>, key: KeySlice) -> Self {
|
||||
let mut iter = Self::new(block);
|
||||
iter.seek_to_key(key);
|
||||
iter
|
||||
}
|
||||
|
||||
/// Returns the key of the current entry.
|
||||
pub fn key(&self) -> &[u8] {
|
||||
pub fn key(&self) -> KeySlice {
|
||||
debug_assert!(!self.key.is_empty(), "invalid iterator");
|
||||
&self.key
|
||||
self.key.as_key_slice()
|
||||
}
|
||||
|
||||
/// Returns the value of the current entry.
|
||||
@@ -103,11 +106,11 @@ impl BlockIterator {
|
||||
// we don't need to manually advance it
|
||||
let overlap_len = entry.get_u16() as usize;
|
||||
let key_len = entry.get_u16() as usize;
|
||||
let key = entry[..key_len].to_vec();
|
||||
entry.advance(key_len);
|
||||
let key = &entry[..key_len];
|
||||
self.key.clear();
|
||||
self.key.extend(&self.first_key[..overlap_len]);
|
||||
self.key.extend(key);
|
||||
self.key.append(&self.first_key.raw_ref()[..overlap_len]);
|
||||
self.key.append(key);
|
||||
entry.advance(key_len);
|
||||
let value_len = entry.get_u16() as usize;
|
||||
let value_offset_begin = offset + SIZEOF_U16 + SIZEOF_U16 + key_len + SIZEOF_U16;
|
||||
let value_offset_end = value_offset_begin + value_len;
|
||||
@@ -116,14 +119,14 @@ impl BlockIterator {
|
||||
}
|
||||
|
||||
/// Seek to the first key that is >= `key`.
|
||||
pub fn seek_to_key(&mut self, key: &[u8]) {
|
||||
pub fn seek_to_key(&mut self, key: KeySlice) {
|
||||
let mut low = 0;
|
||||
let mut high = self.block.offsets.len();
|
||||
while low < high {
|
||||
let mid = low + (high - low) / 2;
|
||||
self.seek_to(mid);
|
||||
assert!(self.is_valid());
|
||||
match self.key().cmp(key) {
|
||||
match self.key().cmp(&key) {
|
||||
std::cmp::Ordering::Less => low = mid + 1,
|
||||
std::cmp::Ordering::Greater => high = mid,
|
||||
std::cmp::Ordering::Equal => return,
|
||||
|
||||
@@ -18,6 +18,7 @@ use crate::iterators::concat_iterator::SstConcatIterator;
|
||||
use crate::iterators::merge_iterator::MergeIterator;
|
||||
use crate::iterators::two_merge_iterator::TwoMergeIterator;
|
||||
use crate::iterators::StorageIterator;
|
||||
use crate::key::KeySlice;
|
||||
use crate::lsm_storage::{LsmStorageInner, LsmStorageState};
|
||||
use crate::manifest::ManifestRecord;
|
||||
use crate::table::{SsTable, SsTableBuilder, SsTableIterator};
|
||||
@@ -112,7 +113,7 @@ pub enum CompactionOptions {
|
||||
impl LsmStorageInner {
|
||||
fn compact_generate_sst_from_iter(
|
||||
&self,
|
||||
mut iter: impl StorageIterator,
|
||||
mut iter: impl for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
compact_to_bottom_level: bool,
|
||||
) -> Result<Vec<Arc<SsTable>>> {
|
||||
let mut builder = None;
|
||||
|
||||
@@ -3,11 +3,15 @@ pub mod merge_iterator;
|
||||
pub mod two_merge_iterator;
|
||||
|
||||
pub trait StorageIterator {
|
||||
type KeyType<'a>: PartialEq + Eq + PartialOrd + Ord
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Get the current value.
|
||||
fn value(&self) -> &[u8];
|
||||
|
||||
/// Get the current key.
|
||||
fn key(&self) -> &[u8];
|
||||
fn key(&self) -> Self::KeyType<'_>;
|
||||
|
||||
/// Check if the current iterator is valid.
|
||||
fn is_valid(&self) -> bool;
|
||||
|
||||
@@ -2,7 +2,10 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::table::{SsTable, SsTableIterator};
|
||||
use crate::{
|
||||
key::KeySlice,
|
||||
table::{SsTable, SsTableIterator},
|
||||
};
|
||||
|
||||
use super::StorageIterator;
|
||||
|
||||
@@ -46,10 +49,10 @@ impl SstConcatIterator {
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
pub fn create_and_seek_to_key(sstables: Vec<Arc<SsTable>>, key: &[u8]) -> Result<Self> {
|
||||
pub fn create_and_seek_to_key(sstables: Vec<Arc<SsTable>>, key: KeySlice) -> Result<Self> {
|
||||
Self::check_sst_valid(&sstables);
|
||||
let idx: usize = sstables
|
||||
.partition_point(|table| table.first_key() <= key)
|
||||
.partition_point(|table| table.first_key().as_key_slice() <= key)
|
||||
.saturating_sub(1);
|
||||
if idx >= sstables.len() {
|
||||
return Ok(Self {
|
||||
@@ -89,7 +92,9 @@ impl SstConcatIterator {
|
||||
}
|
||||
|
||||
impl StorageIterator for SstConcatIterator {
|
||||
fn key(&self) -> &[u8] {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn key(&self) -> KeySlice {
|
||||
self.current.as_ref().unwrap().key()
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ use std::collections::BinaryHeap;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::key::KeySlice;
|
||||
|
||||
use super::StorageIterator;
|
||||
|
||||
struct HeapWrapper<I: StorageIterator>(pub usize, pub Box<I>);
|
||||
@@ -19,7 +21,7 @@ impl<I: StorageIterator> Eq for HeapWrapper<I> {}
|
||||
impl<I: StorageIterator> PartialOrd for HeapWrapper<I> {
|
||||
#[allow(clippy::non_canonical_partial_ord_impl)]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
|
||||
match self.1.key().cmp(other.1.key()) {
|
||||
match self.1.key().cmp(&other.1.key()) {
|
||||
cmp::Ordering::Greater => Some(cmp::Ordering::Greater),
|
||||
cmp::Ordering::Less => Some(cmp::Ordering::Less),
|
||||
cmp::Ordering::Equal => self.0.partial_cmp(&other.0),
|
||||
@@ -75,8 +77,12 @@ impl<I: StorageIterator> MergeIterator<I> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: StorageIterator> StorageIterator for MergeIterator<I> {
|
||||
fn key(&self) -> &[u8] {
|
||||
impl<I: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>> StorageIterator
|
||||
for MergeIterator<I>
|
||||
{
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn key(&self) -> KeySlice {
|
||||
self.current.as_ref().unwrap().1.key()
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::key::KeySlice;
|
||||
|
||||
use super::StorageIterator;
|
||||
|
||||
/// Merges two iterators of different types into one. If the two iterators have the same key, only
|
||||
@@ -10,7 +12,11 @@ pub struct TwoMergeIterator<A: StorageIterator, B: StorageIterator> {
|
||||
choose_a: bool,
|
||||
}
|
||||
|
||||
impl<A: StorageIterator, B: StorageIterator> TwoMergeIterator<A, B> {
|
||||
impl<
|
||||
A: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
B: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
> TwoMergeIterator<A, B>
|
||||
{
|
||||
fn choose_a(a: &A, b: &B) -> bool {
|
||||
if !a.is_valid() {
|
||||
return false;
|
||||
@@ -40,8 +46,14 @@ impl<A: StorageIterator, B: StorageIterator> TwoMergeIterator<A, B> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<A: StorageIterator, B: StorageIterator> StorageIterator for TwoMergeIterator<A, B> {
|
||||
fn key(&self) -> &[u8] {
|
||||
impl<
|
||||
A: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
B: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
> StorageIterator for TwoMergeIterator<A, B>
|
||||
{
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn key(&self) -> KeySlice {
|
||||
if self.choose_a {
|
||||
self.a.key()
|
||||
} else {
|
||||
|
||||
159
mini-lsm/src/key.rs
Normal file
159
mini-lsm/src/key.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
use std::fmt::Debug;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
/// A key: a thin wrapper around any container `T` that can be viewed as a byte slice.
///
/// The `KeySlice`, `KeyVec` and `KeyBytes` aliases name the instantiations used in this file.
pub struct Key<T: AsRef<[u8]>>(T);
|
||||
|
||||
/// A key that borrows its bytes from a `&'a [u8]`.
pub type KeySlice<'a> = Key<&'a [u8]>;
|
||||
/// A key that owns its bytes in a `Vec<u8>`.
pub type KeyVec = Key<Vec<u8>>;
|
||||
/// A key that holds its bytes in a `bytes::Bytes` buffer.
pub type KeyBytes = Key<Bytes>;
|
||||
|
||||
impl<T: AsRef<[u8]>> Key<T> {
|
||||
pub fn into_inner(self) -> T {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.0.as_ref().len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.as_ref().is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl Key<Vec<u8>> {
|
||||
pub fn new() -> Self {
|
||||
Self(Vec::new())
|
||||
}
|
||||
|
||||
/// Create a `KeyVec` from a `Vec<u8>`. Will be removed in week 3.
|
||||
pub fn from_vec(key: Vec<u8>) -> Self {
|
||||
Self(key)
|
||||
}
|
||||
|
||||
/// Clears the key and set ts to 0.
|
||||
pub fn clear(&mut self) {
|
||||
self.0.clear()
|
||||
}
|
||||
|
||||
/// Append a slice to the end of the key
|
||||
pub fn append(&mut self, data: &[u8]) {
|
||||
self.0.extend(data)
|
||||
}
|
||||
|
||||
/// Set the key from a slice without re-allocating. The signature will change in week 3.
|
||||
pub fn set_from_slice(&mut self, key_slice: KeySlice) {
|
||||
self.0.clear();
|
||||
self.0.extend(key_slice.0);
|
||||
}
|
||||
|
||||
pub fn as_key_slice(&self) -> KeySlice {
|
||||
Key(self.0.as_slice())
|
||||
}
|
||||
|
||||
pub fn into_key_bytes(self) -> KeyBytes {
|
||||
Key(self.0.into())
|
||||
}
|
||||
|
||||
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self {
|
||||
Self(key)
|
||||
}
|
||||
}
|
||||
|
||||
impl Key<Bytes> {
|
||||
pub fn as_key_slice(&self) -> KeySlice {
|
||||
Key(&self.0)
|
||||
}
|
||||
|
||||
/// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
|
||||
pub fn from_bytes(bytes: Bytes) -> KeyBytes {
|
||||
Key(bytes)
|
||||
}
|
||||
|
||||
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes {
|
||||
Key(bytes)
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Key<&'a [u8]> {
|
||||
pub fn to_key_vec(self) -> KeyVec {
|
||||
Key(self.0.to_vec())
|
||||
}
|
||||
|
||||
/// Create a key slice from a slice. Will be removed in week 3.
|
||||
pub fn from_slice(slice: &'a [u8]) -> Self {
|
||||
Self(slice)
|
||||
}
|
||||
|
||||
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(self) -> &'a [u8] {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(self) -> &'a [u8] {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self {
|
||||
Self(slice)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + Debug> Debug for Key<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + Default> Default for Key<T> {
|
||||
fn default() -> Self {
|
||||
Self(T::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + PartialEq> PartialEq for Key<T> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.eq(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: `Key<T>` is `Eq` whenever the wrapped `T` is `Eq`.
impl<T: AsRef<[u8]> + Eq> Eq for Key<T> {}
|
||||
|
||||
impl<T: AsRef<[u8]> + Clone> Clone for Key<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self(self.0.clone())
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: `Key<T>` is `Copy` whenever the wrapped `T` is `Copy` (e.g. `&[u8]` in `KeySlice`).
impl<T: AsRef<[u8]> + Copy> Copy for Key<T> {}
|
||||
|
||||
impl<T: AsRef<[u8]> + PartialOrd> PartialOrd for Key<T> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
self.0.partial_cmp(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]> + Ord> Ord for Key<T> {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.0.cmp(&other.0)
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ pub mod block;
|
||||
pub mod compact;
|
||||
pub mod debug;
|
||||
pub mod iterators;
|
||||
pub mod key;
|
||||
pub mod lsm_iterator;
|
||||
pub mod lsm_storage;
|
||||
pub mod manifest;
|
||||
|
||||
@@ -41,8 +41,8 @@ impl LsmIterator {
|
||||
}
|
||||
match self.end_bound.as_ref() {
|
||||
Bound::Unbounded => {}
|
||||
Bound::Included(key) => self.is_valid = self.inner.key() <= key.as_ref(),
|
||||
Bound::Excluded(key) => self.is_valid = self.inner.key() < key.as_ref(),
|
||||
Bound::Included(key) => self.is_valid = self.inner.key().raw_ref() <= key.as_ref(),
|
||||
Bound::Excluded(key) => self.is_valid = self.inner.key().raw_ref() < key.as_ref(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -56,12 +56,14 @@ impl LsmIterator {
|
||||
}
|
||||
|
||||
impl StorageIterator for LsmIterator {
|
||||
type KeyType<'a> = &'a [u8];
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
self.is_valid
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
self.inner.key()
|
||||
self.inner.key().raw_ref()
|
||||
}
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
@@ -97,11 +99,13 @@ impl<I: StorageIterator> FusedIterator<I> {
|
||||
}
|
||||
|
||||
impl<I: StorageIterator> StorageIterator for FusedIterator<I> {
|
||||
type KeyType<'a> = I::KeyType<'a> where Self: 'a;
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
!self.has_errored && self.iter.is_valid()
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
fn key(&self) -> Self::KeyType<'_> {
|
||||
if self.has_errored || !self.iter.is_valid() {
|
||||
panic!("invalid access to the underlying iterator");
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ use crate::iterators::concat_iterator::SstConcatIterator;
|
||||
use crate::iterators::merge_iterator::MergeIterator;
|
||||
use crate::iterators::two_merge_iterator::TwoMergeIterator;
|
||||
use crate::iterators::StorageIterator;
|
||||
use crate::key::KeySlice;
|
||||
use crate::lsm_iterator::{FusedIterator, LsmIterator};
|
||||
use crate::manifest::{Manifest, ManifestRecord};
|
||||
use crate::mem_table::{map_bound, MemTable};
|
||||
@@ -98,23 +99,23 @@ impl LsmStorageOptions {
|
||||
fn range_overlap(
|
||||
user_begin: Bound<&[u8]>,
|
||||
user_end: Bound<&[u8]>,
|
||||
table_begin: &[u8],
|
||||
table_end: &[u8],
|
||||
table_begin: KeySlice,
|
||||
table_end: KeySlice,
|
||||
) -> bool {
|
||||
match user_end {
|
||||
Bound::Excluded(key) if key <= table_begin => {
|
||||
Bound::Excluded(key) if key <= table_begin.raw_ref() => {
|
||||
return false;
|
||||
}
|
||||
Bound::Included(key) if key < table_begin => {
|
||||
Bound::Included(key) if key < table_begin.raw_ref() => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
match user_begin {
|
||||
Bound::Excluded(key) if key >= table_end => {
|
||||
Bound::Excluded(key) if key >= table_end.raw_ref() => {
|
||||
return false;
|
||||
}
|
||||
Bound::Included(key) if key > table_end => {
|
||||
Bound::Included(key) if key > table_end.raw_ref() => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
@@ -122,8 +123,8 @@ fn range_overlap(
|
||||
true
|
||||
}
|
||||
|
||||
fn key_within(user_key: &[u8], table_begin: &[u8], table_end: &[u8]) -> bool {
|
||||
table_begin <= user_key && user_key <= table_end
|
||||
fn key_within(user_key: &[u8], table_begin: KeySlice, table_end: KeySlice) -> bool {
|
||||
table_begin.raw_ref() <= user_key && user_key <= table_end.raw_ref()
|
||||
}
|
||||
|
||||
/// The storage interface of the LSM tree.
|
||||
@@ -425,7 +426,11 @@ impl LsmStorageInner {
|
||||
let mut l0_iters = Vec::with_capacity(snapshot.l0_sstables.len());
|
||||
|
||||
let keep_table = |key: &[u8], table: &SsTable| {
|
||||
if key_within(key, table.first_key(), table.last_key()) {
|
||||
if key_within(
|
||||
key,
|
||||
table.first_key().as_key_slice(),
|
||||
table.last_key().as_key_slice(),
|
||||
) {
|
||||
if let Some(bloom) = &table.bloom {
|
||||
if bloom.may_contain(farmhash::fingerprint32(key)) {
|
||||
return true;
|
||||
@@ -441,7 +446,8 @@ impl LsmStorageInner {
|
||||
let table = snapshot.sstables[table].clone();
|
||||
if keep_table(key, &table) {
|
||||
l0_iters.push(Box::new(SsTableIterator::create_and_seek_to_key(
|
||||
table, key,
|
||||
table,
|
||||
KeySlice::from_slice(key),
|
||||
)?));
|
||||
}
|
||||
}
|
||||
@@ -455,13 +461,14 @@ impl LsmStorageInner {
|
||||
level_ssts.push(table);
|
||||
}
|
||||
}
|
||||
let level_iter = SstConcatIterator::create_and_seek_to_key(level_ssts, key)?;
|
||||
let level_iter =
|
||||
SstConcatIterator::create_and_seek_to_key(level_ssts, KeySlice::from_slice(key))?;
|
||||
level_iters.push(Box::new(level_iter));
|
||||
}
|
||||
|
||||
let iter = TwoMergeIterator::create(l0_iter, MergeIterator::create(level_iters))?;
|
||||
|
||||
if iter.is_valid() && iter.key() == key && !iter.value().is_empty() {
|
||||
if iter.is_valid() && iter.key().raw_ref() == key && !iter.value().is_empty() {
|
||||
return Ok(Some(Bytes::copy_from_slice(iter.value())));
|
||||
}
|
||||
Ok(None)
|
||||
@@ -653,12 +660,22 @@ impl LsmStorageInner {
|
||||
let mut table_iters = Vec::with_capacity(snapshot.l0_sstables.len());
|
||||
for table_id in snapshot.l0_sstables.iter() {
|
||||
let table = snapshot.sstables[table_id].clone();
|
||||
if range_overlap(lower, upper, table.first_key(), table.last_key()) {
|
||||
if range_overlap(
|
||||
lower,
|
||||
upper,
|
||||
table.first_key().as_key_slice(),
|
||||
table.last_key().as_key_slice(),
|
||||
) {
|
||||
let iter = match lower {
|
||||
Bound::Included(key) => SsTableIterator::create_and_seek_to_key(table, key)?,
|
||||
Bound::Included(key) => {
|
||||
SsTableIterator::create_and_seek_to_key(table, KeySlice::from_slice(key))?
|
||||
}
|
||||
Bound::Excluded(key) => {
|
||||
let mut iter = SsTableIterator::create_and_seek_to_key(table, key)?;
|
||||
if iter.is_valid() && iter.key() == key {
|
||||
let mut iter = SsTableIterator::create_and_seek_to_key(
|
||||
table,
|
||||
KeySlice::from_slice(key),
|
||||
)?;
|
||||
if iter.is_valid() && iter.key().raw_ref() == key {
|
||||
iter.next()?;
|
||||
}
|
||||
iter
|
||||
@@ -676,16 +693,27 @@ impl LsmStorageInner {
|
||||
let mut level_ssts = Vec::with_capacity(level_sst_ids.len());
|
||||
for table in level_sst_ids {
|
||||
let table = snapshot.sstables[table].clone();
|
||||
if range_overlap(lower, upper, table.first_key(), table.last_key()) {
|
||||
if range_overlap(
|
||||
lower,
|
||||
upper,
|
||||
table.first_key().as_key_slice(),
|
||||
table.last_key().as_key_slice(),
|
||||
) {
|
||||
level_ssts.push(table);
|
||||
}
|
||||
}
|
||||
|
||||
let level_iter = match lower {
|
||||
Bound::Included(key) => SstConcatIterator::create_and_seek_to_key(level_ssts, key)?,
|
||||
Bound::Included(key) => SstConcatIterator::create_and_seek_to_key(
|
||||
level_ssts,
|
||||
KeySlice::from_slice(key),
|
||||
)?,
|
||||
Bound::Excluded(key) => {
|
||||
let mut iter = SstConcatIterator::create_and_seek_to_key(level_ssts, key)?;
|
||||
if iter.is_valid() && iter.key() == key {
|
||||
let mut iter = SstConcatIterator::create_and_seek_to_key(
|
||||
level_ssts,
|
||||
KeySlice::from_slice(key),
|
||||
)?;
|
||||
if iter.is_valid() && iter.key().raw_ref() == key {
|
||||
iter.next()?;
|
||||
}
|
||||
iter
|
||||
|
||||
@@ -10,6 +10,7 @@ use crossbeam_skiplist::SkipMap;
|
||||
use ouroboros::self_referencing;
|
||||
|
||||
use crate::iterators::StorageIterator;
|
||||
use crate::key::KeySlice;
|
||||
use crate::table::SsTableBuilder;
|
||||
use crate::wal::Wal;
|
||||
|
||||
@@ -110,7 +111,7 @@ impl MemTable {
|
||||
/// Flush the mem-table to SSTable. Implement in week 1 day 6.
|
||||
pub fn flush(&self, builder: &mut SsTableBuilder) -> Result<()> {
|
||||
for entry in self.map.iter() {
|
||||
builder.add(&entry.key()[..], &entry.value()[..]);
|
||||
builder.add(KeySlice::from_slice(&entry.key()[..]), &entry.value()[..]);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -158,12 +159,14 @@ impl MemTableIterator {
|
||||
}
|
||||
|
||||
impl StorageIterator for MemTableIterator {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
&self.borrow_item().1[..]
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
&self.borrow_item().0[..]
|
||||
fn key(&self) -> KeySlice {
|
||||
KeySlice::from_slice(&self.borrow_item().0[..])
|
||||
}
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
|
||||
@@ -8,10 +8,11 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
pub use builder::SsTableBuilder;
|
||||
use bytes::{Buf, BufMut, Bytes};
|
||||
use bytes::{Buf, BufMut};
|
||||
pub use iterator::SsTableIterator;
|
||||
|
||||
use crate::block::Block;
|
||||
use crate::key::{KeyBytes, KeySlice};
|
||||
use crate::lsm_storage::BlockCache;
|
||||
|
||||
use self::bloom::Bloom;
|
||||
@@ -21,9 +22,9 @@ pub struct BlockMeta {
|
||||
/// Offset of this data block.
|
||||
pub offset: usize,
|
||||
/// The first key of the data block.
|
||||
pub first_key: Bytes,
|
||||
pub first_key: KeyBytes,
|
||||
/// The last key of the data block.
|
||||
pub last_key: Bytes,
|
||||
pub last_key: KeyBytes,
|
||||
}
|
||||
|
||||
impl BlockMeta {
|
||||
@@ -49,9 +50,9 @@ impl BlockMeta {
|
||||
for meta in block_meta {
|
||||
buf.put_u32(meta.offset as u32);
|
||||
buf.put_u16(meta.first_key.len() as u16);
|
||||
buf.put_slice(&meta.first_key);
|
||||
buf.put_slice(meta.first_key.raw_ref());
|
||||
buf.put_u16(meta.last_key.len() as u16);
|
||||
buf.put_slice(&meta.last_key);
|
||||
buf.put_slice(meta.last_key.raw_ref());
|
||||
}
|
||||
assert_eq!(estimated_size, buf.len() - original_len);
|
||||
}
|
||||
@@ -62,9 +63,9 @@ impl BlockMeta {
|
||||
while buf.has_remaining() {
|
||||
let offset = buf.get_u32() as usize;
|
||||
let first_key_len = buf.get_u16() as usize;
|
||||
let first_key = buf.copy_to_bytes(first_key_len);
|
||||
let last_key_len = buf.get_u16() as usize;
|
||||
let last_key = buf.copy_to_bytes(last_key_len);
|
||||
let first_key = KeyBytes::from_bytes(buf.copy_to_bytes(first_key_len));
|
||||
let last_key_len: usize = buf.get_u16() as usize;
|
||||
let last_key = KeyBytes::from_bytes(buf.copy_to_bytes(last_key_len));
|
||||
block_meta.push(BlockMeta {
|
||||
offset,
|
||||
first_key,
|
||||
@@ -120,8 +121,8 @@ pub struct SsTable {
|
||||
pub(crate) block_meta_offset: usize,
|
||||
id: usize,
|
||||
block_cache: Option<Arc<BlockCache>>,
|
||||
first_key: Bytes,
|
||||
last_key: Bytes,
|
||||
first_key: KeyBytes,
|
||||
last_key: KeyBytes,
|
||||
pub(crate) bloom: Option<Bloom>,
|
||||
}
|
||||
impl SsTable {
|
||||
@@ -154,7 +155,12 @@ impl SsTable {
|
||||
}
|
||||
|
||||
/// Create a mock SST with only first key + last key metadata
|
||||
pub fn create_meta_only(id: usize, file_size: u64, first_key: Bytes, last_key: Bytes) -> Self {
|
||||
pub fn create_meta_only(
|
||||
id: usize,
|
||||
file_size: u64,
|
||||
first_key: KeyBytes,
|
||||
last_key: KeyBytes,
|
||||
) -> Self {
|
||||
Self {
|
||||
file: FileObject(None, file_size),
|
||||
block_meta: vec![],
|
||||
@@ -193,9 +199,9 @@ impl SsTable {
|
||||
}
|
||||
|
||||
/// Find the block that may contain `key`.
|
||||
pub fn find_block_idx(&self, key: &[u8]) -> usize {
|
||||
pub fn find_block_idx(&self, key: KeySlice) -> usize {
|
||||
self.block_meta
|
||||
.partition_point(|meta| meta.first_key <= key)
|
||||
.partition_point(|meta| meta.first_key.as_key_slice() <= key)
|
||||
.saturating_sub(1)
|
||||
}
|
||||
|
||||
@@ -204,11 +210,11 @@ impl SsTable {
|
||||
self.block_meta.len()
|
||||
}
|
||||
|
||||
pub fn first_key(&self) -> &Bytes {
|
||||
pub fn first_key(&self) -> &KeyBytes {
|
||||
&self.first_key
|
||||
}
|
||||
|
||||
pub fn last_key(&self) -> &Bytes {
|
||||
pub fn last_key(&self) -> &KeyBytes {
|
||||
&self.last_key
|
||||
}
|
||||
|
||||
|
||||
@@ -7,13 +7,14 @@ use bytes::BufMut;
|
||||
use super::bloom::Bloom;
|
||||
use super::{BlockMeta, FileObject, SsTable};
|
||||
use crate::block::BlockBuilder;
|
||||
use crate::key::{KeySlice, KeyVec};
|
||||
use crate::lsm_storage::BlockCache;
|
||||
|
||||
/// Builds an SSTable from key-value pairs.
|
||||
pub struct SsTableBuilder {
|
||||
builder: BlockBuilder,
|
||||
first_key: Vec<u8>,
|
||||
last_key: Vec<u8>,
|
||||
first_key: KeyVec,
|
||||
last_key: KeyVec,
|
||||
data: Vec<u8>,
|
||||
pub(crate) meta: Vec<BlockMeta>,
|
||||
block_size: usize,
|
||||
@@ -26,8 +27,8 @@ impl SsTableBuilder {
|
||||
Self {
|
||||
data: Vec::new(),
|
||||
meta: Vec::new(),
|
||||
first_key: Vec::new(),
|
||||
last_key: Vec::new(),
|
||||
first_key: KeyVec::new(),
|
||||
last_key: KeyVec::new(),
|
||||
block_size,
|
||||
builder: BlockBuilder::new(block_size),
|
||||
key_hashes: Vec::new(),
|
||||
@@ -35,17 +36,15 @@ impl SsTableBuilder {
|
||||
}
|
||||
|
||||
/// Adds a key-value pair to SSTable
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) {
|
||||
pub fn add(&mut self, key: KeySlice, value: &[u8]) {
|
||||
if self.first_key.is_empty() {
|
||||
self.first_key.clear();
|
||||
self.first_key.extend(key);
|
||||
self.first_key.set_from_slice(key);
|
||||
}
|
||||
|
||||
self.key_hashes.push(farmhash::fingerprint32(key));
|
||||
self.key_hashes.push(farmhash::fingerprint32(key.raw_ref()));
|
||||
|
||||
if self.builder.add(key, value) {
|
||||
self.last_key.clear();
|
||||
self.last_key.extend(key);
|
||||
self.last_key.set_from_slice(key);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -54,10 +53,8 @@ impl SsTableBuilder {
|
||||
|
||||
// add the key-value pair to the next block
|
||||
assert!(self.builder.add(key, value));
|
||||
self.first_key.clear();
|
||||
self.first_key.extend(key);
|
||||
self.last_key.clear();
|
||||
self.last_key.extend(key);
|
||||
self.first_key.set_from_slice(key);
|
||||
self.last_key.set_from_slice(key);
|
||||
}
|
||||
|
||||
/// Get the estimated size of the SSTable.
|
||||
@@ -70,8 +67,8 @@ impl SsTableBuilder {
|
||||
let encoded_block = builder.build().encode();
|
||||
self.meta.push(BlockMeta {
|
||||
offset: self.data.len(),
|
||||
first_key: std::mem::take(&mut self.first_key).into(),
|
||||
last_key: std::mem::take(&mut self.last_key).into(),
|
||||
first_key: std::mem::take(&mut self.first_key).into_key_bytes(),
|
||||
last_key: std::mem::take(&mut self.last_key).into_key_bytes(),
|
||||
});
|
||||
self.data.extend(encoded_block);
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ use anyhow::Result;
|
||||
use super::SsTable;
|
||||
use crate::block::BlockIterator;
|
||||
use crate::iterators::StorageIterator;
|
||||
use crate::key::KeySlice;
|
||||
|
||||
/// An iterator over the contents of an SSTable.
|
||||
pub struct SsTableIterator {
|
||||
@@ -40,7 +41,7 @@ impl SsTableIterator {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn seek_to_key_inner(table: &Arc<SsTable>, key: &[u8]) -> Result<(usize, BlockIterator)> {
|
||||
fn seek_to_key_inner(table: &Arc<SsTable>, key: KeySlice) -> Result<(usize, BlockIterator)> {
|
||||
let mut blk_idx = table.find_block_idx(key);
|
||||
let mut blk_iter =
|
||||
BlockIterator::create_and_seek_to_key(table.read_block_cached(blk_idx)?, key);
|
||||
@@ -55,7 +56,7 @@ impl SsTableIterator {
|
||||
}
|
||||
|
||||
/// Create a new iterator and seek to the first key-value pair which >= `key`.
|
||||
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: &[u8]) -> Result<Self> {
|
||||
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: KeySlice) -> Result<Self> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?;
|
||||
let iter = Self {
|
||||
blk_iter,
|
||||
@@ -66,7 +67,7 @@ impl SsTableIterator {
|
||||
}
|
||||
|
||||
/// Seek to the first key-value pair which >= `key`.
|
||||
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
|
||||
pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&self.table, key)?;
|
||||
self.blk_iter = blk_iter;
|
||||
self.blk_idx = blk_idx;
|
||||
@@ -75,11 +76,13 @@ impl SsTableIterator {
|
||||
}
|
||||
|
||||
impl StorageIterator for SsTableIterator {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
self.blk_iter.value()
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
fn key(&self) -> KeySlice {
|
||||
self.blk_iter.key()
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ use bytes::Bytes;
|
||||
|
||||
use crate::{
|
||||
iterators::StorageIterator,
|
||||
key::KeySlice,
|
||||
lsm_storage::{BlockCache, LsmStorageInner},
|
||||
table::{SsTable, SsTableBuilder},
|
||||
};
|
||||
@@ -35,6 +36,8 @@ impl MockIterator {
|
||||
}
|
||||
|
||||
impl StorageIterator for MockIterator {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn next(&mut self) -> Result<()> {
|
||||
if self.index < self.data.len() {
|
||||
self.index += 1;
|
||||
@@ -47,13 +50,13 @@ impl StorageIterator for MockIterator {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
fn key(&self) -> KeySlice {
|
||||
if let Some(error_when) = self.error_when {
|
||||
if self.index >= error_when {
|
||||
panic!("invalid access after next returns an error!");
|
||||
}
|
||||
}
|
||||
self.data[self.index].0.as_ref()
|
||||
KeySlice::for_testing_from_slice_no_ts(self.data[self.index].0.as_ref())
|
||||
}
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
@@ -79,7 +82,35 @@ pub fn as_bytes(x: &[u8]) -> Bytes {
|
||||
Bytes::copy_from_slice(x)
|
||||
}
|
||||
|
||||
pub fn check_iter_result(iter: &mut impl StorageIterator, expected: Vec<(Bytes, Bytes)>) {
|
||||
pub fn check_iter_result_by_key<I>(iter: &mut I, expected: Vec<(Bytes, Bytes)>)
|
||||
where
|
||||
I: for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
|
||||
{
|
||||
for (k, v) in expected {
|
||||
assert!(iter.is_valid());
|
||||
assert_eq!(
|
||||
k,
|
||||
iter.key().for_testing_key_ref(),
|
||||
"expected key: {:?}, actual key: {:?}",
|
||||
k,
|
||||
as_bytes(iter.key().for_testing_key_ref()),
|
||||
);
|
||||
assert_eq!(
|
||||
v,
|
||||
iter.value(),
|
||||
"expected value: {:?}, actual value: {:?}",
|
||||
v,
|
||||
as_bytes(iter.value()),
|
||||
);
|
||||
iter.next().unwrap();
|
||||
}
|
||||
assert!(!iter.is_valid());
|
||||
}
|
||||
|
||||
pub fn check_lsm_iter_result_by_key<I>(iter: &mut I, expected: Vec<(Bytes, Bytes)>)
|
||||
where
|
||||
I: for<'a> StorageIterator<KeyType<'a> = &'a [u8]>,
|
||||
{
|
||||
for (k, v) in expected {
|
||||
assert!(iter.is_valid());
|
||||
assert_eq!(
|
||||
@@ -119,7 +150,7 @@ pub fn generate_sst(
|
||||
) -> SsTable {
|
||||
let mut builder = SsTableBuilder::new(128);
|
||||
for (key, value) in data {
|
||||
builder.add(&key[..], &value[..]);
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(&key[..]), &value[..]);
|
||||
}
|
||||
builder.build(id, block_cache, path.as_ref()).unwrap()
|
||||
}
|
||||
|
||||
@@ -8,9 +8,10 @@ use crate::{
|
||||
lsm_iterator::FusedIterator,
|
||||
lsm_storage::{LsmStorageInner, LsmStorageOptions},
|
||||
mem_table::MemTable,
|
||||
tests::harness::check_lsm_iter_result_by_key,
|
||||
};
|
||||
|
||||
use super::harness::{check_iter_result, expect_iter_error, MockIterator};
|
||||
use super::harness::{check_iter_result_by_key, expect_iter_error, MockIterator};
|
||||
|
||||
#[test]
|
||||
fn test_task1_memtable_iter() {
|
||||
@@ -22,15 +23,15 @@ fn test_task1_memtable_iter() {
|
||||
|
||||
{
|
||||
let mut iter = memtable.scan(Bound::Unbounded, Bound::Unbounded);
|
||||
assert_eq!(iter.key(), b"key1");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"key1");
|
||||
assert_eq!(iter.value(), b"value1");
|
||||
assert!(iter.is_valid());
|
||||
iter.next().unwrap();
|
||||
assert_eq!(iter.key(), b"key2");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"key2");
|
||||
assert_eq!(iter.value(), b"value2");
|
||||
assert!(iter.is_valid());
|
||||
iter.next().unwrap();
|
||||
assert_eq!(iter.key(), b"key3");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"key3");
|
||||
assert_eq!(iter.value(), b"value3");
|
||||
assert!(iter.is_valid());
|
||||
iter.next().unwrap();
|
||||
@@ -39,11 +40,11 @@ fn test_task1_memtable_iter() {
|
||||
|
||||
{
|
||||
let mut iter = memtable.scan(Bound::Included(b"key1"), Bound::Included(b"key2"));
|
||||
assert_eq!(iter.key(), b"key1");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"key1");
|
||||
assert_eq!(iter.value(), b"value1");
|
||||
assert!(iter.is_valid());
|
||||
iter.next().unwrap();
|
||||
assert_eq!(iter.key(), b"key2");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"key2");
|
||||
assert_eq!(iter.value(), b"value2");
|
||||
assert!(iter.is_valid());
|
||||
iter.next().unwrap();
|
||||
@@ -52,7 +53,7 @@ fn test_task1_memtable_iter() {
|
||||
|
||||
{
|
||||
let mut iter = memtable.scan(Bound::Excluded(b"key1"), Bound::Excluded(b"key3"));
|
||||
assert_eq!(iter.key(), b"key2");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"key2");
|
||||
assert_eq!(iter.value(), b"value2");
|
||||
assert!(iter.is_valid());
|
||||
iter.next().unwrap();
|
||||
@@ -104,7 +105,7 @@ fn test_task2_merge_1() {
|
||||
Box::new(i3.clone()),
|
||||
]);
|
||||
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("a"), Bytes::from("1.1")),
|
||||
@@ -117,7 +118,7 @@ fn test_task2_merge_1() {
|
||||
|
||||
let mut iter = MergeIterator::create(vec![Box::new(i3), Box::new(i1), Box::new(i2)]);
|
||||
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("a"), Bytes::from("1.1")),
|
||||
@@ -169,7 +170,7 @@ fn test_task2_merge_2() {
|
||||
Box::new(i3.clone()),
|
||||
Box::new(i4.clone()),
|
||||
]);
|
||||
check_iter_result(&mut iter, result.clone());
|
||||
check_iter_result_by_key(&mut iter, result.clone());
|
||||
|
||||
let mut iter = MergeIterator::create(vec![
|
||||
Box::new(i2.clone()),
|
||||
@@ -177,17 +178,17 @@ fn test_task2_merge_2() {
|
||||
Box::new(i3.clone()),
|
||||
Box::new(i1.clone()),
|
||||
]);
|
||||
check_iter_result(&mut iter, result.clone());
|
||||
check_iter_result_by_key(&mut iter, result.clone());
|
||||
|
||||
let mut iter =
|
||||
MergeIterator::create(vec![Box::new(i4), Box::new(i3), Box::new(i2), Box::new(i1)]);
|
||||
check_iter_result(&mut iter, result);
|
||||
check_iter_result_by_key(&mut iter, result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_task2_merge_empty() {
|
||||
let mut iter = MergeIterator::<MockIterator>::create(vec![]);
|
||||
check_iter_result(&mut iter, vec![]);
|
||||
check_iter_result_by_key(&mut iter, vec![]);
|
||||
|
||||
let i1 = MockIterator::new(vec![
|
||||
(Bytes::from("a"), Bytes::from("1.1")),
|
||||
@@ -196,7 +197,7 @@ fn test_task2_merge_empty() {
|
||||
]);
|
||||
let i2 = MockIterator::new(vec![]);
|
||||
let mut iter = MergeIterator::<MockIterator>::create(vec![Box::new(i1), Box::new(i2)]);
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("a"), Bytes::from("1.1")),
|
||||
@@ -209,7 +210,7 @@ fn test_task2_merge_empty() {
|
||||
#[test]
|
||||
fn test_task2_merge_error() {
|
||||
let mut iter = MergeIterator::<MockIterator>::create(vec![]);
|
||||
check_iter_result(&mut iter, vec![]);
|
||||
check_iter_result_by_key(&mut iter, vec![]);
|
||||
|
||||
let i1 = MockIterator::new(vec![
|
||||
(Bytes::from("a"), Bytes::from("1.1")),
|
||||
@@ -276,7 +277,7 @@ fn test_task4_integration() {
|
||||
storage.put(b"3", b"233333").unwrap();
|
||||
{
|
||||
let mut iter = storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap();
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from_static(b"1"), Bytes::from_static(b"233333")),
|
||||
@@ -294,7 +295,7 @@ fn test_task4_integration() {
|
||||
let mut iter = storage
|
||||
.scan(Bound::Included(b"2"), Bound::Included(b"3"))
|
||||
.unwrap();
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![(Bytes::from_static(b"3"), Bytes::from_static(b"233333"))],
|
||||
);
|
||||
|
||||
@@ -2,39 +2,48 @@ use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
use crate::block::{Block, BlockBuilder, BlockIterator};
|
||||
use crate::{
|
||||
block::{Block, BlockBuilder, BlockIterator},
|
||||
key::{KeySlice, KeyVec},
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_block_build_single_key() {
|
||||
let mut builder = BlockBuilder::new(16);
|
||||
assert!(builder.add(b"233", b"233333"));
|
||||
assert!(builder.add(KeySlice::for_testing_from_slice_no_ts(b"233"), b"233333"));
|
||||
builder.build();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_block_build_full() {
|
||||
let mut builder = BlockBuilder::new(16);
|
||||
assert!(builder.add(b"11", b"11"));
|
||||
assert!(!builder.add(b"22", b"22"));
|
||||
assert!(builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), b"11"));
|
||||
assert!(!builder.add(KeySlice::for_testing_from_slice_no_ts(b"22"), b"22"));
|
||||
builder.build();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_block_build_large_1() {
|
||||
let mut builder = BlockBuilder::new(16);
|
||||
assert!(builder.add(b"11", &b"1".repeat(100)));
|
||||
assert!(builder.add(
|
||||
KeySlice::for_testing_from_slice_no_ts(b"11"),
|
||||
&b"1".repeat(100)
|
||||
));
|
||||
builder.build();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_block_build_large_2() {
|
||||
let mut builder = BlockBuilder::new(16);
|
||||
assert!(builder.add(b"11", b"1"));
|
||||
assert!(!builder.add(b"11", &b"1".repeat(100)));
|
||||
assert!(builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), b"1"));
|
||||
assert!(!builder.add(
|
||||
KeySlice::for_testing_from_slice_no_ts(b"11"),
|
||||
&b"1".repeat(100)
|
||||
));
|
||||
}
|
||||
|
||||
fn key_of(idx: usize) -> Vec<u8> {
|
||||
format!("key_{:03}", idx * 5).into_bytes()
|
||||
fn key_of(idx: usize) -> KeyVec {
|
||||
KeyVec::for_testing_from_vec_no_ts(format!("key_{:03}", idx * 5).into_bytes())
|
||||
}
|
||||
|
||||
fn value_of(idx: usize) -> Vec<u8> {
|
||||
@@ -50,7 +59,7 @@ fn generate_block() -> Block {
|
||||
for idx in 0..num_of_keys() {
|
||||
let key = key_of(idx);
|
||||
let value = value_of(idx);
|
||||
assert!(builder.add(&key[..], &value[..]));
|
||||
assert!(builder.add(key.as_key_slice(), &value[..]));
|
||||
}
|
||||
builder.build()
|
||||
}
|
||||
@@ -88,11 +97,11 @@ fn test_block_iterator() {
|
||||
let key = iter.key();
|
||||
let value = iter.value();
|
||||
assert_eq!(
|
||||
key,
|
||||
key_of(i),
|
||||
key.for_testing_key_ref(),
|
||||
key_of(i).for_testing_key_ref(),
|
||||
"expected key: {:?}, actual key: {:?}",
|
||||
as_bytes(&key_of(i)),
|
||||
as_bytes(key)
|
||||
as_bytes(key_of(i).for_testing_key_ref()),
|
||||
as_bytes(key.for_testing_key_ref())
|
||||
);
|
||||
assert_eq!(
|
||||
value,
|
||||
@@ -110,17 +119,17 @@ fn test_block_iterator() {
|
||||
#[test]
|
||||
fn test_block_seek_key() {
|
||||
let block = Arc::new(generate_block());
|
||||
let mut iter = BlockIterator::create_and_seek_to_key(block, &key_of(0));
|
||||
let mut iter = BlockIterator::create_and_seek_to_key(block, key_of(0).as_key_slice());
|
||||
for offset in 1..=5 {
|
||||
for i in 0..num_of_keys() {
|
||||
let key = iter.key();
|
||||
let value = iter.value();
|
||||
assert_eq!(
|
||||
key,
|
||||
key_of(i),
|
||||
key.for_testing_key_ref(),
|
||||
key_of(i).for_testing_key_ref(),
|
||||
"expected key: {:?}, actual key: {:?}",
|
||||
as_bytes(&key_of(i)),
|
||||
as_bytes(key)
|
||||
as_bytes(key_of(i).for_testing_key_ref()),
|
||||
as_bytes(key.for_testing_key_ref())
|
||||
);
|
||||
assert_eq!(
|
||||
value,
|
||||
@@ -129,8 +138,10 @@ fn test_block_seek_key() {
|
||||
as_bytes(&value_of(i)),
|
||||
as_bytes(value)
|
||||
);
|
||||
iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes());
|
||||
iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts(
|
||||
&format!("key_{:03}", i * 5 + offset).into_bytes(),
|
||||
));
|
||||
}
|
||||
iter.seek_to_key(b"k");
|
||||
iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts(b"k"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,12 +4,13 @@ use bytes::Bytes;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
|
||||
use crate::iterators::StorageIterator;
|
||||
use crate::key::{KeySlice, KeyVec};
|
||||
use crate::table::{SsTable, SsTableBuilder, SsTableIterator};
|
||||
|
||||
#[test]
|
||||
fn test_sst_build_single_key() {
|
||||
let mut builder = SsTableBuilder::new(16);
|
||||
builder.add(b"233", b"233333");
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(b"233"), b"233333");
|
||||
let dir = tempdir().unwrap();
|
||||
builder.build_for_test(dir.path().join("1.sst")).unwrap();
|
||||
}
|
||||
@@ -17,19 +18,19 @@ fn test_sst_build_single_key() {
|
||||
#[test]
|
||||
fn test_sst_build_two_blocks() {
|
||||
let mut builder = SsTableBuilder::new(16);
|
||||
builder.add(b"11", b"11");
|
||||
builder.add(b"22", b"22");
|
||||
builder.add(b"33", b"11");
|
||||
builder.add(b"44", b"22");
|
||||
builder.add(b"55", b"11");
|
||||
builder.add(b"66", b"22");
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), b"11");
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(b"22"), b"22");
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(b"33"), b"11");
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(b"44"), b"22");
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(b"55"), b"11");
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(b"66"), b"22");
|
||||
assert!(builder.meta.len() >= 2);
|
||||
let dir = tempdir().unwrap();
|
||||
builder.build_for_test(dir.path().join("1.sst")).unwrap();
|
||||
}
|
||||
|
||||
fn key_of(idx: usize) -> Vec<u8> {
|
||||
format!("key_{:03}", idx * 5).into_bytes()
|
||||
fn key_of(idx: usize) -> KeyVec {
|
||||
KeyVec::for_testing_from_vec_no_ts(format!("key_{:03}", idx * 5).into_bytes())
|
||||
}
|
||||
|
||||
fn value_of(idx: usize) -> Vec<u8> {
|
||||
@@ -45,7 +46,7 @@ fn generate_sst() -> (TempDir, SsTable) {
|
||||
for idx in 0..num_of_keys() {
|
||||
let key = key_of(idx);
|
||||
let value = value_of(idx);
|
||||
builder.add(&key[..], &value[..]);
|
||||
builder.add(key.as_key_slice(), &value[..]);
|
||||
}
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("1.sst");
|
||||
@@ -63,8 +64,14 @@ fn test_sst_decode() {
|
||||
let meta = sst.block_meta.clone();
|
||||
let new_sst = SsTable::open_for_test(sst.file).unwrap();
|
||||
assert_eq!(new_sst.block_meta, meta);
|
||||
assert_eq!(new_sst.first_key(), &key_of(0));
|
||||
assert_eq!(new_sst.last_key(), &key_of(num_of_keys() - 1));
|
||||
assert_eq!(
|
||||
new_sst.first_key().for_testing_key_ref(),
|
||||
key_of(0).for_testing_key_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
new_sst.last_key().for_testing_key_ref(),
|
||||
key_of(num_of_keys() - 1).for_testing_key_ref()
|
||||
);
|
||||
}
|
||||
|
||||
fn as_bytes(x: &[u8]) -> Bytes {
|
||||
@@ -81,11 +88,11 @@ fn test_sst_iterator() {
|
||||
let key = iter.key();
|
||||
let value = iter.value();
|
||||
assert_eq!(
|
||||
key,
|
||||
key_of(i),
|
||||
key.for_testing_key_ref(),
|
||||
key_of(i).for_testing_key_ref(),
|
||||
"expected key: {:?}, actual key: {:?}",
|
||||
as_bytes(&key_of(i)),
|
||||
as_bytes(key)
|
||||
as_bytes(key_of(i).for_testing_key_ref()),
|
||||
as_bytes(key.for_testing_key_ref())
|
||||
);
|
||||
assert_eq!(
|
||||
value,
|
||||
@@ -104,17 +111,17 @@ fn test_sst_iterator() {
|
||||
fn test_sst_seek_key() {
|
||||
let (_dir, sst) = generate_sst();
|
||||
let sst = Arc::new(sst);
|
||||
let mut iter = SsTableIterator::create_and_seek_to_key(sst, &key_of(0)).unwrap();
|
||||
let mut iter = SsTableIterator::create_and_seek_to_key(sst, key_of(0).as_key_slice()).unwrap();
|
||||
for offset in 1..=5 {
|
||||
for i in 0..num_of_keys() {
|
||||
let key = iter.key();
|
||||
let value = iter.value();
|
||||
assert_eq!(
|
||||
key,
|
||||
key_of(i),
|
||||
key.for_testing_key_ref(),
|
||||
key_of(i).for_testing_key_ref(),
|
||||
"expected key: {:?}, actual key: {:?}",
|
||||
as_bytes(&key_of(i)),
|
||||
as_bytes(key)
|
||||
as_bytes(key_of(i).for_testing_key_ref()),
|
||||
as_bytes(key.for_testing_key_ref())
|
||||
);
|
||||
assert_eq!(
|
||||
value,
|
||||
@@ -123,9 +130,12 @@ fn test_sst_seek_key() {
|
||||
as_bytes(&value_of(i)),
|
||||
as_bytes(value)
|
||||
);
|
||||
iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes())
|
||||
iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts(
|
||||
&format!("key_{:03}", i * 5 + offset).into_bytes(),
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
iter.seek_to_key(b"k").unwrap();
|
||||
iter.seek_to_key(KeySlice::for_testing_from_slice_no_ts(b"k"))
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::ops::Bound;
|
||||
|
||||
use self::harness::generate_sst;
|
||||
use self::harness::{check_iter_result, MockIterator};
|
||||
use self::harness::{check_iter_result_by_key, MockIterator};
|
||||
use self::harness::{check_lsm_iter_result_by_key, generate_sst};
|
||||
use bytes::Bytes;
|
||||
use tempfile::tempdir;
|
||||
|
||||
@@ -25,7 +25,7 @@ fn test_task1_merge_1() {
|
||||
(Bytes::from("d"), Bytes::from("4.2")),
|
||||
]);
|
||||
let mut iter = TwoMergeIterator::create(i1, i2).unwrap();
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("a"), Bytes::from("1.1")),
|
||||
@@ -50,7 +50,7 @@ fn test_task1_merge_2() {
|
||||
(Bytes::from("d"), Bytes::from("4.2")),
|
||||
]);
|
||||
let mut iter = TwoMergeIterator::create(i1, i2).unwrap();
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("a"), Bytes::from("1.2")),
|
||||
@@ -74,7 +74,7 @@ fn test_task1_merge_3() {
|
||||
(Bytes::from("d"), Bytes::from("4.2")),
|
||||
]);
|
||||
let mut iter = TwoMergeIterator::create(i1, i2).unwrap();
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("a"), Bytes::from("1.1")),
|
||||
@@ -94,7 +94,7 @@ fn test_task1_merge_4() {
|
||||
(Bytes::from("d"), Bytes::from("4.2")),
|
||||
]);
|
||||
let mut iter = TwoMergeIterator::create(i1, i2).unwrap();
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("b"), Bytes::from("2.2")),
|
||||
@@ -109,7 +109,7 @@ fn test_task1_merge_4() {
|
||||
(Bytes::from("d"), Bytes::from("4.2")),
|
||||
]);
|
||||
let mut iter = TwoMergeIterator::create(i1, i2).unwrap();
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from("b"), Bytes::from("2.2")),
|
||||
@@ -124,7 +124,7 @@ fn test_task1_merge_5() {
|
||||
let i2 = MockIterator::new(vec![]);
|
||||
let i1 = MockIterator::new(vec![]);
|
||||
let mut iter = TwoMergeIterator::create(i1, i2).unwrap();
|
||||
check_iter_result(&mut iter, vec![])
|
||||
check_iter_result_by_key(&mut iter, vec![])
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -164,7 +164,7 @@ fn test_task2_storage_scan() {
|
||||
snapshot.sstables.insert(sst1.sst_id(), sst1.into());
|
||||
*state = snapshot.into();
|
||||
}
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
|
||||
vec![
|
||||
(Bytes::from("0"), Bytes::from("2333333")),
|
||||
@@ -173,13 +173,13 @@ fn test_task2_storage_scan() {
|
||||
(Bytes::from("3"), Bytes::from("23333")),
|
||||
],
|
||||
);
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage
|
||||
.scan(Bound::Included(b"1"), Bound::Included(b"2"))
|
||||
.unwrap(),
|
||||
vec![(Bytes::from("2"), Bytes::from("2333"))],
|
||||
);
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage
|
||||
.scan(Bound::Excluded(b"1"), Bound::Excluded(b"3"))
|
||||
.unwrap(),
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::{ops::Bound, time::Duration};
|
||||
use bytes::Bytes;
|
||||
use tempfile::tempdir;
|
||||
|
||||
use self::harness::{check_iter_result, sync};
|
||||
use self::harness::{check_lsm_iter_result_by_key, sync};
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
@@ -41,7 +41,7 @@ fn test_task1_storage_scan() {
|
||||
assert_eq!(state.imm_memtables.len(), 2);
|
||||
}
|
||||
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
|
||||
vec![
|
||||
(Bytes::from("0"), Bytes::from("2333333")),
|
||||
@@ -50,13 +50,13 @@ fn test_task1_storage_scan() {
|
||||
(Bytes::from("3"), Bytes::from("23333")),
|
||||
],
|
||||
);
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage
|
||||
.scan(Bound::Included(b"1"), Bound::Included(b"2"))
|
||||
.unwrap(),
|
||||
vec![(Bytes::from("2"), Bytes::from("2333"))],
|
||||
);
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage
|
||||
.scan(Bound::Excluded(b"1"), Bound::Excluded(b"3"))
|
||||
.unwrap(),
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder};
|
||||
use crate::{
|
||||
key::KeySlice,
|
||||
table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder},
|
||||
};
|
||||
|
||||
fn key_of(idx: usize) -> Vec<u8> {
|
||||
format!("key_{:010}", idx * 5).into_bytes()
|
||||
@@ -49,7 +52,7 @@ fn test_task2_sst_decode() {
|
||||
for idx in 0..num_of_keys() {
|
||||
let key = key_of(idx);
|
||||
let value = value_of(idx);
|
||||
builder.add(&key[..], &value[..]);
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(&key[..]), &value[..]);
|
||||
}
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("1.sst");
|
||||
@@ -67,7 +70,7 @@ fn test_task3_block_key_compression() {
|
||||
for idx in 0..num_of_keys() {
|
||||
let key = key_of(idx);
|
||||
let value = value_of(idx);
|
||||
builder.add(&key[..], &value[..]);
|
||||
builder.add(KeySlice::for_testing_from_slice_no_ts(&key[..]), &value[..]);
|
||||
}
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("1.sst");
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
use std::{ops::Bound, path::Path, sync::Arc};
|
||||
|
||||
use self::harness::{check_iter_result_by_key, check_lsm_iter_result_by_key, sync};
|
||||
use bytes::Bytes;
|
||||
use tempfile::tempdir;
|
||||
use week2_day1::harness::sync;
|
||||
|
||||
use self::harness::check_iter_result;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
iterators::{
|
||||
concat_iterator::SstConcatIterator, merge_iterator::MergeIterator, StorageIterator,
|
||||
},
|
||||
key::KeySlice,
|
||||
lsm_storage::{LsmStorageInner, LsmStorageOptions, LsmStorageState},
|
||||
table::{SsTable, SsTableBuilder, SsTableIterator},
|
||||
};
|
||||
@@ -51,7 +50,7 @@ fn test_task1_full_compaction() {
|
||||
sync(&storage);
|
||||
assert_eq!(storage.state.read().l0_sstables.len(), 3);
|
||||
let mut iter = construct_merge_iterator_over_storage(&storage.state.read());
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from_static(b"0"), Bytes::from_static(b"")),
|
||||
@@ -62,7 +61,7 @@ fn test_task1_full_compaction() {
|
||||
storage.force_full_compaction().unwrap();
|
||||
assert!(storage.state.read().l0_sstables.is_empty());
|
||||
let mut iter = construct_merge_iterator_over_storage(&storage.state.read());
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![(Bytes::from_static(b"1"), Bytes::from_static(b"v2"))],
|
||||
);
|
||||
@@ -72,7 +71,7 @@ fn test_task1_full_compaction() {
|
||||
storage.delete(b"1").unwrap();
|
||||
sync(&storage);
|
||||
let mut iter = construct_merge_iterator_over_storage(&storage.state.read());
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from_static(b"0"), Bytes::from_static(b"v3")),
|
||||
@@ -83,7 +82,7 @@ fn test_task1_full_compaction() {
|
||||
storage.force_full_compaction().unwrap();
|
||||
assert!(storage.state.read().l0_sstables.is_empty());
|
||||
let mut iter = construct_merge_iterator_over_storage(&storage.state.read());
|
||||
check_iter_result(
|
||||
check_iter_result_by_key(
|
||||
&mut iter,
|
||||
vec![
|
||||
(Bytes::from_static(b"0"), Bytes::from_static(b"v3")),
|
||||
@@ -101,7 +100,10 @@ fn generate_concat_sst(
|
||||
let mut builder = SsTableBuilder::new(128);
|
||||
for idx in start_key..end_key {
|
||||
let key = format!("{:05}", idx);
|
||||
builder.add(key.as_bytes(), b"test");
|
||||
builder.add(
|
||||
KeySlice::for_testing_from_slice_no_ts(key.as_bytes()),
|
||||
b"test",
|
||||
);
|
||||
}
|
||||
let path = dir.as_ref().join(format!("{id}.sst"));
|
||||
builder.build_for_test(path).unwrap()
|
||||
@@ -122,22 +124,25 @@ fn test_task2_concat_iterator() {
|
||||
for key in 0..120 {
|
||||
let iter = SstConcatIterator::create_and_seek_to_key(
|
||||
sstables.clone(),
|
||||
format!("{:05}", key).as_bytes(),
|
||||
KeySlice::for_testing_from_slice_no_ts(format!("{:05}", key).as_bytes()),
|
||||
)
|
||||
.unwrap();
|
||||
if key < 10 {
|
||||
assert!(iter.is_valid());
|
||||
assert_eq!(iter.key(), b"00010");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"00010");
|
||||
} else if key >= 110 {
|
||||
assert!(!iter.is_valid());
|
||||
} else {
|
||||
assert!(iter.is_valid());
|
||||
assert_eq!(iter.key(), format!("{:05}", key).as_bytes());
|
||||
assert_eq!(
|
||||
iter.key().for_testing_key_ref(),
|
||||
format!("{:05}", key).as_bytes()
|
||||
);
|
||||
}
|
||||
}
|
||||
let iter = SstConcatIterator::create_and_seek_to_first(sstables.clone()).unwrap();
|
||||
assert!(iter.is_valid());
|
||||
assert_eq!(iter.key(), b"00010");
|
||||
assert_eq!(iter.key().for_testing_key_ref(), b"00010");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -169,7 +174,7 @@ fn test_task3_integration() {
|
||||
assert!(storage.state.read().l0_sstables.is_empty());
|
||||
assert!(!storage.state.read().levels[0].1.is_empty());
|
||||
|
||||
check_iter_result(
|
||||
check_lsm_iter_result_by_key(
|
||||
&mut storage.scan(Bound::Unbounded, Bound::Unbounded).unwrap(),
|
||||
vec![
|
||||
(Bytes::from("0"), Bytes::from("2333333")),
|
||||
|
||||
Reference in New Issue
Block a user