add key abstraction and prepare for MVCC (#28)

* add key abstraction and prepare for MVCC

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* a little bit type exercise

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* refactor tests

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* fix clippy warnings

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* refactor starter code

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* final touch docs

Signed-off-by: Alex Chi <iskyzh@gmail.com>

---------

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi Z
2024-01-25 10:59:08 +08:00
committed by GitHub
parent 2fefe3e505
commit a3a92359e1
42 changed files with 824 additions and 281 deletions

View File

@@ -4,12 +4,13 @@ use wrapper::mini_lsm_wrapper;
use std::collections::HashMap;
use std::sync::Arc;
use bytes::{Buf, BufMut, Bytes, BytesMut};
use bytes::{Buf, BufMut, BytesMut};
use clap::Parser;
use mini_lsm_wrapper::compact::{
LeveledCompactionController, LeveledCompactionOptions, SimpleLeveledCompactionController,
SimpleLeveledCompactionOptions, TieredCompactionController, TieredCompactionOptions,
};
use mini_lsm_wrapper::key::KeyBytes;
use mini_lsm_wrapper::lsm_storage::LsmStorageState;
use mini_lsm_wrapper::mem_table::MemTable;
use mini_lsm_wrapper::table::SsTable;
@@ -135,11 +136,11 @@ impl MockStorage {
"invalid file arrangement in L{}: id={}, range={:x}..={:x}; id={}, range={:x}..={:x}",
level,
this_file.sst_id(),
this_file.first_key().clone().get_u64(),
this_file.last_key().clone().get_u64(),
this_file.first_key().for_testing_key_ref().get_u64(),
this_file.last_key().for_testing_key_ref().get_u64(),
next_file.sst_id(),
next_file.first_key().clone().get_u64(),
next_file.last_key().clone().get_u64()
next_file.first_key().for_testing_key_ref().get_u64(),
next_file.last_key().for_testing_key_ref().get_u64()
);
}
}
@@ -184,7 +185,7 @@ impl MockStorage {
}
}
fn generate_random_key_range() -> (Bytes, Bytes) {
fn generate_random_key_range() -> (KeyBytes, KeyBytes) {
use rand::Rng;
let mut rng = rand::thread_rng();
let begin: usize = rng.gen_range(0..(1 << 31));
@@ -193,16 +194,19 @@ fn generate_random_key_range() -> (Bytes, Bytes) {
let mut end_bytes = BytesMut::new();
begin_bytes.put_u64(begin as u64);
end_bytes.put_u64(end as u64);
(begin_bytes.into(), end_bytes.into())
(
KeyBytes::for_testing_from_bytes_no_ts(begin_bytes.freeze()),
KeyBytes::for_testing_from_bytes_no_ts(end_bytes.freeze()),
)
}
fn generate_random_split(
mut begin_bytes: Bytes,
mut end_bytes: Bytes,
begin_bytes: KeyBytes,
end_bytes: KeyBytes,
split: usize,
) -> Vec<(Bytes, Bytes)> {
let begin = begin_bytes.get_u64();
let end = end_bytes.get_u64();
) -> Vec<(KeyBytes, KeyBytes)> {
let begin = begin_bytes.for_testing_key_ref().get_u64();
let end = end_bytes.for_testing_key_ref().get_u64();
let len = end - begin + 1;
let mut result = Vec::new();
let split = split as u64;
@@ -214,7 +218,10 @@ fn generate_random_split(
let mut end_bytes = BytesMut::new();
begin_bytes.put_u64(nb);
end_bytes.put_u64(ne);
result.push((begin_bytes.into(), end_bytes.into()));
result.push((
KeyBytes::for_testing_from_bytes_no_ts(begin_bytes.freeze()),
KeyBytes::for_testing_from_bytes_no_ts(end_bytes.freeze()),
));
}
result
}
@@ -502,8 +509,14 @@ fn main() {
.map(|id| format!(
"{}.sst {:x}..={:x}",
id,
storage.snapshot.sstables[id].first_key().clone().get_u64(),
storage.snapshot.sstables[id].last_key().clone().get_u64()
storage.snapshot.sstables[id]
.first_key()
.for_testing_key_ref()
.get_u64(),
storage.snapshot.sstables[id]
.last_key()
.for_testing_key_ref()
.get_u64()
))
.collect::<Vec<_>>()
.join(", ")
@@ -516,8 +529,14 @@ fn main() {
.map(|id| format!(
"{}.sst {:x}..={:x}",
id,
storage.snapshot.sstables[id].first_key().clone().get_u64(),
storage.snapshot.sstables[id].last_key().clone().get_u64()
storage.snapshot.sstables[id]
.first_key()
.for_testing_key_ref()
.get_u64(),
storage.snapshot.sstables[id]
.last_key()
.for_testing_key_ref()
.get_u64()
))
.collect::<Vec<_>>()
.join(", ")
@@ -529,8 +548,14 @@ fn main() {
.map(|id| format!(
"{}.sst {:x}..={:x}",
id,
storage.snapshot.sstables[id].first_key().clone().get_u64(),
storage.snapshot.sstables[id].last_key().clone().get_u64()
storage.snapshot.sstables[id]
.first_key()
.for_testing_key_ref()
.get_u64(),
storage.snapshot.sstables[id]
.last_key()
.for_testing_key_ref()
.get_u64()
))
.collect::<Vec<_>>()
.join(", ")

View File

@@ -1,10 +1,21 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use crate::key::{KeySlice, KeyVec};
use super::Block;
/// Builds a block.
pub struct BlockBuilder {}
pub struct BlockBuilder {
/// Offsets of each key-value entry.
offsets: Vec<u16>,
/// All serialized key-value pairs in the block.
data: Vec<u8>,
/// The expected block size.
block_size: usize,
/// The first key in the block
first_key: KeyVec,
}
impl BlockBuilder {
/// Creates a new block builder.
@@ -14,7 +25,7 @@ impl BlockBuilder {
/// Adds a key-value pair to the block. Returns false when the block is full.
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
unimplemented!()
}

View File

@@ -3,6 +3,8 @@
use std::sync::Arc;
use crate::key::{KeySlice, KeyVec};
use super::Block;
/// Iterates on a block.
@@ -10,20 +12,23 @@ pub struct BlockIterator {
/// The internal `Block`, wrapped by an `Arc`
block: Arc<Block>,
/// The current key, empty represents the iterator is invalid
key: Vec<u8>,
key: KeyVec,
/// the value range from the block
value_range: (usize, usize),
/// Current index of the key-value pair, should be in range of [0, num_of_elements)
idx: usize,
/// The first key in the block
first_key: KeyVec,
}
impl BlockIterator {
fn new(block: Arc<Block>) -> Self {
Self {
block,
key: Vec::new(),
key: KeyVec::new(),
value_range: (0, 0),
idx: 0,
first_key: KeyVec::new(),
}
}
@@ -33,12 +38,12 @@ impl BlockIterator {
}
/// Creates a block iterator and seek to the first key that >= `key`.
pub fn create_and_seek_to_key(block: Arc<Block>, key: &[u8]) -> Self {
pub fn create_and_seek_to_key(block: Arc<Block>, key: KeySlice) -> Self {
unimplemented!()
}
/// Returns the key of the current entry.
pub fn key(&self) -> &[u8] {
pub fn key(&self) -> KeySlice {
unimplemented!()
}
@@ -66,7 +71,7 @@ impl BlockIterator {
/// Seek to the first key that >= `key`.
/// Note: You should assume the key-value pairs in the block are sorted when being added by
/// callers.
pub fn seek_to_key(&mut self, key: &[u8]) {
pub fn seek_to_key(&mut self, key: KeySlice) {
unimplemented!()
}
}

View File

@@ -3,11 +3,15 @@ pub mod merge_iterator;
pub mod two_merge_iterator;
pub trait StorageIterator {
type KeyType<'a>: PartialEq + Eq + PartialOrd + Ord
where
Self: 'a;
/// Get the current value.
fn value(&self) -> &[u8];
/// Get the current key.
fn key(&self) -> &[u8];
fn key(&self) -> Self::KeyType<'_>;
/// Check if the current iterator is valid.
fn is_valid(&self) -> bool;

View File

@@ -6,7 +6,10 @@ use std::sync::Arc;
use anyhow::Result;
use super::StorageIterator;
use crate::table::{SsTable, SsTableIterator};
use crate::{
key::KeySlice,
table::{SsTable, SsTableIterator},
};
/// Concat multiple iterators ordered in key order and their key ranges do not overlap. We do not want to create the
/// iterators when initializing this iterator to reduce the overhead of seeking.
@@ -21,13 +24,15 @@ impl SstConcatIterator {
unimplemented!()
}
pub fn create_and_seek_to_key(sstables: Vec<Arc<SsTable>>, key: &[u8]) -> Result<Self> {
pub fn create_and_seek_to_key(sstables: Vec<Arc<SsTable>>, key: KeySlice) -> Result<Self> {
unimplemented!()
}
}
impl StorageIterator for SstConcatIterator {
fn key(&self) -> &[u8] {
type KeyType<'a> = KeySlice<'a>;
fn key(&self) -> KeySlice {
unimplemented!()
}

View File

@@ -6,6 +6,8 @@ use std::collections::BinaryHeap;
use anyhow::Result;
use crate::key::KeySlice;
use super::StorageIterator;
struct HeapWrapper<I: StorageIterator>(pub usize, pub Box<I>);
@@ -21,7 +23,7 @@ impl<I: StorageIterator> Eq for HeapWrapper<I> {}
impl<I: StorageIterator> PartialOrd for HeapWrapper<I> {
#[allow(clippy::non_canonical_partial_ord_impl)]
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
match self.1.key().cmp(other.1.key()) {
match self.1.key().cmp(&other.1.key()) {
cmp::Ordering::Greater => Some(cmp::Ordering::Greater),
cmp::Ordering::Less => Some(cmp::Ordering::Less),
cmp::Ordering::Equal => self.0.partial_cmp(&other.0),
@@ -49,8 +51,12 @@ impl<I: StorageIterator> MergeIterator<I> {
}
}
impl<I: StorageIterator> StorageIterator for MergeIterator<I> {
fn key(&self) -> &[u8] {
impl<I: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>> StorageIterator
for MergeIterator<I>
{
type KeyType<'a> = KeySlice<'a>;
fn key(&self) -> KeySlice {
unimplemented!()
}

View File

@@ -3,6 +3,8 @@
use anyhow::Result;
use crate::key::KeySlice;
use super::StorageIterator;
/// Merges two iterators of different types into one. If the two iterators have the same key, only
@@ -13,14 +15,24 @@ pub struct TwoMergeIterator<A: StorageIterator, B: StorageIterator> {
// Add fields as needed
}
impl<A: StorageIterator, B: StorageIterator> TwoMergeIterator<A, B> {
impl<
A: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
B: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
> TwoMergeIterator<A, B>
{
pub fn create(a: A, b: B) -> Result<Self> {
unimplemented!()
}
}
impl<A: StorageIterator, B: StorageIterator> StorageIterator for TwoMergeIterator<A, B> {
fn key(&self) -> &[u8] {
impl<
A: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
B: 'static + for<'a> StorageIterator<KeyType<'a> = KeySlice<'a>>,
> StorageIterator for TwoMergeIterator<A, B>
{
type KeyType<'a> = KeySlice<'a>;
fn key(&self) -> KeySlice {
unimplemented!()
}

159
mini-lsm-starter/src/key.rs Normal file
View File

@@ -0,0 +1,159 @@
use std::fmt::Debug;
use bytes::Bytes;
/// A key in the LSM tree, generic over its backing storage.
/// The wrapper exists so a timestamp can later be attached to keys
/// (the `_no_ts` / "will be removed in week 3" helpers below suggest
/// this is preparation for MVCC — confirm against the course handout).
pub struct Key<T: AsRef<[u8]>>(T);

/// A key borrowed from an existing buffer.
pub type KeySlice<'a> = Key<&'a [u8]>;
/// An owned, growable key.
pub type KeyVec = Key<Vec<u8>>;
/// An owned key backed by `bytes::Bytes` (cheap to clone).
pub type KeyBytes = Key<Bytes>;
impl<T: AsRef<[u8]>> Key<T> {
    /// Consumes the key and returns the underlying storage.
    pub fn into_inner(self) -> T {
        self.0
    }

    /// Returns the length of the key in bytes.
    pub fn len(&self) -> usize {
        self.0.as_ref().len()
    }

    /// Returns `true` if the key contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.0.as_ref().is_empty()
    }
}
impl Key<Vec<u8>> {
    /// Creates an empty `KeyVec`.
    pub fn new() -> Self {
        Self(Vec::new())
    }

    /// Create a `KeyVec` from a `Vec<u8>`. Will be removed in week 3.
    pub fn from_vec(key: Vec<u8>) -> Self {
        Self(key)
    }

    /// Clears the key and set ts to 0.
    pub fn clear(&mut self) {
        self.0.clear()
    }

    /// Append a slice to the end of the key
    pub fn append(&mut self, data: &[u8]) {
        // `extend_from_slice` is the specialized form for byte slices:
        // one reservation + memcpy instead of the generic iterator path.
        self.0.extend_from_slice(data)
    }

    /// Set the key from a slice without re-allocating. The signature will change in week 3.
    pub fn set_from_slice(&mut self, key_slice: KeySlice) {
        // Clearing keeps the existing capacity, so repeated calls reuse the buffer.
        self.0.clear();
        self.0.extend_from_slice(key_slice.0);
    }

    /// Borrows the key as a `KeySlice`.
    pub fn as_key_slice(&self) -> KeySlice {
        Key(self.0.as_slice())
    }

    /// Converts the key into a `KeyBytes`, transferring ownership of the buffer.
    pub fn into_key_bytes(self) -> KeyBytes {
        Key(self.0.into())
    }

    /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
    pub fn raw_ref(&self) -> &[u8] {
        self.0.as_ref()
    }

    /// Test-only accessor for the raw key bytes (no timestamp handling).
    pub fn for_testing_key_ref(&self) -> &[u8] {
        self.0.as_ref()
    }

    /// Test-only constructor from a `Vec<u8>` without a timestamp.
    pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self {
        Self(key)
    }
}
impl Key<Bytes> {
    /// Borrows the key as a `KeySlice`.
    pub fn as_key_slice(&self) -> KeySlice {
        Key(&self.0)
    }

    /// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
    pub fn from_bytes(bytes: Bytes) -> KeyBytes {
        Key(bytes)
    }

    /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
    pub fn raw_ref(&self) -> &[u8] {
        self.0.as_ref()
    }

    /// Test-only constructor from `Bytes` without a timestamp.
    pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes {
        Key(bytes)
    }

    /// Test-only accessor for the raw key bytes (no timestamp handling).
    pub fn for_testing_key_ref(&self) -> &[u8] {
        self.0.as_ref()
    }
}
impl<'a> Key<&'a [u8]> {
    /// Copies the borrowed key into an owned `KeyVec`.
    pub fn to_key_vec(self) -> KeyVec {
        Key(self.0.to_vec())
    }

    /// Create a key slice from a slice. Will be removed in week 3.
    pub fn from_slice(slice: &'a [u8]) -> Self {
        Self(slice)
    }

    /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
    // Takes `self` by value (the type is `Copy`) and returns the original
    // lifetime `'a`, not a reborrow of `self`.
    pub fn raw_ref(self) -> &'a [u8] {
        self.0
    }

    /// Test-only accessor for the raw key bytes (no timestamp handling).
    pub fn for_testing_key_ref(self) -> &'a [u8] {
        self.0
    }

    /// Test-only constructor from a byte slice without a timestamp.
    pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self {
        Self(slice)
    }
}
/// Formats the key exactly as its wrapped storage would format itself.
impl<T: AsRef<[u8]> + Debug> Debug for Key<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        Debug::fmt(&self.0, f)
    }
}
/// The default key is whatever the wrapped storage's default is
/// (an empty slice/vec/bytes for the three aliases above).
impl<T: AsRef<[u8]> + Default> Default for Key<T> {
    fn default() -> Self {
        Self(T::default())
    }
}
/// Two keys are equal exactly when their wrapped storage compares equal.
impl<T: AsRef<[u8]> + PartialEq> PartialEq for Key<T> {
    fn eq(&self, other: &Self) -> bool {
        self.0 == other.0
    }
}

impl<T: AsRef<[u8]> + Eq> Eq for Key<T> {}
// Manual impls (rather than `#[derive]`) so the bounds stay on `T` itself
// without requiring them on every use of `Key<T>`.
impl<T: AsRef<[u8]> + Clone> Clone for Key<T> {
    fn clone(&self) -> Self {
        Self(self.0.clone())
    }
}

impl<T: AsRef<[u8]> + Copy> Copy for Key<T> {}
// Ordering delegates to the wrapped storage; for all three aliases
// (`&[u8]`, `Vec<u8>`, `Bytes`) that is lexicographic byte order.
impl<T: AsRef<[u8]> + PartialOrd> PartialOrd for Key<T> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        self.0.partial_cmp(&other.0)
    }
}

impl<T: AsRef<[u8]> + Ord> Ord for Key<T> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.0.cmp(&other.0)
    }
}

View File

@@ -2,6 +2,7 @@ pub mod block;
pub mod compact;
pub mod debug;
pub mod iterators;
pub mod key;
pub mod lsm_iterator;
pub mod lsm_storage;
pub mod manifest;

View File

@@ -22,6 +22,8 @@ impl LsmIterator {
}
impl StorageIterator for LsmIterator {
type KeyType<'a> = &'a [u8];
fn is_valid(&self) -> bool {
unimplemented!()
}
@@ -53,11 +55,13 @@ impl<I: StorageIterator> FusedIterator<I> {
}
impl<I: StorageIterator> StorageIterator for FusedIterator<I> {
type KeyType<'a> = I::KeyType<'a> where Self: 'a;
fn is_valid(&self) -> bool {
unimplemented!()
}
fn key(&self) -> &[u8] {
fn key(&self) -> Self::KeyType<'_> {
unimplemented!()
}

View File

@@ -11,6 +11,7 @@ use crossbeam_skiplist::SkipMap;
use ouroboros::self_referencing;
use crate::iterators::StorageIterator;
use crate::key::KeySlice;
use crate::table::SsTableBuilder;
use crate::wal::Wal;
@@ -115,11 +116,13 @@ pub struct MemTableIterator {
}
impl StorageIterator for MemTableIterator {
type KeyType<'a> = KeySlice<'a>;
fn value(&self) -> &[u8] {
unimplemented!()
}
fn key(&self) -> &[u8] {
fn key(&self) -> KeySlice {
unimplemented!()
}

View File

@@ -11,10 +11,11 @@ use std::sync::Arc;
use anyhow::Result;
pub use builder::SsTableBuilder;
use bytes::{Buf, Bytes};
use bytes::Buf;
pub use iterator::SsTableIterator;
use crate::block::Block;
use crate::key::{KeyBytes, KeySlice};
use crate::lsm_storage::BlockCache;
use self::bloom::Bloom;
@@ -24,9 +25,9 @@ pub struct BlockMeta {
/// Offset of this data block.
pub offset: usize,
/// The first key of the data block.
pub first_key: Bytes,
pub first_key: KeyBytes,
/// The last key of the data block.
pub last_key: Bytes,
pub last_key: KeyBytes,
}
impl BlockMeta {
@@ -92,8 +93,8 @@ pub struct SsTable {
pub(crate) block_meta_offset: usize,
id: usize,
block_cache: Option<Arc<BlockCache>>,
first_key: Bytes,
last_key: Bytes,
first_key: KeyBytes,
last_key: KeyBytes,
pub(crate) bloom: Option<Bloom>,
}
@@ -109,7 +110,12 @@ impl SsTable {
}
/// Create a mock SST with only first key + last key metadata
pub fn create_meta_only(id: usize, file_size: u64, first_key: Bytes, last_key: Bytes) -> Self {
pub fn create_meta_only(
id: usize,
file_size: u64,
first_key: KeyBytes,
last_key: KeyBytes,
) -> Self {
Self {
file: FileObject(None, file_size),
block_meta: vec![],
@@ -135,7 +141,7 @@ impl SsTable {
/// Find the block that may contain `key`.
/// Note: You may want to make use of the `first_key` stored in `BlockMeta`.
/// You may also assume the key-value pairs stored in each consecutive block are sorted.
pub fn find_block_idx(&self, key: &[u8]) -> usize {
pub fn find_block_idx(&self, key: KeySlice) -> usize {
unimplemented!()
}
@@ -144,11 +150,11 @@ impl SsTable {
self.block_meta.len()
}
pub fn first_key(&self) -> &Bytes {
pub fn first_key(&self) -> &KeyBytes {
&self.first_key
}
pub fn last_key(&self) -> &Bytes {
pub fn last_key(&self) -> &KeyBytes {
&self.last_key
}

View File

@@ -7,7 +7,7 @@ use std::sync::Arc;
use anyhow::Result;
use super::{BlockMeta, SsTable};
use crate::{block::BlockBuilder, lsm_storage::BlockCache};
use crate::{block::BlockBuilder, key::KeySlice, lsm_storage::BlockCache};
/// Builds an SSTable from key-value pairs.
pub struct SsTableBuilder {
@@ -29,7 +29,7 @@ impl SsTableBuilder {
///
/// Note: You should split a new block when the current block is full.(`std::mem::replace` may
/// be helpful here)
pub fn add(&mut self, key: &[u8], value: &[u8]) {
pub fn add(&mut self, key: KeySlice, value: &[u8]) {
unimplemented!()
}

View File

@@ -6,7 +6,7 @@ use std::sync::Arc;
use anyhow::Result;
use super::SsTable;
use crate::{block::BlockIterator, iterators::StorageIterator};
use crate::{block::BlockIterator, iterators::StorageIterator, key::KeySlice};
/// An iterator over the contents of an SSTable.
pub struct SsTableIterator {
@@ -27,21 +27,23 @@ impl SsTableIterator {
}
/// Create a new iterator and seek to the first key-value pair which >= `key`.
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: &[u8]) -> Result<Self> {
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: KeySlice) -> Result<Self> {
unimplemented!()
}
/// Seek to the first key-value pair which >= `key`.
/// Note: You probably want to review the handout for detailed explanation when implementing
/// this function.
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> {
unimplemented!()
}
}
impl StorageIterator for SsTableIterator {
type KeyType<'a> = KeySlice<'a>;
/// Return the `key` that's held by the underlying block iterator.
fn key(&self) -> &[u8] {
fn key(&self) -> KeySlice {
unimplemented!()
}

View File

@@ -1 +1,2 @@
//! DO NOT MODIFY -- Mini-LSM tests modules
//! This file will be automatically rewritten by the copy-test command.

View File