feat(code): part 3 iterators

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi
2022-12-24 10:11:06 -05:00
parent b263ea4fac
commit 4eb2177a3e
39 changed files with 1304 additions and 73 deletions

View File

@@ -4,12 +4,12 @@
mod builder;
mod iterator;
use bytes::Bytes;
pub use builder::BlockBuilder;
use bytes::Bytes;
pub use iterator::BlockIterator;
/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted key-value pairs.
/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted
/// key-value pairs.
pub struct Block {
data: Vec<u8>,
offsets: Vec<u16>,

View File

@@ -0,0 +1,6 @@
pub mod impls;
pub mod merge_iterator;
pub mod two_merge_iterator;
#[cfg(test)]
mod tests;

View File

@@ -0,0 +1,15 @@
use anyhow::Result;
/// Cursor-style iterator over key-value pairs, exposing both as byte slices.
///
/// Unlike `std::iter::Iterator`, advancing (`next`) and reading (`key` / `value`) are separate
/// calls, and `next` reports failure through an `anyhow::Result`.
// NOTE(review): presumably `key` and `value` are only meaningful while `is_valid` returns
// true — confirm against the implementations.
pub trait StorageIterator {
/// Get the current value.
fn value(&self) -> &[u8];
/// Get the current key.
fn key(&self) -> &[u8];
/// Check if the current iterator is valid.
fn is_valid(&self) -> bool;
/// Move to the next position.
///
/// # Errors
///
/// Returns an error if the implementation fails to advance.
fn next(&mut self) -> Result<()>;
}

View File

@@ -0,0 +1,67 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use std::cmp::{self};
use std::collections::BinaryHeap;
use anyhow::Result;
use super::impls::StorageIterator;
/// Heap entry pairing an iterator with its index.
///
/// Field `0` is the index and field `1` is the boxed iterator. Entries are compared by the
/// iterator's current key and, for equal keys, by index. The resulting ordering is then
/// reversed so that `BinaryHeap` (a max-heap) pops the smallest key first and, among equal
/// keys, the smallest index first.
struct HeapWrapper<I: StorageIterator>(pub usize, pub Box<I>);

impl<I: StorageIterator> PartialEq for HeapWrapper<I> {
    /// Two entries are equal when `cmp` says so: same current key and same index.
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == cmp::Ordering::Equal
    }
}

impl<I: StorageIterator> Eq for HeapWrapper<I> {}

impl<I: StorageIterator> PartialOrd for HeapWrapper<I> {
    /// Always `Some`: the ordering is total, so this simply defers to `Ord::cmp`.
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<I: StorageIterator> Ord for HeapWrapper<I> {
    /// Compares by current key, then by index, and reverses the result for max-heap use.
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        self.1
            .key()
            .cmp(other.1.key())
            // Equal keys: fall back to the index so the ordering stays total.
            .then_with(|| self.0.cmp(&other.0))
            .reverse()
    }
}
/// Merge multiple iterators of the same type. If the same key occurs multiple times in some
/// iterators, prefer the one with smaller index.
pub struct MergeIterator<I: StorageIterator> {
// Heap of wrapped input iterators, ordered by `HeapWrapper`'s `Ord` implementation.
iters: BinaryHeap<HeapWrapper<I>>,
// NOTE(review): presumably the entry whose key/value are currently exposed — confirm once the
// methods below are implemented.
current: HeapWrapper<I>,
}
impl<I: StorageIterator> MergeIterator<I> {
/// Create a merge iterator from a list of boxed iterators.
///
/// # Panics
///
/// Not implemented yet: calling this currently always panics via `unimplemented!()`.
pub fn create(iters: Vec<Box<I>>) -> Self {
unimplemented!()
}
}
// `StorageIterator` implementation for the merged view.
// Every method is still a stub and panics via `unimplemented!()` when called.
impl<I: StorageIterator> StorageIterator for MergeIterator<I> {
fn key(&self) -> &[u8] {
unimplemented!()
}
fn value(&self) -> &[u8] {
unimplemented!()
}
fn is_valid(&self) -> bool {
unimplemented!()
}
fn next(&mut self) -> Result<()> {
unimplemented!()
}
}

View File

@@ -0,0 +1,4 @@
//! Please copy `mini-lsm/src/iterators/tests.rs` here so that you can run tests.
pub mod merge_iterator_test;
pub mod two_merge_iterator_test;

View File

@@ -0,0 +1,2 @@
//! Please copy `mini-lsm/src/iterators/tests/merge_iterator_test.rs` here so that you can run
//! tests.

View File

@@ -0,0 +1,2 @@
//! Please copy `mini-lsm/src/iterators/tests/two_merge_iterator_test.rs` here so that you can run
//! tests.

View File

@@ -0,0 +1,38 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use anyhow::Result;
use super::impls::StorageIterator;
/// Merges two iterators of different types into one. If the two iterators have the same key, only
/// produce the key once and prefer the entry from A.
pub struct TwoMergeIterator<A: StorageIterator, B: StorageIterator> {
// The preferred iterator when both sides hold the same key (see the type-level docs).
a: A,
// The second iterator.
b: B,
// Add fields as needed
}
impl<A: StorageIterator, B: StorageIterator> TwoMergeIterator<A, B> {
/// Create a merging iterator over `a` and `b`.
///
/// # Panics
///
/// Not implemented yet: calling this currently always panics via `unimplemented!()`.
pub fn create(a: A, b: B) -> Result<Self> {
unimplemented!()
}
}
// `StorageIterator` implementation for the two-way merge.
// Every method is still a stub and panics via `unimplemented!()` when called.
impl<A: StorageIterator, B: StorageIterator> StorageIterator for TwoMergeIterator<A, B> {
fn key(&self) -> &[u8] {
unimplemented!()
}
fn value(&self) -> &[u8] {
unimplemented!()
}
fn is_valid(&self) -> bool {
unimplemented!()
}
fn next(&mut self) -> Result<()> {
unimplemented!()
}
}

View File

@@ -1,3 +1,6 @@
pub mod block;
pub mod storage;
pub mod iterators;
pub mod lsm_iterator;
pub mod lsm_storage;
pub mod mem_table;
pub mod table;

View File

@@ -0,0 +1 @@
/// Iterator type returned by `LsmStorage::scan`. Currently an empty struct with no fields.
pub struct LsmIterator {}

View File

@@ -0,0 +1,75 @@
use std::ops::Bound;
use std::path::Path;
use std::sync::Arc;
use anyhow::Result;
use arc_swap::ArcSwap;
use bytes::Bytes;
use crate::lsm_iterator::LsmIterator;
use crate::mem_table::MemTable;
use crate::table::{SsTable, SsTableIterator};
/// The state stored behind `LsmStorage`'s `ArcSwap`: the mem-tables and the SSTables.
pub struct LsmStorageInner {
// Mem-tables; `LsmStorage::get` checks them in vector order and returns on the first hit.
memtables: Vec<Arc<MemTable>>,
// SSTables; `LsmStorage::get` walks them in reverse vector order.
sstables: Vec<Arc<SsTable>>,
}
impl LsmStorageInner {
    /// Builds the initial state: exactly one freshly created mem-table and no SSTables.
    fn create() -> Self {
        let initial_memtable = Arc::new(MemTable::create());
        Self {
            memtables: vec![initial_memtable],
            sstables: Vec::new(),
        }
    }
}
/// The storage interface of the LSM tree.
pub struct LsmStorage {
// Current state; `get` obtains a snapshot of it with `ArcSwap::load`.
inner: ArcSwap<LsmStorageInner>,
}
impl LsmStorage {
    /// Opens the storage.
    ///
    /// The path is currently ignored: a fresh state with a single mem-table and no SSTables is
    /// always created.
    pub fn open(_path: &Path) -> Result<Self> {
        Ok(Self {
            inner: ArcSwap::from_pointee(LsmStorageInner::create()),
        })
    }

    /// Looks up `key`, returning its value or `None` if it is absent or deleted.
    ///
    /// Mem-tables are consulted in order and the first hit wins; an empty value is treated as
    /// a tombstone and reported as `None`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the mem-table lookup and from creating the SSTable iterators.
    pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
        let snapshot = self.inner.load();
        for memtable in &snapshot.memtables {
            if let Some(value) = memtable.get(key)? {
                return Ok(if value.is_empty() {
                    // Found a tombstone: report the key as not existing.
                    None
                } else {
                    Some(value)
                });
            }
        }

        // Position one iterator per SSTable at `key`, visiting the tables in reverse order.
        let mut iters = Vec::with_capacity(snapshot.sstables.len());
        for table in snapshot.sstables.iter().rev() {
            iters.push(SsTableIterator::create_and_seek_to_key(table.clone(), key)?);
        }

        // TODO: the SSTable iterators are built but not consulted yet, so a key that is not in
        // any mem-table is always reported as missing.
        Ok(None)
    }

    /// Stores `value` under `key`.
    ///
    /// # Panics
    ///
    /// Panics if `value` or `key` is empty (an empty value is reserved as the tombstone that
    /// `get` recognizes). Otherwise not implemented yet: panics via `unimplemented!()`.
    pub fn put(&mut self, key: &[u8], value: &[u8]) -> Result<()> {
        assert!(!value.is_empty(), "value cannot be empty");
        assert!(!key.is_empty(), "key cannot be empty");
        unimplemented!()
    }

    /// Deletes a key.
    ///
    /// # Panics
    ///
    /// Not implemented yet: always panics via `unimplemented!()`.
    pub fn delete(&mut self, _key: &[u8]) -> Result<()> {
        unimplemented!()
    }

    /// Syncs the storage.
    ///
    /// # Panics
    ///
    /// Not implemented yet: always panics via `unimplemented!()`.
    pub fn sync(&mut self) -> Result<()> {
        unimplemented!()
    }

    /// Creates an iterator over the keys between the `_lower` and `_upper` bounds.
    ///
    /// # Panics
    ///
    /// Not implemented yet: always panics via `unimplemented!()`.
    pub fn scan(&self, _lower: Bound<&[u8]>, _upper: Bound<&[u8]>) -> Result<LsmIterator> {
        unimplemented!()
    }
}

View File

@@ -0,0 +1,79 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use std::ops::Bound;
use anyhow::Result;
use bytes::Bytes;
use crossbeam_skiplist::SkipMap;
use crate::iterators::impls::StorageIterator;
use crate::table::SsTableBuilder;
/// A basic mem-table based on crossbeam-skiplist
pub struct MemTable {
// Skip list mapping keys to values, both stored as `Bytes`.
map: SkipMap<Bytes, Bytes>,
}
// All methods below are still stubs and panic via `unimplemented!()` when called.
impl MemTable {
/// Create a new mem-table.
pub fn create() -> Self {
unimplemented!()
}
/// Get a value by key.
pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
unimplemented!()
}
/// Put a key-value pair into the mem-table.
pub fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
unimplemented!()
}
/// Get an iterator over a range of keys.
pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> Result<MemTableIterator> {
unimplemented!()
}
/// Flush the mem-table to SSTable.
pub fn flush(&self, builder: &mut SsTableBuilder) -> Result<()> {
unimplemented!()
}
}
/// Shorthand for the range iterator of a `SkipMap<Bytes, Bytes>` whose range is given as a
/// `(Bound<Bytes>, Bound<Bytes>)` pair.
type SkipMapRangeIter<'a> =
crossbeam_skiplist::map::Range<'a, Bytes, (Bound<Bytes>, Bound<Bytes>), Bytes, Bytes>;
/// An iterator over a range of `SkipMap`.
pub struct MemTableIterator<'a> {
// Zero-sized marker that makes the struct use the lifetime `'a`.
// NOTE(review): presumably a placeholder until real fields are added — confirm.
_phantom: std::marker::PhantomData<&'a ()>,
}
impl<'a> MemTableIterator<'a> {
/// Create an iterator from a skip-list range iterator.
///
/// # Panics
///
/// Not implemented yet: calling this currently always panics via `unimplemented!()`.
fn new(iter: SkipMapRangeIter<'a>) -> Self {
unimplemented!()
}
}
// `StorageIterator` implementation for the mem-table iterator.
// Every method is still a stub and panics via `unimplemented!()` when called.
impl StorageIterator for MemTableIterator<'_> {
fn value(&self) -> &[u8] {
unimplemented!()
}
fn key(&self) -> &[u8] {
unimplemented!()
}
fn is_valid(&self) -> bool {
unimplemented!()
}
fn next(&mut self) -> Result<()> {
unimplemented!()
}
}
#[cfg(test)]
#[path = "mem_table_test.rs"]
mod tests;

View File

@@ -0,0 +1 @@
//! Please copy `mini-lsm/src/mem_table_test.rs` here so that you can run tests.

View File

@@ -4,14 +4,15 @@
mod builder;
mod iterator;
use std::{path::Path, sync::Arc};
use std::path::Path;
use std::sync::Arc;
use anyhow::Result;
pub use builder::SsTableBuilder;
use bytes::{Buf, Bytes};
pub use iterator::SsTableIterator;
use crate::block::Block;
use anyhow::Result;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BlockMeta {
@@ -25,7 +26,8 @@ impl BlockMeta {
/// Encode block meta to a buffer.
pub fn encode_block_meta(
block_meta: &[BlockMeta],
#[allow(clippy::ptr_arg)] /* remove this allow after you finish */ buf: &mut Vec<u8>,
#[allow(clippy::ptr_arg)] // remove this allow after you finish
buf: &mut Vec<u8>,
) {
unimplemented!()
}

View File

@@ -1,9 +1,10 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use anyhow::Result;
use std::path::Path;
use anyhow::Result;
use super::{BlockMeta, SsTable};
/// Builds an SSTable from key-value pairs.
@@ -14,17 +15,22 @@ pub struct SsTableBuilder {
impl SsTableBuilder {
/// Create a builder based on target SST size and target block size.
pub fn new(target_size: usize, block_size: usize) -> Self {
pub fn new(block_size: usize) -> Self {
unimplemented!()
}
/// Adds a key-value pair to SSTable, return false when SST full.
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
/// Adds a key-value pair to SSTable
pub fn add(&mut self, key: &[u8], value: &[u8]) {
unimplemented!()
}
/// Builds the SSTable and writes it to the given path. No need to actually write to disk until chapter 4 block cache.
/// Get the estimated size of the SSTable.
pub fn estimated_size(&self) -> usize {
unimplemented!()
}
/// Builds the SSTable and writes it to the given path. No need to actually write to disk until
/// chapter 4 block cache.
pub fn build(self, path: impl AsRef<Path>) -> Result<SsTable> {
unimplemented!()
}

View File

@@ -1,9 +1,10 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use anyhow::Result;
use std::sync::Arc;
use anyhow::Result;
use super::SsTable;
/// An iterator over the contents of an SSTable.