feat(code): part 3 iterators
Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
@@ -6,4 +6,7 @@ publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
arc-swap = "1"
|
||||
bytes = "1"
|
||||
crossbeam-skiplist = "0.1"
|
||||
parking_lot = "0.12"
|
||||
|
||||
@@ -4,12 +4,12 @@
|
||||
mod builder;
|
||||
mod iterator;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
pub use builder::BlockBuilder;
|
||||
use bytes::Bytes;
|
||||
pub use iterator::BlockIterator;
|
||||
|
||||
/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted key-value pairs.
|
||||
/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted
|
||||
/// key-value pairs.
|
||||
pub struct Block {
|
||||
data: Vec<u8>,
|
||||
offsets: Vec<u16>,
|
||||
|
||||
6
mini-lsm-starter/src/iterators.rs
Normal file
6
mini-lsm-starter/src/iterators.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
pub mod impls;
|
||||
pub mod merge_iterator;
|
||||
pub mod two_merge_iterator;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
15
mini-lsm-starter/src/iterators/impls.rs
Normal file
15
mini-lsm-starter/src/iterators/impls.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
use anyhow::Result;
|
||||
|
||||
pub trait StorageIterator {
|
||||
/// Get the current value.
|
||||
fn value(&self) -> &[u8];
|
||||
|
||||
/// Get the current key.
|
||||
fn key(&self) -> &[u8];
|
||||
|
||||
/// Check if the current iterator is valid.
|
||||
fn is_valid(&self) -> bool;
|
||||
|
||||
/// Move to the next position.
|
||||
fn next(&mut self) -> Result<()>;
|
||||
}
|
||||
67
mini-lsm-starter/src/iterators/merge_iterator.rs
Normal file
67
mini-lsm-starter/src/iterators/merge_iterator.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
|
||||
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
|
||||
|
||||
use std::cmp::{self};
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use super::impls::StorageIterator;
|
||||
|
||||
struct HeapWrapper<I: StorageIterator>(pub usize, pub Box<I>);
|
||||
|
||||
impl<I: StorageIterator> PartialEq for HeapWrapper<I> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.partial_cmp(other).unwrap() == cmp::Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: StorageIterator> Eq for HeapWrapper<I> {}
|
||||
|
||||
impl<I: StorageIterator> PartialOrd for HeapWrapper<I> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
|
||||
match self.1.key().cmp(other.1.key()) {
|
||||
cmp::Ordering::Greater => Some(cmp::Ordering::Greater),
|
||||
cmp::Ordering::Less => Some(cmp::Ordering::Less),
|
||||
cmp::Ordering::Equal => self.0.partial_cmp(&other.0),
|
||||
}
|
||||
.map(|x| x.reverse())
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: StorageIterator> Ord for HeapWrapper<I> {
|
||||
fn cmp(&self, other: &Self) -> cmp::Ordering {
|
||||
self.partial_cmp(other).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge multiple iterators of the same type. If the same key occurs multiple times in some
|
||||
/// iterators, prefer the one with smaller index.
|
||||
pub struct MergeIterator<I: StorageIterator> {
|
||||
// Heap of wrapped iterators, ordered by `HeapWrapper`'s `Ord` implementation.
iters: BinaryHeap<HeapWrapper<I>>,
|
||||
// NOTE(review): presumably the entry that `key`/`value` will expose — confirm once implemented.
current: HeapWrapper<I>,
|
||||
}
|
||||
|
||||
impl<I: StorageIterator> MergeIterator<I> {
|
||||
/// Creates a merge iterator from `iters`. Not implemented yet: calling it panics via
/// `unimplemented!()`.
pub fn create(iters: Vec<Box<I>>) -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
// Every method below is still a stub that panics via `unimplemented!()`.
impl<I: StorageIterator> StorageIterator for MergeIterator<I> {
|
||||
fn key(&self) -> &[u8] {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
4
mini-lsm-starter/src/iterators/tests.rs
Normal file
4
mini-lsm-starter/src/iterators/tests.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
//! Please copy `mini-lsm/src/iterators/tests.rs` here so that you can run tests.
|
||||
|
||||
pub mod merge_iterator_test;
|
||||
pub mod two_merge_iterator_test;
|
||||
@@ -0,0 +1,2 @@
|
||||
//! Please copy `mini-lsm/src/iterators/tests/merge_iterator_test.rs` here so that you can run
|
||||
//! tests.
|
||||
@@ -0,0 +1,2 @@
|
||||
//! Please copy `mini-lsm/src/iterators/tests/two_merge_iterator_test.rs` here so that you can run
|
||||
//! tests.
|
||||
38
mini-lsm-starter/src/iterators/two_merge_iterator.rs
Normal file
38
mini-lsm-starter/src/iterators/two_merge_iterator.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
|
||||
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use super::impls::StorageIterator;
|
||||
|
||||
/// Merges two iterators of different types into one. If the two iterators have the same key, only
|
||||
/// produce the key once and prefer the entry from A.
|
||||
pub struct TwoMergeIterator<A: StorageIterator, B: StorageIterator> {
|
||||
a: A,
|
||||
b: B,
|
||||
// Add fields as needed
|
||||
}
|
||||
|
||||
impl<A: StorageIterator, B: StorageIterator> TwoMergeIterator<A, B> {
|
||||
/// Creates a merge iterator over `a` and `b`. Not implemented yet: calling it panics via
/// `unimplemented!()`.
pub fn create(a: A, b: B) -> Result<Self> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
// Every method below is still a stub that panics via `unimplemented!()`.
impl<A: StorageIterator, B: StorageIterator> StorageIterator for TwoMergeIterator<A, B> {
|
||||
fn key(&self) -> &[u8] {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,6 @@
|
||||
pub mod block;
|
||||
pub mod storage;
|
||||
pub mod iterators;
|
||||
pub mod lsm_iterator;
|
||||
pub mod lsm_storage;
|
||||
pub mod mem_table;
|
||||
pub mod table;
|
||||
|
||||
1
mini-lsm-starter/src/lsm_iterator.rs
Normal file
1
mini-lsm-starter/src/lsm_iterator.rs
Normal file
@@ -0,0 +1 @@
|
||||
/// Iterator type named in the return type of `LsmStorage::scan`; currently an empty placeholder
/// with no fields.
pub struct LsmIterator {}
|
||||
75
mini-lsm-starter/src/lsm_storage.rs
Normal file
75
mini-lsm-starter/src/lsm_storage.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use std::ops::Bound;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use arc_swap::ArcSwap;
|
||||
use bytes::Bytes;
|
||||
|
||||
use crate::lsm_iterator::LsmIterator;
|
||||
use crate::mem_table::MemTable;
|
||||
use crate::table::{SsTable, SsTableIterator};
|
||||
|
||||
pub struct LsmStorageInner {
|
||||
memtables: Vec<Arc<MemTable>>,
|
||||
sstables: Vec<Arc<SsTable>>,
|
||||
}
|
||||
|
||||
impl LsmStorageInner {
|
||||
fn create() -> Self {
|
||||
Self {
|
||||
memtables: vec![Arc::new(MemTable::create())],
|
||||
sstables: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The storage interface of the LSM tree.
|
||||
pub struct LsmStorage {
|
||||
inner: ArcSwap<LsmStorageInner>,
|
||||
}
|
||||
|
||||
impl LsmStorage {
|
||||
pub fn open(_path: &Path) -> Result<Self> {
|
||||
Ok(Self {
|
||||
inner: ArcSwap::from_pointee(LsmStorageInner::create()),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
|
||||
let snapshot = self.inner.load();
|
||||
for memtable in &snapshot.memtables {
|
||||
if let Some(value) = memtable.get(key)? {
|
||||
if value.is_empty() {
|
||||
// found tomestone, return key not exists
|
||||
return Ok(None);
|
||||
}
|
||||
return Ok(Some(value));
|
||||
}
|
||||
}
|
||||
let mut iters = Vec::new();
|
||||
iters.reserve(snapshot.sstables.len());
|
||||
for table in snapshot.sstables.iter().rev() {
|
||||
iters.push(SsTableIterator::create_and_seek_to_key(table.clone(), key)?);
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn put(&mut self, key: &[u8], value: &[u8]) -> Result<()> {
|
||||
assert!(!value.is_empty(), "value cannot be empty");
|
||||
assert!(!key.is_empty(), "key cannot be empty");
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn delete(&mut self, _key: &[u8]) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn sync(&mut self) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn scan(&self, _lower: Bound<&[u8]>, _upper: Bound<&[u8]>) -> Result<LsmIterator> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
79
mini-lsm-starter/src/mem_table.rs
Normal file
79
mini-lsm-starter/src/mem_table.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
|
||||
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
|
||||
|
||||
use std::ops::Bound;
|
||||
|
||||
use anyhow::Result;
|
||||
use bytes::Bytes;
|
||||
use crossbeam_skiplist::SkipMap;
|
||||
|
||||
use crate::iterators::impls::StorageIterator;
|
||||
use crate::table::SsTableBuilder;
|
||||
|
||||
/// A basic mem-table based on crossbeam-skiplist
|
||||
pub struct MemTable {
|
||||
// Keys and values are both stored as `Bytes`.
map: SkipMap<Bytes, Bytes>,
|
||||
}
|
||||
|
||||
// Every method below is still a stub that panics via `unimplemented!()`.
impl MemTable {
|
||||
/// Create a new mem-table.
|
||||
pub fn create() -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Get a value by key.
|
||||
pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Put a key-value pair into the mem-table.
|
||||
pub fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Get an iterator over a range of keys.
|
||||
pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> Result<MemTableIterator> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Flush the mem-table to SSTable.
|
||||
pub fn flush(&self, builder: &mut SsTableBuilder) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
// Shorthand for the range iterator type of a `SkipMap<Bytes, Bytes>` bounded by
// `(Bound<Bytes>, Bound<Bytes>)`.
type SkipMapRangeIter<'a> =
|
||||
crossbeam_skiplist::map::Range<'a, Bytes, (Bound<Bytes>, Bound<Bytes>), Bytes, Bytes>;
|
||||
|
||||
/// An iterator over a range of `SkipMap`.
|
||||
pub struct MemTableIterator<'a> {
|
||||
// Placeholder field that only carries the `'a` lifetime; no iterator state is stored yet.
_phantom: std::marker::PhantomData<&'a ()>,
|
||||
}
|
||||
|
||||
impl<'a> MemTableIterator<'a> {
|
||||
/// Wraps a skip-map range iterator. Not implemented yet: panics via `unimplemented!()`.
fn new(iter: SkipMapRangeIter<'a>) -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
// Every method below is still a stub that panics via `unimplemented!()`.
impl StorageIterator for MemTableIterator<'_> {
|
||||
fn value(&self) -> &[u8] {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "mem_table_test.rs"]
|
||||
mod tests;
|
||||
1
mini-lsm-starter/src/mem_table_test.rs
Normal file
1
mini-lsm-starter/src/mem_table_test.rs
Normal file
@@ -0,0 +1 @@
|
||||
//! Please copy `mini-lsm/src/mem_table_test.rs` here so that you can run tests.
|
||||
@@ -4,14 +4,15 @@
|
||||
mod builder;
|
||||
mod iterator;
|
||||
|
||||
use std::{path::Path, sync::Arc};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
pub use builder::SsTableBuilder;
|
||||
use bytes::{Buf, Bytes};
|
||||
pub use iterator::SsTableIterator;
|
||||
|
||||
use crate::block::Block;
|
||||
use anyhow::Result;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct BlockMeta {
|
||||
@@ -25,7 +26,8 @@ impl BlockMeta {
|
||||
/// Encode block meta to a buffer.
|
||||
pub fn encode_block_meta(
|
||||
block_meta: &[BlockMeta],
|
||||
#[allow(clippy::ptr_arg)] /* remove this allow after you finish */ buf: &mut Vec<u8>,
|
||||
#[allow(clippy::ptr_arg)] // remove this allow after you finish
|
||||
buf: &mut Vec<u8>,
|
||||
) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
|
||||
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
|
||||
|
||||
use anyhow::Result;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use super::{BlockMeta, SsTable};
|
||||
|
||||
/// Builds an SSTable from key-value pairs.
|
||||
@@ -14,17 +15,22 @@ pub struct SsTableBuilder {
|
||||
|
||||
impl SsTableBuilder {
|
||||
/// Create a builder based on target SST size and target block size.
|
||||
pub fn new(target_size: usize, block_size: usize) -> Self {
|
||||
pub fn new(block_size: usize) -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Adds a key-value pair to SSTable, return false when SST full.
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
||||
/// Adds a key-value pair to SSTable
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Builds the SSTable and writes it to the given path. No need to actually write to disk until chapter 4 block cache.
|
||||
/// Get the estimated size of the SSTable.
|
||||
pub fn estimated_size(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Builds the SSTable and writes it to the given path. No need to actually write to disk until
|
||||
/// chapter 4 block cache.
|
||||
pub fn build(self, path: impl AsRef<Path>) -> Result<SsTable> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
|
||||
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
|
||||
|
||||
use anyhow::Result;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use super::SsTable;
|
||||
|
||||
/// An iterator over the contents of an SSTable.
|
||||
|
||||
Reference in New Issue
Block a user