diff --git a/mini-lsm-book/src/00-overview.md b/mini-lsm-book/src/00-overview.md
index b5ce185..d73112b 100644
--- a/mini-lsm-book/src/00-overview.md
+++ b/mini-lsm-book/src/00-overview.md
@@ -29,6 +29,17 @@ This architectural design makes LSM tree easy to work with.
 
 In this tutorial, we will learn how to build an LSM-Tree-based storage engine in the Rust programming language.
 
+## Prerequisites of this Tutorial
+
+* You should know the basics of the Rust programming language. Reading [the Rust book](https://doc.rust-lang.org/book/)
+  is enough.
+* You should know the basic concepts of key-value storage engines, i.e., why we need a somewhat complex design to
+  achieve persistence. If you have no prior experience with database or storage systems, you can implement Bitcask
+  in [PingCAP Talent Plan](https://github.com/pingcap/talent-plan/tree/master/courses/rust/projects/project-2).
+* Knowing the basics of an LSM tree is not a requirement, but we recommend reading something about it, e.g., the
+  overall idea of LevelDB. This will familiarize you with concepts like mutable and immutable mem-tables, SSTs,
+  compaction, and WAL.
+
 ## Overview of LSM
 
 An LSM storage engine generally contains 3 parts:
diff --git a/mini-lsm-starter/src/mem_table.rs b/mini-lsm-starter/src/mem_table.rs
index 5b904bb..f77e50e 100644
--- a/mini-lsm-starter/src/mem_table.rs
+++ b/mini-lsm-starter/src/mem_table.rs
@@ -24,17 +24,17 @@ impl MemTable {
     }
 
     /// Get a value by key.
-    pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
+    pub fn get(&self, key: &[u8]) -> Option<Bytes> {
         unimplemented!()
     }
 
     /// Put a key-value pair into the mem-table.
-    pub fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
+    pub fn put(&self, key: &[u8], value: &[u8]) {
         unimplemented!()
     }
 
     /// Get an iterator over a range of keys.
-    pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> Result<MemTableIterator> {
+    pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> MemTableIterator {
         unimplemented!()
     }
 
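For orientation, the reference implementation later in this diff (`mini-lsm/src/mem_table.rs`) backs these skeleton signatures with a crossbeam-skiplist map. A minimal sketch of `get` and `put` under that layout, leaving out the self-referential `MemTableIterator` that `scan` returns:

```rust
use std::sync::Arc;

use bytes::Bytes;
use crossbeam_skiplist::SkipMap;

/// A basic mem-table backed by a lock-free skiplist.
pub struct MemTable {
    map: Arc<SkipMap<Bytes, Bytes>>,
}

impl MemTable {
    pub fn create() -> Self {
        Self {
            map: Arc::new(SkipMap::new()),
        }
    }

    /// Get a value by key; `None` if the key was never written.
    pub fn get(&self, key: &[u8]) -> Option<Bytes> {
        // `Bytes` is reference-counted, so cloning the stored value is cheap.
        self.map.get(key).map(|e| e.value().clone())
    }

    /// Put a key-value pair into the mem-table.
    /// Inserting into the skiplist cannot fail, which is why no `Result` is returned.
    pub fn put(&self, key: &[u8], value: &[u8]) {
        self.map
            .insert(Bytes::copy_from_slice(key), Bytes::copy_from_slice(value));
    }
}
```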
diff --git a/mini-lsm/src/lsm_storage.rs b/mini-lsm/src/lsm_storage.rs
index 4c134df..487a5f7 100644
--- a/mini-lsm/src/lsm_storage.rs
+++ b/mini-lsm/src/lsm_storage.rs
@@ -3,9 +3,8 @@ use std::path::Path;
 use std::sync::Arc;
 
 use anyhow::Result;
-use arc_swap::ArcSwap;
 use bytes::Bytes;
-use parking_lot::Mutex;
+use parking_lot::{Mutex, RwLock};
 
 use crate::iterators::impls::StorageIterator;
 use crate::iterators::merge_iterator::MergeIterator;
@@ -22,6 +21,9 @@ pub struct LsmStorageInner {
     imm_memtables: Vec<Arc<MemTable>>,
     /// L0 SsTables, from earliest to latest.
     l0_sstables: Vec<Arc<SsTable>>,
+    /// L1 - L6 SsTables, sorted by key range.
+    #[allow(dead_code)]
+    levels: Vec<Vec<Arc<SsTable>>>,
 }
 
 impl LsmStorageInner {
@@ -30,26 +32,32 @@
             memtable: Arc::new(MemTable::create()),
             imm_memtables: vec![],
             l0_sstables: vec![],
+            levels: vec![],
         }
     }
 }
 
 /// The storage interface of the LSM tree.
 pub struct LsmStorage {
-    inner: ArcSwap<LsmStorageInner>,
+    inner: Arc<RwLock<Arc<LsmStorageInner>>>,
     flush_lock: Mutex<()>,
 }
 
 impl LsmStorage {
     pub fn open(_path: impl AsRef<Path>) -> Result<Self> {
         Ok(Self {
-            inner: ArcSwap::from_pointee(LsmStorageInner::create()),
+            inner: Arc::new(RwLock::new(Arc::new(LsmStorageInner::create()))),
             flush_lock: Mutex::new(()),
         })
     }
 
+    /// Get a key from the storage. In day 7, this can be further optimized by using a bloom filter.
     pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
-        let snapshot = self.inner.load();
+        let snapshot = {
+            let guard = self.inner.read();
+            Arc::clone(&guard)
+        }; // drop global lock here
+
         // Search on the current memtable.
         if let Some(value) = snapshot.memtable.get(key) {
             if value.is_empty() {
@@ -83,31 +91,29 @@
         Ok(None)
     }
 
+    /// Put a key-value pair into the storage by writing into the current memtable.
     pub fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
         assert!(!value.is_empty(), "value cannot be empty");
         assert!(!key.is_empty(), "key cannot be empty");
-        loop {
-            let snapshot = self.inner.load();
-            if snapshot.memtable.put(key, value) {
-                break;
-            }
-            // waiting for a new memtable to be propagated
-        }
+
+        let guard = self.inner.read();
+        guard.memtable.put(key, value);
+
         Ok(())
     }
 
+    /// Remove a key from the storage by writing an empty value.
     pub fn delete(&self, key: &[u8]) -> Result<()> {
         assert!(!key.is_empty(), "key cannot be empty");
-        loop {
-            let snapshot = self.inner.load();
-            if snapshot.memtable.put(key, b"") {
-                break;
-            }
-            // waiting for a new memtable to be propagated
-        }
+
+        let guard = self.inner.read();
+        guard.memtable.put(key, b"");
+
         Ok(())
     }
 
+    /// In day 3: flush the current memtable to disk as L0 SST.
+    /// In day 6: call `fsync` on WAL.
     pub fn sync(&self) -> Result<()> {
         let _flush_lock = self.flush_lock.lock();
 
@@ -115,20 +121,18 @@
 
         // Move mutable memtable to immutable memtables.
         {
-            let guard = self.inner.load();
+            let mut guard = self.inner.write();
             // Swap the current memtable with a new one.
             let mut snapshot = guard.as_ref().clone();
             let memtable = std::mem::replace(&mut snapshot.memtable, Arc::new(MemTable::create()));
             flush_memtable = memtable.clone();
             // Add the memtable to the immutable memtables.
-            snapshot.imm_memtables.push(memtable.clone());
-            // Disable the memtable.
-            memtable.seal();
+            snapshot.imm_memtables.push(memtable);
             // Update the snapshot.
-            self.inner.store(Arc::new(snapshot));
+            *guard = Arc::new(snapshot);
         }
 
-        // At this point, the old memtable should be disabled for write, and all threads should be
+        // At this point, the old memtable should be disabled for write, and all write threads should be
         // operating on the new memtable. We can safely flush the old memtable to disk.
 
         let mut builder = SsTableBuilder::new(4096);
@@ -137,31 +141,35 @@
 
         // Add the flushed L0 table to the list.
         {
-            let guard = self.inner.load();
+            let mut guard = self.inner.write();
             let mut snapshot = guard.as_ref().clone();
             // Remove the memtable from the immutable memtables.
             snapshot.imm_memtables.pop();
             // Add L0 table
             snapshot.l0_sstables.push(sst);
             // Update the snapshot.
-            self.inner.store(Arc::new(snapshot));
+            *guard = Arc::new(snapshot);
         }
 
         Ok(())
     }
 
+    /// Create an iterator over a range of keys.
     pub fn scan(
         &self,
         lower: Bound<&[u8]>,
         upper: Bound<&[u8]>,
     ) -> Result<FusedIterator<LsmIterator>> {
-        let snapshot = self.inner.load();
+        let snapshot = {
+            let guard = self.inner.read();
+            Arc::clone(&guard)
+        }; // drop global lock here
 
         let mut memtable_iters = Vec::new();
         memtable_iters.reserve(snapshot.imm_memtables.len() + 1);
-        memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper)?));
+        memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper)));
         for memtable in snapshot.imm_memtables.iter().rev() {
-            memtable_iters.push(Box::new(memtable.scan(lower, upper)?));
+            memtable_iters.push(Box::new(memtable.scan(lower, upper)));
         }
 
         let memtable_iter = MergeIterator::create(memtable_iters);
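The locking scheme introduced above boils down to one rule: readers take the `RwLock` just long enough to clone the inner `Arc` and then work on that immutable snapshot, while state changes clone the whole inner struct, modify the copy, and swap it in under the write lock. A stripped-down sketch of the pattern, using hypothetical `State`/`Storage` names rather than the mini-lsm types:

```rust
use std::sync::Arc;

use parking_lot::RwLock;

/// Stand-in for `LsmStorageInner`: plain data that is cloned on every update.
#[derive(Clone, Default)]
struct State {
    version: u64,
}

struct Storage {
    inner: Arc<RwLock<Arc<State>>>,
}

impl Storage {
    fn new() -> Self {
        Self {
            inner: Arc::new(RwLock::new(Arc::new(State::default()))),
        }
    }

    /// Read path: hold the global lock only while cloning the `Arc`,
    /// then use the snapshot without blocking other threads.
    fn snapshot(&self) -> Arc<State> {
        let guard = self.inner.read();
        Arc::clone(&guard)
    }

    /// Write path (copy-on-write): clone the state, mutate the copy,
    /// and publish it by replacing the `Arc` under the write lock.
    fn bump_version(&self) {
        let mut guard = self.inner.write();
        let mut next = guard.as_ref().clone();
        next.version += 1;
        *guard = Arc::new(next);
    }
}

fn main() {
    let storage = Storage::new();
    storage.bump_version();
    assert_eq!(storage.snapshot().version, 1);
}
```

This is also why `put` and `delete` no longer need a retry loop: a writer holding the read lock always sees the current memtable, and `sync` can only swap in the frozen memtable once those readers are gone.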
diff --git a/mini-lsm/src/mem_table.rs b/mini-lsm/src/mem_table.rs
index edb2076..b88b900 100644
--- a/mini-lsm/src/mem_table.rs
+++ b/mini-lsm/src/mem_table.rs
@@ -1,5 +1,4 @@
 use std::ops::Bound;
-use std::sync::atomic::AtomicBool;
 use std::sync::Arc;
 
 use anyhow::Result;
@@ -14,7 +13,6 @@ use crate::table::SsTableBuilder;
 /// A basic mem-table based on crossbeam-skiplist
 pub struct MemTable {
     map: Arc<SkipMap<Bytes, Bytes>>,
-    sealed: AtomicBool,
 }
 
 pub(crate) fn map_bound(bound: Bound<&[u8]>) -> Bound<Bytes> {
@@ -30,7 +28,6 @@ impl MemTable {
     pub fn create() -> Self {
         Self {
             map: Arc::new(SkipMap::new()),
-            sealed: AtomicBool::new(false),
         }
     }
 
@@ -39,19 +36,14 @@ impl MemTable {
         self.map.get(key).map(|e| e.value().clone())
     }
 
-    /// Put a key-value pair into the mem-table. If the current mem-table is sealed, return false.
-    pub fn put(&self, key: &[u8], value: &[u8]) -> bool {
-        use std::sync::atomic::Ordering;
-        if self.sealed.load(Ordering::Acquire) {
-            return false;
-        }
+    /// Put a key-value pair into the mem-table.
+    pub fn put(&self, key: &[u8], value: &[u8]) {
         self.map
             .insert(Bytes::copy_from_slice(key), Bytes::copy_from_slice(value));
-        true
     }
 
     /// Get an iterator over a range of keys.
-    pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> Result<MemTableIterator> {
+    pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> MemTableIterator {
         let (lower, upper) = (map_bound(lower), map_bound(upper));
         let mut iter = MemTableIteratorBuilder {
             map: self.map.clone(),
         }
         .build();
         let entry = iter.with_iter_mut(|iter| MemTableIterator::entry_to_item(iter.next()));
         iter.with_mut(|x| *x.item = entry);
-        Ok(iter)
+        iter
     }
 
     /// Flush the mem-table to SSTable.
@@ -71,12 +63,6 @@
         }
         Ok(())
     }
-
-    /// Disable writes to this memtable.
-    pub(crate) fn seal(&self) {
-        use std::sync::atomic::Ordering;
-        self.sealed.store(true, Ordering::Release);
-    }
 }
 
 type SkipMapRangeIter<'a> =
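One detail worth spelling out: only the signature of `map_bound` appears above, but the new `scan` relies on it to turn borrowed range bounds into owned ones that the iterator can keep alongside the skiplist. A likely shape for it (the body itself is not part of this diff, so treat this as an assumption):

```rust
use std::ops::Bound;

use bytes::Bytes;

/// Convert borrowed range bounds into owned `Bytes` bounds so they can
/// outlive the `&[u8]` slices passed to `scan`.
pub(crate) fn map_bound(bound: Bound<&[u8]>) -> Bound<Bytes> {
    match bound {
        Bound::Included(x) => Bound::Included(Bytes::copy_from_slice(x)),
        Bound::Excluded(x) => Bound::Excluded(Bytes::copy_from_slice(x)),
        Bound::Unbounded => Bound::Unbounded,
    }
}
```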
diff --git a/mini-lsm/src/mem_table/tests.rs b/mini-lsm/src/mem_table/tests.rs
index e711a0a..b96f516 100644
--- a/mini-lsm/src/mem_table/tests.rs
+++ b/mini-lsm/src/mem_table/tests.rs
@@ -58,7 +58,7 @@ fn test_memtable_iter() {
     memtable.put(b"key3", b"value3");
 
     {
-        let mut iter = memtable.scan(Bound::Unbounded, Bound::Unbounded).unwrap();
+        let mut iter = memtable.scan(Bound::Unbounded, Bound::Unbounded);
         assert_eq!(iter.key(), b"key1");
         assert_eq!(iter.value(), b"value1");
         iter.next().unwrap();
@@ -72,9 +72,7 @@ fn test_memtable_iter() {
     }
 
     {
-        let mut iter = memtable
-            .scan(Bound::Included(b"key1"), Bound::Included(b"key2"))
-            .unwrap();
+        let mut iter = memtable.scan(Bound::Included(b"key1"), Bound::Included(b"key2"));
         assert_eq!(iter.key(), b"key1");
         assert_eq!(iter.value(), b"value1");
         iter.next().unwrap();
@@ -85,9 +83,7 @@
     }
 
     {
-        let mut iter = memtable
-            .scan(Bound::Excluded(b"key1"), Bound::Excluded(b"key3"))
-            .unwrap();
+        let mut iter = memtable.scan(Bound::Excluded(b"key1"), Bound::Excluded(b"key3"));
         assert_eq!(iter.key(), b"key2");
         assert_eq!(iter.value(), b"value2");
         iter.next().unwrap();
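Taken together, the public API touched by this diff can be exercised roughly like the following smoke test. This is a hypothetical usage sketch: it assumes the crate exposes the storage as `mini_lsm::lsm_storage::LsmStorage` and that `get` keeps treating an empty value as a deletion tombstone, as the `value.is_empty()` check above suggests.

```rust
use anyhow::Result;
use bytes::Bytes;

use mini_lsm::lsm_storage::LsmStorage;

fn smoke_test() -> Result<()> {
    let storage = LsmStorage::open("/tmp/mini-lsm-demo")?;

    // Writes go straight into the current memtable while holding the read lock.
    storage.put(b"key1", b"value1")?;
    assert_eq!(storage.get(b"key1")?, Some(Bytes::from_static(b"value1")));

    // Delete writes an empty value, which `get` reports as "not found".
    storage.delete(b"key1")?;
    assert_eq!(storage.get(b"key1")?, None);

    // `sync` freezes the current memtable and flushes it to an L0 SST.
    storage.sync()?;
    Ok(())
}
```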