fix(code): use rwlock in storage
Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
@@ -29,6 +29,17 @@ This architectural design makes LSM tree easy to work with.
|
|||||||
|
|
||||||
In this tutorial, we will learn how to build an LSM-Tree-based storage engine in the Rust programming language.
|
In this tutorial, we will learn how to build an LSM-Tree-based storage engine in the Rust programming language.
|
||||||
|
|
||||||
|
## Prerequisites of this Tutorial
|
||||||
|
|
||||||
|
* You should know the basics of the Rust programming language. Reading [the Rust book](https://doc.rust-lang.org/book/)
|
||||||
|
is enough.
|
||||||
|
* You should know the basic concepts of key-value storage engines, i.e., why we need a somewhat complex design to achieve
|
||||||
|
persistence. If you have no prior experience with database systems and storage systems, you can implement Bitcask
|
||||||
|
in [PingCAP Talent Plan](https://github.com/pingcap/talent-plan/tree/master/courses/rust/projects/project-2).
|
||||||
|
* Knowing the basics of an LSM tree is not a requirement, but we recommend that you read something about it, e.g., the
|
||||||
|
overall idea of LevelDB. This would familiarize you with concepts like mutable and immutable mem-tables, SST,
|
||||||
|
compaction, WAL, etc.
|
||||||
|
|
||||||
## Overview of LSM
|
## Overview of LSM
|
||||||
|
|
||||||
An LSM storage engine generally contains 3 parts:
|
An LSM storage engine generally contains 3 parts:
|
||||||
|
|||||||
@@ -24,17 +24,17 @@ impl MemTable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get a value by key.
|
/// Get a value by key.
|
||||||
pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
|
pub fn get(&self, key: &[u8]) -> Option<Bytes> {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Put a key-value pair into the mem-table.
|
/// Put a key-value pair into the mem-table.
|
||||||
pub fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
|
pub fn put(&self, key: &[u8], value: &[u8]) {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an iterator over a range of keys.
|
/// Get an iterator over a range of keys.
|
||||||
pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> Result<MemTableIterator> {
|
pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> MemTableIterator {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,9 +3,8 @@ use std::path::Path;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use arc_swap::ArcSwap;
|
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use parking_lot::Mutex;
|
use parking_lot::{Mutex, RwLock};
|
||||||
|
|
||||||
use crate::iterators::impls::StorageIterator;
|
use crate::iterators::impls::StorageIterator;
|
||||||
use crate::iterators::merge_iterator::MergeIterator;
|
use crate::iterators::merge_iterator::MergeIterator;
|
||||||
@@ -22,6 +21,9 @@ pub struct LsmStorageInner {
|
|||||||
imm_memtables: Vec<Arc<MemTable>>,
|
imm_memtables: Vec<Arc<MemTable>>,
|
||||||
/// L0 SsTables, from earliest to latest.
|
/// L0 SsTables, from earliest to latest.
|
||||||
l0_sstables: Vec<Arc<SsTable>>,
|
l0_sstables: Vec<Arc<SsTable>>,
|
||||||
|
/// L1 - L6 SsTables, sorted by key range.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
levels: Vec<Vec<Arc<SsTable>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LsmStorageInner {
|
impl LsmStorageInner {
|
||||||
@@ -30,26 +32,32 @@ impl LsmStorageInner {
|
|||||||
memtable: Arc::new(MemTable::create()),
|
memtable: Arc::new(MemTable::create()),
|
||||||
imm_memtables: vec![],
|
imm_memtables: vec![],
|
||||||
l0_sstables: vec![],
|
l0_sstables: vec![],
|
||||||
|
levels: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The storage interface of the LSM tree.
|
/// The storage interface of the LSM tree.
|
||||||
pub struct LsmStorage {
|
pub struct LsmStorage {
|
||||||
inner: ArcSwap<LsmStorageInner>,
|
inner: Arc<RwLock<Arc<LsmStorageInner>>>,
|
||||||
flush_lock: Mutex<()>,
|
flush_lock: Mutex<()>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LsmStorage {
|
impl LsmStorage {
|
||||||
pub fn open(_path: impl AsRef<Path>) -> Result<Self> {
|
pub fn open(_path: impl AsRef<Path>) -> Result<Self> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
inner: ArcSwap::from_pointee(LsmStorageInner::create()),
|
inner: Arc::new(RwLock::new(Arc::new(LsmStorageInner::create()))),
|
||||||
flush_lock: Mutex::new(()),
|
flush_lock: Mutex::new(()),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get a key from the storage. In day 7, this can be further optimized by using a bloom filter.
|
||||||
pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
|
pub fn get(&self, key: &[u8]) -> Result<Option<Bytes>> {
|
||||||
let snapshot = self.inner.load();
|
let snapshot = {
|
||||||
|
let guard = self.inner.read();
|
||||||
|
Arc::clone(&guard)
|
||||||
|
}; // drop global lock here
|
||||||
|
|
||||||
// Search on the current memtable.
|
// Search on the current memtable.
|
||||||
if let Some(value) = snapshot.memtable.get(key) {
|
if let Some(value) = snapshot.memtable.get(key) {
|
||||||
if value.is_empty() {
|
if value.is_empty() {
|
||||||
@@ -83,31 +91,29 @@ impl LsmStorage {
|
|||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Put a key-value pair into the storage by writing into the current memtable.
|
||||||
pub fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
|
pub fn put(&self, key: &[u8], value: &[u8]) -> Result<()> {
|
||||||
assert!(!value.is_empty(), "value cannot be empty");
|
assert!(!value.is_empty(), "value cannot be empty");
|
||||||
assert!(!key.is_empty(), "key cannot be empty");
|
assert!(!key.is_empty(), "key cannot be empty");
|
||||||
loop {
|
|
||||||
let snapshot = self.inner.load();
|
let guard = self.inner.read();
|
||||||
if snapshot.memtable.put(key, value) {
|
guard.memtable.put(key, value);
|
||||||
break;
|
|
||||||
}
|
|
||||||
// waiting for a new memtable to be propagated
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Remove a key from the storage by writing an empty value.
|
||||||
pub fn delete(&self, key: &[u8]) -> Result<()> {
|
pub fn delete(&self, key: &[u8]) -> Result<()> {
|
||||||
assert!(!key.is_empty(), "key cannot be empty");
|
assert!(!key.is_empty(), "key cannot be empty");
|
||||||
loop {
|
|
||||||
let snapshot = self.inner.load();
|
let guard = self.inner.read();
|
||||||
if snapshot.memtable.put(key, b"") {
|
guard.memtable.put(key, b"");
|
||||||
break;
|
|
||||||
}
|
|
||||||
// waiting for a new memtable to be propagated
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// In day 3: flush the current memtable to disk as L0 SST.
|
||||||
|
/// In day 6: call `fsync` on WAL.
|
||||||
pub fn sync(&self) -> Result<()> {
|
pub fn sync(&self) -> Result<()> {
|
||||||
let _flush_lock = self.flush_lock.lock();
|
let _flush_lock = self.flush_lock.lock();
|
||||||
|
|
||||||
@@ -115,20 +121,18 @@ impl LsmStorage {
|
|||||||
|
|
||||||
// Move mutable memtable to immutable memtables.
|
// Move mutable memtable to immutable memtables.
|
||||||
{
|
{
|
||||||
let guard = self.inner.load();
|
let mut guard = self.inner.write();
|
||||||
// Swap the current memtable with a new one.
|
// Swap the current memtable with a new one.
|
||||||
let mut snapshot = guard.as_ref().clone();
|
let mut snapshot = guard.as_ref().clone();
|
||||||
let memtable = std::mem::replace(&mut snapshot.memtable, Arc::new(MemTable::create()));
|
let memtable = std::mem::replace(&mut snapshot.memtable, Arc::new(MemTable::create()));
|
||||||
flush_memtable = memtable.clone();
|
flush_memtable = memtable.clone();
|
||||||
// Add the memtable to the immutable memtables.
|
// Add the memtable to the immutable memtables.
|
||||||
snapshot.imm_memtables.push(memtable.clone());
|
snapshot.imm_memtables.push(memtable);
|
||||||
// Disable the memtable.
|
|
||||||
memtable.seal();
|
|
||||||
// Update the snapshot.
|
// Update the snapshot.
|
||||||
self.inner.store(Arc::new(snapshot));
|
*guard = Arc::new(snapshot);
|
||||||
}
|
}
|
||||||
|
|
||||||
// At this point, the old memtable should be disabled for write, and all threads should be
|
// At this point, the old memtable should be disabled for write, and all write threads should be
|
||||||
// operating on the new memtable. We can safely flush the old memtable to disk.
|
// operating on the new memtable. We can safely flush the old memtable to disk.
|
||||||
|
|
||||||
let mut builder = SsTableBuilder::new(4096);
|
let mut builder = SsTableBuilder::new(4096);
|
||||||
@@ -137,31 +141,35 @@ impl LsmStorage {
|
|||||||
|
|
||||||
// Add the flushed L0 table to the list.
|
// Add the flushed L0 table to the list.
|
||||||
{
|
{
|
||||||
let guard = self.inner.load();
|
let mut guard = self.inner.write();
|
||||||
let mut snapshot = guard.as_ref().clone();
|
let mut snapshot = guard.as_ref().clone();
|
||||||
// Remove the memtable from the immutable memtables.
|
// Remove the memtable from the immutable memtables.
|
||||||
snapshot.imm_memtables.pop();
|
snapshot.imm_memtables.pop();
|
||||||
// Add L0 table
|
// Add L0 table
|
||||||
snapshot.l0_sstables.push(sst);
|
snapshot.l0_sstables.push(sst);
|
||||||
// Update the snapshot.
|
// Update the snapshot.
|
||||||
self.inner.store(Arc::new(snapshot));
|
*guard = Arc::new(snapshot);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create an iterator over a range of keys.
|
||||||
pub fn scan(
|
pub fn scan(
|
||||||
&self,
|
&self,
|
||||||
lower: Bound<&[u8]>,
|
lower: Bound<&[u8]>,
|
||||||
upper: Bound<&[u8]>,
|
upper: Bound<&[u8]>,
|
||||||
) -> Result<FusedIterator<LsmIterator>> {
|
) -> Result<FusedIterator<LsmIterator>> {
|
||||||
let snapshot = self.inner.load();
|
let snapshot = {
|
||||||
|
let guard = self.inner.read();
|
||||||
|
Arc::clone(&guard)
|
||||||
|
}; // drop global lock here
|
||||||
|
|
||||||
let mut memtable_iters = Vec::new();
|
let mut memtable_iters = Vec::new();
|
||||||
memtable_iters.reserve(snapshot.imm_memtables.len() + 1);
|
memtable_iters.reserve(snapshot.imm_memtables.len() + 1);
|
||||||
memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper)?));
|
memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper)));
|
||||||
for memtable in snapshot.imm_memtables.iter().rev() {
|
for memtable in snapshot.imm_memtables.iter().rev() {
|
||||||
memtable_iters.push(Box::new(memtable.scan(lower, upper)?));
|
memtable_iters.push(Box::new(memtable.scan(lower, upper)));
|
||||||
}
|
}
|
||||||
let memtable_iter = MergeIterator::create(memtable_iters);
|
let memtable_iter = MergeIterator::create(memtable_iters);
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use std::ops::Bound;
|
use std::ops::Bound;
|
||||||
use std::sync::atomic::AtomicBool;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
@@ -14,7 +13,6 @@ use crate::table::SsTableBuilder;
|
|||||||
/// A basic mem-table based on crossbeam-skiplist
|
/// A basic mem-table based on crossbeam-skiplist
|
||||||
pub struct MemTable {
|
pub struct MemTable {
|
||||||
map: Arc<SkipMap<Bytes, Bytes>>,
|
map: Arc<SkipMap<Bytes, Bytes>>,
|
||||||
sealed: AtomicBool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn map_bound(bound: Bound<&[u8]>) -> Bound<Bytes> {
|
pub(crate) fn map_bound(bound: Bound<&[u8]>) -> Bound<Bytes> {
|
||||||
@@ -30,7 +28,6 @@ impl MemTable {
|
|||||||
pub fn create() -> Self {
|
pub fn create() -> Self {
|
||||||
Self {
|
Self {
|
||||||
map: Arc::new(SkipMap::new()),
|
map: Arc::new(SkipMap::new()),
|
||||||
sealed: AtomicBool::new(false),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,19 +36,14 @@ impl MemTable {
|
|||||||
self.map.get(key).map(|e| e.value().clone())
|
self.map.get(key).map(|e| e.value().clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Put a key-value pair into the mem-table. If the current mem-table is sealed, return false.
|
/// Put a key-value pair into the mem-table.
|
||||||
pub fn put(&self, key: &[u8], value: &[u8]) -> bool {
|
pub fn put(&self, key: &[u8], value: &[u8]) {
|
||||||
use std::sync::atomic::Ordering;
|
|
||||||
if self.sealed.load(Ordering::Acquire) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
self.map
|
self.map
|
||||||
.insert(Bytes::copy_from_slice(key), Bytes::copy_from_slice(value));
|
.insert(Bytes::copy_from_slice(key), Bytes::copy_from_slice(value));
|
||||||
true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an iterator over a range of keys.
|
/// Get an iterator over a range of keys.
|
||||||
pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> Result<MemTableIterator> {
|
pub fn scan(&self, lower: Bound<&[u8]>, upper: Bound<&[u8]>) -> MemTableIterator {
|
||||||
let (lower, upper) = (map_bound(lower), map_bound(upper));
|
let (lower, upper) = (map_bound(lower), map_bound(upper));
|
||||||
let mut iter = MemTableIteratorBuilder {
|
let mut iter = MemTableIteratorBuilder {
|
||||||
map: self.map.clone(),
|
map: self.map.clone(),
|
||||||
@@ -61,7 +53,7 @@ impl MemTable {
|
|||||||
.build();
|
.build();
|
||||||
let entry = iter.with_iter_mut(|iter| MemTableIterator::entry_to_item(iter.next()));
|
let entry = iter.with_iter_mut(|iter| MemTableIterator::entry_to_item(iter.next()));
|
||||||
iter.with_mut(|x| *x.item = entry);
|
iter.with_mut(|x| *x.item = entry);
|
||||||
Ok(iter)
|
iter
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Flush the mem-table to SSTable.
|
/// Flush the mem-table to SSTable.
|
||||||
@@ -71,12 +63,6 @@ impl MemTable {
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Disable writes to this memtable.
|
|
||||||
pub(crate) fn seal(&self) {
|
|
||||||
use std::sync::atomic::Ordering;
|
|
||||||
self.sealed.store(true, Ordering::Release);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type SkipMapRangeIter<'a> =
|
type SkipMapRangeIter<'a> =
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ fn test_memtable_iter() {
|
|||||||
memtable.put(b"key3", b"value3");
|
memtable.put(b"key3", b"value3");
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut iter = memtable.scan(Bound::Unbounded, Bound::Unbounded).unwrap();
|
let mut iter = memtable.scan(Bound::Unbounded, Bound::Unbounded);
|
||||||
assert_eq!(iter.key(), b"key1");
|
assert_eq!(iter.key(), b"key1");
|
||||||
assert_eq!(iter.value(), b"value1");
|
assert_eq!(iter.value(), b"value1");
|
||||||
iter.next().unwrap();
|
iter.next().unwrap();
|
||||||
@@ -72,9 +72,7 @@ fn test_memtable_iter() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut iter = memtable
|
let mut iter = memtable.scan(Bound::Included(b"key1"), Bound::Included(b"key2"));
|
||||||
.scan(Bound::Included(b"key1"), Bound::Included(b"key2"))
|
|
||||||
.unwrap();
|
|
||||||
assert_eq!(iter.key(), b"key1");
|
assert_eq!(iter.key(), b"key1");
|
||||||
assert_eq!(iter.value(), b"value1");
|
assert_eq!(iter.value(), b"value1");
|
||||||
iter.next().unwrap();
|
iter.next().unwrap();
|
||||||
@@ -85,9 +83,7 @@ fn test_memtable_iter() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut iter = memtable
|
let mut iter = memtable.scan(Bound::Excluded(b"key1"), Bound::Excluded(b"key3"));
|
||||||
.scan(Bound::Excluded(b"key1"), Bound::Excluded(b"key3"))
|
|
||||||
.unwrap();
|
|
||||||
assert_eq!(iter.key(), b"key2");
|
assert_eq!(iter.key(), b"key2");
|
||||||
assert_eq!(iter.value(), b"value2");
|
assert_eq!(iter.value(), b"value2");
|
||||||
iter.next().unwrap();
|
iter.next().unwrap();
|
||||||
|
|||||||
Reference in New Issue
Block a user