From a17f34b7d957cdbc63b521a336e9ea0eb3c5b2c9 Mon Sep 17 00:00:00 2001 From: Alex Chi Date: Tue, 30 Jan 2024 12:39:36 +0800 Subject: [PATCH] finish 3.5 Signed-off-by: Alex Chi --- README.md | 2 +- mini-lsm-book/src/week2-05-manifest.md | 2 + mini-lsm-book/src/week3-04-watermark.md | 6 +++ mini-lsm-book/src/week3-05-txn-occ.md | 38 ++++++++++++++++ mini-lsm-book/src/week3-06-serializable.md | 11 +++-- mini-lsm-mvcc/src/mvcc/txn.rs | 51 +++------------------- 6 files changed, 61 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 89c32af..e66ddbb 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ We are working on chapter 3 and more test cases for all existing contents. | 3.2 | Snapshot Read - Blocks, Memtables, and SSTs | ✅ | ✅ | ✅ | | 3.3 | Snapshot Read - Engine Read Path | ✅ | ✅ | ✅ | | 3.4 | Watermark and Garbage Collection | ✅ | ✅ | ✅ | -| 3.5 | Transactions and Optimistic Concurrency Control | ✅ | 🚧 | 🚧 | +| 3.5 | Transactions and Optimistic Concurrency Control | ✅ | ✅ | ✅ | | 3.6 | Serializable Snapshot Isolation | ✅ | 🚧 | 🚧 | | 3.7 | Compaction Filter | 🚧 | | | diff --git a/mini-lsm-book/src/week2-05-manifest.md b/mini-lsm-book/src/week2-05-manifest.md index 09d3764..c4d898c 100644 --- a/mini-lsm-book/src/week2-05-manifest.md +++ b/mini-lsm-book/src/week2-05-manifest.md @@ -46,6 +46,8 @@ For now, we only use two types of the manifest records: SST flush and compaction To sync the directory, you may implement the `sync_dir` function, where you can use `File::open(dir).sync_all()?` to sync it. On Linux, directory is a file that contains the list of files in the directory. By doing fsync on the directory, you will ensure that the newly-written (or removed) files can be visible to the user if the power goes off. +Remember to write a compaction manifest record for both the background compaction trigger (leveled/simple/universal) and when the user requests to do a force compaction. 
+ ## Task 3: Flush on Close In this task, you will need to modify: diff --git a/mini-lsm-book/src/week3-04-watermark.md b/mini-lsm-book/src/week3-04-watermark.md index 9c05369..521b640 100644 --- a/mini-lsm-book/src/week3-04-watermark.md +++ b/mini-lsm-book/src/week3-04-watermark.md @@ -34,6 +34,12 @@ You will need to add the `read_ts` to the watermark when a transaction starts, a ## Task 3: Garbage Collection in Compaction +In this task, you will need to modify: + +``` +src/compact.rs +``` + Now that we have a watermark for the system, we can clean up unused versions during the compaction process. * If a version of a key is above watermark, keep it. diff --git a/mini-lsm-book/src/week3-05-txn-occ.md b/mini-lsm-book/src/week3-05-txn-occ.md index 3c20754..39720cd 100644 --- a/mini-lsm-book/src/week3-05-txn-occ.md +++ b/mini-lsm-book/src/week3-05-txn-occ.md @@ -1,10 +1,48 @@ # Transaction and Optimistic Concurrency Control +In this chapter, you will implement all interfaces of `Transaction`. Your implementation will maintain a private workspace for modifications inside a transaction, and commit them in batch, so that all modifications within the transaction will only be visible to the transaction itself until commit. + +To run test cases, + +``` +cargo x copy-test --week 3 --day 5 +cargo x scheck +``` + ## Task 1: Local Workspace + Put and Delete +In this task, you will need to modify: + +``` +src/txn.rs +``` + +You can now implement `put` and `delete` by inserting the corresponding key/value to the `local_storage`, which is a skiplist memtable without key timestamp. Note that for deletes, you will still need to implement it as inserting an empty value, instead of removing a value from the skiplist. + ## Task 2: Get and Scan +In this task, you will need to modify: + +``` +src/txn.rs +``` + +For `get`, you should first probe the local storage. If a value is found, return the value or `None` depending on whether it is a deletion marker. 
For `scan`, you will need to implement a `TxnLocalIterator` for the skiplist as in chapter 1.1 when you implement the iterator for a memtable without key timestamp. You will need to store a `TwoMergeIterator<TxnLocalIterator, FusedIterator<LsmIterator>>` in the `TxnIterator`. And, lastly, given that the `TwoMergeIterator` will retain the deletion markers in the child iterators, you will need to modify your `TxnIterator` implementation to correctly handle deletions. + ## Task 3: Commit +In this task, you will need to modify: + +``` +src/txn.rs +``` + +We assume that a transaction will only be used on a single thread. Once your transaction enters the commit phase, you should set `self.committed` to true, so that users cannot do any other operations on the transaction. Your `put`, `delete`, `scan`, and `get` implementations should return an error if the transaction is already committed. + +Your commit implementation should simply collect all key-value pairs from the local storage and submit a write batch to the storage engine. + +## Test Your Understanding + +* With all the things we have implemented up to this point, does the system satisfy snapshot isolation? If not, what else do we need to do to support snapshot isolation? (Note: snapshot isolation is different from the serializable snapshot isolation that we will talk about in the next chapter) {{#include copyright.md}} diff --git a/mini-lsm-book/src/week3-06-serializable.md b/mini-lsm-book/src/week3-06-serializable.md index 975b50f..44e2e9d 100644 --- a/mini-lsm-book/src/week3-06-serializable.md +++ b/mini-lsm-book/src/week3-06-serializable.md @@ -1,8 +1,10 @@ -# Snapshot Isolation and Serializable Snapshot Isolation +# Serializable Snapshot Isolation -## Task 1: Snapshot Isolation: Detect Write-Write Conflict +Now, we are going to add a conflict detection algorithm at transaction commit time, so as to make the engine serializable. 
-## Task 2: Serializable: Record Read Set and Write Set +## Task 1: Track Read Set in Get and Write Set + +## Task 2: Track Read Set in Scan ## Task 3: Serializable Verification @@ -14,6 +16,7 @@ We do not provide reference answers to the questions, and feel free to discuss a ## Bonus Tasks -* **Read-Only Transactions.** +* **Read-Only Transactions.** With serializable enabled, we will need to keep track of the read set for a transaction. +* **Precision/Predicate Locking.** The read set can be maintained using a range instead of a single key. This would be useful when a user scans the full key space. {{#include copyright.md}} diff --git a/mini-lsm-mvcc/src/mvcc/txn.rs b/mini-lsm-mvcc/src/mvcc/txn.rs index cf62412..70fb151 100644 --- a/mini-lsm-mvcc/src/mvcc/txn.rs +++ b/mini-lsm-mvcc/src/mvcc/txn.rs @@ -7,7 +7,7 @@ use std::{ }, }; -use anyhow::{bail, Result}; +use anyhow::Result; use bytes::Bytes; use crossbeam_skiplist::{map::Entry, SkipMap}; use ouroboros::self_referencing; @@ -20,8 +20,6 @@ use crate::{ mem_table::map_bound, }; -use super::CommittedTxnData; - pub struct Transaction { pub(crate) read_ts: u64, pub(crate) inner: Arc, @@ -37,7 +35,11 @@ impl Transaction { panic!("cannot operate on committed txn!"); } if let Some(entry) = self.local_storage.get(key) { - return Ok(Some(entry.value().clone())); + if entry.value().is_empty() { + return Ok(None); + } else { + return Ok(Some(entry.value().clone())); + } } self.inner.get_with_ts(key, self.read_ts) } @@ -94,20 +96,6 @@ impl Transaction { self.committed .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) .expect("cannot operate on committed txn!"); - if let Some(guard) = &self.key_hashes { - let guard = guard.lock(); - let (write_set, read_set) = &*guard; - if !write_set.is_empty() { - let committed_txns = self.inner.mvcc().committed_txns.lock(); - for (_, txn_data) in committed_txns.range(self.read_ts..) 
{ - for key_hash in read_set { - if txn_data.key_hashes.contains(key_hash) { - bail!("serializable check failed"); - } - } - } - } - } let batch = self .local_storage .iter() @@ -119,32 +107,7 @@ impl Transaction { } }) .collect::>(); - let ts = self.inner.write_batch(&batch)?; - { - let mut committed_txns = self.inner.mvcc().committed_txns.lock(); - let mut key_hashes = self.key_hashes.as_ref().unwrap().lock(); - let (write_set, _) = &mut *key_hashes; - - let old_data = committed_txns.insert( - ts, - CommittedTxnData { - key_hashes: std::mem::take(write_set), - read_ts: self.read_ts, - commit_ts: ts, - }, - ); - assert!(old_data.is_none()); - - // remove unneeded txn data - let watermark = self.inner.mvcc().watermark(); - while let Some(entry) = committed_txns.first_entry() { - if *entry.key() < watermark { - entry.remove(); - } else { - break; - } - } - } + self.inner.write_batch(&batch)?; Ok(()) } }