From c6e700e2ea2b006728e08ad4842c2d7b70d3fe04 Mon Sep 17 00:00:00 2001 From: Alex Chi Date: Sun, 21 Jan 2024 12:03:40 +0800 Subject: [PATCH] pitfall on merge iterator Signed-off-by: Alex Chi --- mini-lsm-book/src/week1-02-merge-iterator.md | 10 ++++++++++ mini-lsm-book/src/week2-02-simple.md | 3 ++- mini-lsm/src/tests/day2.rs | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/mini-lsm-book/src/week1-02-merge-iterator.md b/mini-lsm-book/src/week1-02-merge-iterator.md index 54d51a8..f39973c 100644 --- a/mini-lsm-book/src/week1-02-merge-iterator.md +++ b/mini-lsm-book/src/week1-02-merge-iterator.md @@ -76,6 +76,16 @@ a->1, b->del, c->4, d->5, e->4 The constructor of the merge iterator takes a vector of iterators. We assume the one with a lower index (i.e., the first one) has the latest data. +One common pitfall is on error handling. For example, + +```rust +let Some(mut inner_iter) = self.iters.peek_mut() { + inner_iter.next()?; // <- will cause problem +} +``` + +If `next` returns an error (i.e., due to disk failure, network failure, checksum error, etc.), it is no longer valid. However, when we go out of the if condition and return the error to the caller, `PeekMut`'s drop will try move the element within the heap, which causes an access to an invalid iterator. Therefore, you will need to do all error handling by yourself instead of using `?` within the scope of `PeekMut`. + We want to avoid dynamic dispatch as much as possible, and therefore we do not use `Box` in the system. Instead, we prefer static dispatch using generics. ## Task 3: LSM Iterator + Fused Iterator diff --git a/mini-lsm-book/src/week2-02-simple.md b/mini-lsm-book/src/week2-02-simple.md index d5a848b..9a05f7c 100644 --- a/mini-lsm-book/src/week2-02-simple.md +++ b/mini-lsm-book/src/week2-02-simple.md @@ -9,7 +9,8 @@ In this chapter, you will: ## Test Your Understanding -* (I know this is stupid but) could you please repeat the definition of read/write/space amplifications? What are the ways to accurately compute them, and what are the ways to estimate them? +* (I know this is stupid but) could you please repeat the definition of read/write/space amplifications? +* What are the ways to accurately compute the read/write/space amplifications, and what are the ways to estimate them? * Is it correct that a key will only be purged from the LSM tree if the user requests to delete it and it has been compacted in the bottom-most level? * Is it a good strategy to periodically do a full compaction on the LSM tree? Why or why not? * Actively choosing some old files/levels to compact even if they do not violate the level amplifier would be a good choice, is it true? (Look at the Lethe paper!) diff --git a/mini-lsm/src/tests/day2.rs b/mini-lsm/src/tests/day2.rs index 1d0f72c..7aecc91 100644 --- a/mini-lsm/src/tests/day2.rs +++ b/mini-lsm/src/tests/day2.rs @@ -120,6 +120,7 @@ fn test_task2_merge_1() { (Bytes::from("a"), Bytes::from("1.1")), (Bytes::from("b"), Bytes::from("2.1")), (Bytes::from("c"), Bytes::from("3.1")), + (Bytes::from("e"), Bytes::new()), ]); let i2 = MockIterator::new(vec![ (Bytes::from("a"), Bytes::from("1.2")), @@ -146,6 +147,7 @@ fn test_task2_merge_1() { (Bytes::from("b"), Bytes::from("2.1")), (Bytes::from("c"), Bytes::from("3.1")), (Bytes::from("d"), Bytes::from("4.2")), + (Bytes::from("e"), Bytes::new()), ], ); @@ -158,6 +160,7 @@ fn test_task2_merge_1() { (Bytes::from("b"), Bytes::from("2.3")), (Bytes::from("c"), Bytes::from("3.3")), (Bytes::from("d"), Bytes::from("4.3")), + (Bytes::from("e"), Bytes::new()), ], ); }