From 6fba57ac4dc728f7b4208662bdce555394b0fcdd Mon Sep 17 00:00:00 2001 From: Ligh0x74 <66345528+Ligh0x74@users.noreply.github.com> Date: Mon, 5 May 2025 18:08:21 +0800 Subject: [PATCH] fix: handle the exclude boundary logic of the memory table (#140) * fix: handle the exclude boundary logic of the memory table * add comments Signed-off-by: Alex Chi --------- Signed-off-by: Alex Chi Co-authored-by: Alex Chi --- mini-lsm-mvcc/src/lsm_storage.rs | 13 +++---- mini-lsm-mvcc/src/mem_table.rs | 36 +++++++++++++----- mini-lsm-mvcc/src/tests/week3_day3.rs | 54 +++++++++++++++++++++++++++ mini-lsm-starter/src/mem_table.rs | 3 ++ mini-lsm/src/mem_table.rs | 3 ++ mini-lsm/src/tests/harness.rs | 5 ++- 6 files changed, 95 insertions(+), 19 deletions(-) diff --git a/mini-lsm-mvcc/src/lsm_storage.rs b/mini-lsm-mvcc/src/lsm_storage.rs index ae181cb..67a82e3 100644 --- a/mini-lsm-mvcc/src/lsm_storage.rs +++ b/mini-lsm-mvcc/src/lsm_storage.rs @@ -805,15 +805,10 @@ impl LsmStorageInner { }; // drop global lock here let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1); - memtable_iters.push(Box::new(snapshot.memtable.scan( - map_key_bound_plus_ts(lower, key::TS_RANGE_BEGIN), - map_key_bound_plus_ts(upper, key::TS_RANGE_END), - ))); + let (begin, end) = map_key_bound_plus_ts(lower, upper, read_ts); + memtable_iters.push(Box::new(snapshot.memtable.scan(begin, end))); for memtable in snapshot.imm_memtables.iter() { - memtable_iters.push(Box::new(memtable.scan( - map_key_bound_plus_ts(lower, key::TS_RANGE_BEGIN), - map_key_bound_plus_ts(upper, key::TS_RANGE_END), - ))); + memtable_iters.push(Box::new(memtable.scan(begin, end))); } let memtable_iter = MergeIterator::create(memtable_iters); @@ -836,6 +831,8 @@ impl LsmStorageInner { table, KeySlice::from_slice(key, key::TS_RANGE_BEGIN), )?; + // TODO: we can implement `key.next()` so that we can directly seek to the + // right place in the previous line. while iter.is_valid() && iter.key().key_ref() == key { iter.next()?; } diff --git a/mini-lsm-mvcc/src/mem_table.rs b/mini-lsm-mvcc/src/mem_table.rs index 5a5102b..e03e2ee 100644 --- a/mini-lsm-mvcc/src/mem_table.rs +++ b/mini-lsm-mvcc/src/mem_table.rs @@ -24,7 +24,7 @@ use crossbeam_skiplist::map::Entry; use ouroboros::self_referencing; use crate::iterators::StorageIterator; -use crate::key::{KeyBytes, KeySlice, TS_DEFAULT}; +use crate::key::{KeyBytes, KeySlice, TS_DEFAULT, TS_RANGE_BEGIN, TS_RANGE_END}; use crate::table::SsTableBuilder; use crate::wal::Wal; @@ -63,13 +63,29 @@ pub(crate) fn map_key_bound(bound: Bound) -> Bound { } } -/// Create a bound of `Bytes` from a bound of `KeySlice`. -pub(crate) fn map_key_bound_plus_ts(bound: Bound<&[u8]>, ts: u64) -> Bound { - match bound { - Bound::Included(x) => Bound::Included(KeySlice::from_slice(x, ts)), - Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice(x, ts)), - Bound::Unbounded => Bound::Unbounded, - } +/// Create a bound of `KeySlice` from a bound of `&[u8]`. +pub(crate) fn map_key_bound_plus_ts<'a>( + lower: Bound<&'a [u8]>, + upper: Bound<&'a [u8]>, + ts: u64, +) -> (Bound>, Bound>) { + ( + match lower { + Bound::Included(x) => Bound::Included(KeySlice::from_slice(x, ts)), + Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice(x, TS_RANGE_END)), + Bound::Unbounded => Bound::Unbounded, + }, + match upper { + Bound::Included(x) => { + // Note that we order the ts descending, but for a MVCC scan, we need all the history + // so that we can access the latest key in case it is not updated in the current ts. + // Therefore, we need to scan all the way to ts 0. + Bound::Included(KeySlice::from_slice(x, TS_RANGE_END)) + } + Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice(x, TS_RANGE_BEGIN)), + Bound::Unbounded => Bound::Unbounded, + }, + ) } impl MemTable { @@ -127,8 +143,8 @@ impl MemTable { upper: Bound<&[u8]>, ) -> MemTableIterator { self.scan( - map_key_bound_plus_ts(lower, TS_DEFAULT), - map_key_bound_plus_ts(upper, TS_DEFAULT), + lower.map(|x| KeySlice::from_slice(x, TS_DEFAULT)), + upper.map(|x| KeySlice::from_slice(x, TS_DEFAULT)), ) } diff --git a/mini-lsm-mvcc/src/tests/week3_day3.rs b/mini-lsm-mvcc/src/tests/week3_day3.rs index 23ec980..8c72d13 100644 --- a/mini-lsm-mvcc/src/tests/week3_day3.rs +++ b/mini-lsm-mvcc/src/tests/week3_day3.rs @@ -49,6 +49,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("b"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot1 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b")) + .unwrap(), + vec![], + ); assert_eq!(snapshot2.get(b"a").unwrap(), Some(Bytes::from_static(b"2"))); assert_eq!(snapshot2.get(b"b").unwrap(), Some(Bytes::from_static(b"1"))); assert_eq!(snapshot2.get(b"c").unwrap(), None); @@ -59,6 +65,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("b"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot2 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b")) + .unwrap(), + vec![], + ); assert_eq!(snapshot3.get(b"a").unwrap(), Some(Bytes::from_static(b"2"))); assert_eq!(snapshot3.get(b"b").unwrap(), None); assert_eq!(snapshot3.get(b"c").unwrap(), Some(Bytes::from_static(b"1"))); @@ -69,6 +81,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("c"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot3 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c")) + .unwrap(), + vec![], + ); storage .inner .force_freeze_memtable(&storage.inner.state_lock.lock()) @@ -91,6 +109,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("b"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot1 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b")) + .unwrap(), + vec![], + ); assert_eq!(snapshot2.get(b"a").unwrap(), Some(Bytes::from_static(b"2"))); assert_eq!(snapshot2.get(b"b").unwrap(), Some(Bytes::from_static(b"1"))); assert_eq!(snapshot2.get(b"c").unwrap(), None); @@ -101,6 +125,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("b"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot2 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b")) + .unwrap(), + vec![], + ); assert_eq!(snapshot3.get(b"a").unwrap(), Some(Bytes::from_static(b"2"))); assert_eq!(snapshot3.get(b"b").unwrap(), None); assert_eq!(snapshot3.get(b"c").unwrap(), Some(Bytes::from_static(b"1"))); @@ -111,6 +141,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("c"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot3 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c")) + .unwrap(), + vec![], + ); assert_eq!(snapshot4.get(b"a").unwrap(), Some(Bytes::from_static(b"3"))); assert_eq!(snapshot4.get(b"b").unwrap(), Some(Bytes::from_static(b"3"))); assert_eq!(snapshot4.get(b"c").unwrap(), Some(Bytes::from_static(b"1"))); @@ -122,6 +158,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("c"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot4 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c")) + .unwrap(), + vec![(Bytes::from("b"), Bytes::from("3"))], + ); assert_eq!(snapshot5.get(b"a").unwrap(), Some(Bytes::from_static(b"4"))); assert_eq!(snapshot5.get(b"b").unwrap(), Some(Bytes::from_static(b"3"))); assert_eq!(snapshot5.get(b"c").unwrap(), Some(Bytes::from_static(b"1"))); @@ -133,6 +175,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("c"), Bytes::from("1")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot5 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c")) + .unwrap(), + vec![(Bytes::from("b"), Bytes::from("3"))], + ); assert_eq!(snapshot6.get(b"a").unwrap(), Some(Bytes::from_static(b"4"))); assert_eq!(snapshot6.get(b"b").unwrap(), None); assert_eq!(snapshot6.get(b"c").unwrap(), Some(Bytes::from_static(b"5"))); @@ -143,6 +191,12 @@ fn test_task2_memtable_mvcc() { (Bytes::from("c"), Bytes::from("5")), ], ); + check_lsm_iter_result_by_key( + &mut snapshot6 + .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c")) + .unwrap(), + vec![], + ); } #[test] diff --git a/mini-lsm-starter/src/mem_table.rs b/mini-lsm-starter/src/mem_table.rs index ab559f6..c854ad7 100644 --- a/mini-lsm-starter/src/mem_table.rs +++ b/mini-lsm-starter/src/mem_table.rs @@ -79,6 +79,9 @@ impl MemTable { lower: Bound<&[u8]>, upper: Bound<&[u8]>, ) -> MemTableIterator { + // This function is only used in week 1 tests, so during the week 3 key-ts refactor, you do + // not need to consider the bound exclude/include logic. Simply provide `DEFAULT_TS` as the + // timestamp for the key-ts pair. self.scan(lower, upper) } diff --git a/mini-lsm/src/mem_table.rs b/mini-lsm/src/mem_table.rs index 3cb52c8..74b72a7 100644 --- a/mini-lsm/src/mem_table.rs +++ b/mini-lsm/src/mem_table.rs @@ -93,6 +93,9 @@ impl MemTable { lower: Bound<&[u8]>, upper: Bound<&[u8]>, ) -> MemTableIterator { + // This function is only used in week 1 tests, so during the week 3 key-ts refactor, you do + // not need to consider the bound exclude/include logic. Simply provide `DEFAULT_TS` as the + // timestamp for the key-ts pair. self.scan(lower, upper) } diff --git a/mini-lsm/src/tests/harness.rs b/mini-lsm/src/tests/harness.rs index 6bcf70e..496bd28 100644 --- a/mini-lsm/src/tests/harness.rs +++ b/mini-lsm/src/tests/harness.rs @@ -127,7 +127,10 @@ where ); iter.next().unwrap(); } - assert!(!iter.is_valid()); + assert!( + !iter.is_valid(), + "iterator should not be valid at the end of the check" + ); } pub fn check_iter_result_by_key_and_ts(iter: &mut I, expected: Vec<((Bytes, u64), Bytes)>)