fix: handle the exclude boundary logic of the memory table (#140)

* fix: handle the exclude boundary logic of the memory table * add comments Signed-off-by: Alex Chi <iskyzh@gmail.com> --------- Signed-off-by: Alex Chi <iskyzh@gmail.com> Co-authored-by: Alex Chi <iskyzh@gmail.com>
2025-05-05 18:08:21 +08:00
parent 0fbb32ecca
commit 6fba57ac4d
6 changed files with 95 additions and 19 deletions
--- a/mini-lsm-mvcc/src/lsm_storage.rs
+++ b/mini-lsm-mvcc/src/lsm_storage.rs
@@ -805,15 +805,10 @@ impl LsmStorageInner {
        }; // drop global lock here

        let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1);
-        memtable_iters.push(Box::new(snapshot.memtable.scan(
-            map_key_bound_plus_ts(lower, key::TS_RANGE_BEGIN),
-            map_key_bound_plus_ts(upper, key::TS_RANGE_END),
-        )));
+        let (begin, end) = map_key_bound_plus_ts(lower, upper, read_ts);
+        memtable_iters.push(Box::new(snapshot.memtable.scan(begin, end)));
        for memtable in snapshot.imm_memtables.iter() {
-            memtable_iters.push(Box::new(memtable.scan(
-                map_key_bound_plus_ts(lower, key::TS_RANGE_BEGIN),
-                map_key_bound_plus_ts(upper, key::TS_RANGE_END),
-            )));
+            memtable_iters.push(Box::new(memtable.scan(begin, end)));
        }
        let memtable_iter = MergeIterator::create(memtable_iters);

@@ -836,6 +831,8 @@ impl LsmStorageInner {
                            table,
                            KeySlice::from_slice(key, key::TS_RANGE_BEGIN),
                        )?;
+                        // TODO: we can implement `key.next()` so that we can directly seek to the
+                        // right place in the previous line.
                        while iter.is_valid() && iter.key().key_ref() == key {
                            iter.next()?;
                        }
--- a/mini-lsm-mvcc/src/mem_table.rs
+++ b/mini-lsm-mvcc/src/mem_table.rs
@@ -24,7 +24,7 @@ use crossbeam_skiplist::map::Entry;
 use ouroboros::self_referencing;

 use crate::iterators::StorageIterator;
-use crate::key::{KeyBytes, KeySlice, TS_DEFAULT};
+use crate::key::{KeyBytes, KeySlice, TS_DEFAULT, TS_RANGE_BEGIN, TS_RANGE_END};
 use crate::table::SsTableBuilder;
 use crate::wal::Wal;

@@ -63,13 +63,29 @@ pub(crate) fn map_key_bound(bound: Bound<KeySlice>) -> Bound<KeyBytes> {
    }
 }

-/// Create a bound of `Bytes` from a bound of `KeySlice`.
-pub(crate) fn map_key_bound_plus_ts(bound: Bound<&[u8]>, ts: u64) -> Bound<KeySlice> {
-    match bound {
-        Bound::Included(x) => Bound::Included(KeySlice::from_slice(x, ts)),
-        Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice(x, ts)),
-        Bound::Unbounded => Bound::Unbounded,
-    }
+/// Create a bound of `KeySlice` from a bound of `&[u8]`.
+pub(crate) fn map_key_bound_plus_ts<'a>(
+    lower: Bound<&'a [u8]>,
+    upper: Bound<&'a [u8]>,
+    ts: u64,
+) -> (Bound<KeySlice<'a>>, Bound<KeySlice<'a>>) {
+    (
+        match lower {
+            Bound::Included(x) => Bound::Included(KeySlice::from_slice(x, ts)),
+            Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice(x, TS_RANGE_END)),
+            Bound::Unbounded => Bound::Unbounded,
+        },
+        match upper {
+            Bound::Included(x) => {
+                // Note that we order the ts descending, but for a MVCC scan, we need all the history
+                // so that we can access the latest key in case it is not updated in the current ts.
+                // Therefore, we need to scan all the way to ts 0.
+                Bound::Included(KeySlice::from_slice(x, TS_RANGE_END))
+            }
+            Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice(x, TS_RANGE_BEGIN)),
+            Bound::Unbounded => Bound::Unbounded,
+        },
+    )
 }

 impl MemTable {
@@ -127,8 +143,8 @@ impl MemTable {
        upper: Bound<&[u8]>,
    ) -> MemTableIterator {
        self.scan(
-            map_key_bound_plus_ts(lower, TS_DEFAULT),
-            map_key_bound_plus_ts(upper, TS_DEFAULT),
+            lower.map(|x| KeySlice::from_slice(x, TS_DEFAULT)),
+            upper.map(|x| KeySlice::from_slice(x, TS_DEFAULT)),
        )
    }

--- a/mini-lsm-mvcc/src/tests/week3_day3.rs
+++ b/mini-lsm-mvcc/src/tests/week3_day3.rs
@@ -49,6 +49,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("b"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot1
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b"))
+            .unwrap(),
+        vec![],
+    );
    assert_eq!(snapshot2.get(b"a").unwrap(), Some(Bytes::from_static(b"2")));
    assert_eq!(snapshot2.get(b"b").unwrap(), Some(Bytes::from_static(b"1")));
    assert_eq!(snapshot2.get(b"c").unwrap(), None);
@@ -59,6 +65,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("b"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot2
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b"))
+            .unwrap(),
+        vec![],
+    );
    assert_eq!(snapshot3.get(b"a").unwrap(), Some(Bytes::from_static(b"2")));
    assert_eq!(snapshot3.get(b"b").unwrap(), None);
    assert_eq!(snapshot3.get(b"c").unwrap(), Some(Bytes::from_static(b"1")));
@@ -69,6 +81,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("c"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot3
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c"))
+            .unwrap(),
+        vec![],
+    );
    storage
        .inner
        .force_freeze_memtable(&storage.inner.state_lock.lock())
@@ -91,6 +109,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("b"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot1
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b"))
+            .unwrap(),
+        vec![],
+    );
    assert_eq!(snapshot2.get(b"a").unwrap(), Some(Bytes::from_static(b"2")));
    assert_eq!(snapshot2.get(b"b").unwrap(), Some(Bytes::from_static(b"1")));
    assert_eq!(snapshot2.get(b"c").unwrap(), None);
@@ -101,6 +125,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("b"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot2
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"b"))
+            .unwrap(),
+        vec![],
+    );
    assert_eq!(snapshot3.get(b"a").unwrap(), Some(Bytes::from_static(b"2")));
    assert_eq!(snapshot3.get(b"b").unwrap(), None);
    assert_eq!(snapshot3.get(b"c").unwrap(), Some(Bytes::from_static(b"1")));
@@ -111,6 +141,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("c"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot3
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c"))
+            .unwrap(),
+        vec![],
+    );
    assert_eq!(snapshot4.get(b"a").unwrap(), Some(Bytes::from_static(b"3")));
    assert_eq!(snapshot4.get(b"b").unwrap(), Some(Bytes::from_static(b"3")));
    assert_eq!(snapshot4.get(b"c").unwrap(), Some(Bytes::from_static(b"1")));
@@ -122,6 +158,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("c"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot4
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c"))
+            .unwrap(),
+        vec![(Bytes::from("b"), Bytes::from("3"))],
+    );
    assert_eq!(snapshot5.get(b"a").unwrap(), Some(Bytes::from_static(b"4")));
    assert_eq!(snapshot5.get(b"b").unwrap(), Some(Bytes::from_static(b"3")));
    assert_eq!(snapshot5.get(b"c").unwrap(), Some(Bytes::from_static(b"1")));
@@ -133,6 +175,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("c"), Bytes::from("1")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot5
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c"))
+            .unwrap(),
+        vec![(Bytes::from("b"), Bytes::from("3"))],
+    );
    assert_eq!(snapshot6.get(b"a").unwrap(), Some(Bytes::from_static(b"4")));
    assert_eq!(snapshot6.get(b"b").unwrap(), None);
    assert_eq!(snapshot6.get(b"c").unwrap(), Some(Bytes::from_static(b"5")));
@@ -143,6 +191,12 @@ fn test_task2_memtable_mvcc() {
            (Bytes::from("c"), Bytes::from("5")),
        ],
    );
+    check_lsm_iter_result_by_key(
+        &mut snapshot6
+            .scan(Bound::Excluded(b"a"), Bound::Excluded(b"c"))
+            .unwrap(),
+        vec![],
+    );
 }

 #[test]
--- a/mini-lsm-starter/src/mem_table.rs
+++ b/mini-lsm-starter/src/mem_table.rs
@@ -79,6 +79,9 @@ impl MemTable {
        lower: Bound<&[u8]>,
        upper: Bound<&[u8]>,
    ) -> MemTableIterator {
+        // This function is only used in week 1 tests, so during the week 3 key-ts refactor, you do
+        // not need to consider the bound exclude/include logic. Simply provide `DEFAULT_TS` as the
+        // timestamp for the key-ts pair.
        self.scan(lower, upper)
    }

--- a/mini-lsm/src/mem_table.rs
+++ b/mini-lsm/src/mem_table.rs
@@ -93,6 +93,9 @@ impl MemTable {
        lower: Bound<&[u8]>,
        upper: Bound<&[u8]>,
    ) -> MemTableIterator {
+        // This function is only used in week 1 tests, so during the week 3 key-ts refactor, you do
+        // not need to consider the bound exclude/include logic. Simply provide `DEFAULT_TS` as the
+        // timestamp for the key-ts pair.
        self.scan(lower, upper)
    }

--- a/mini-lsm/src/tests/harness.rs
+++ b/mini-lsm/src/tests/harness.rs
@@ -127,7 +127,10 @@ where
        );
        iter.next().unwrap();
    }
-    assert!(!iter.is_valid());
+    assert!(
+        !iter.is_valid(),
+        "iterator should not be valid at the end of the check"
+    );
 }

 pub fn check_iter_result_by_key_and_ts<I>(iter: &mut I, expected: Vec<((Bytes, u64), Bytes)>)