From 971d0b1c812fbab11ffe18381bf1b10d9a39f5db Mon Sep 17 00:00:00 2001 From: Alex Chi Date: Thu, 25 Jan 2024 12:27:16 +0800 Subject: [PATCH] make mvcc impl compile with new key module Signed-off-by: Alex Chi --- .cargo/config.toml | 1 - mini-lsm-mvcc/src/block/builder.rs | 12 +++--- mini-lsm-mvcc/src/block/iterator.rs | 15 ++++--- mini-lsm-mvcc/src/key.rs | 62 +++++++++++++++++------------ mini-lsm-mvcc/src/lsm_iterator.rs | 6 +-- mini-lsm-mvcc/src/lsm_storage.rs | 16 ++++---- mini-lsm-mvcc/src/table.rs | 20 ++++++---- mini-lsm-mvcc/src/table/builder.rs | 2 +- mini-lsm-starter/src/key.rs | 2 + mini-lsm/src/key.rs | 2 + mini-lsm/src/tests/week1_day7.rs | 20 +++++++--- 11 files changed, 96 insertions(+), 62 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index f43e1c4..776c9c9 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,4 +1,3 @@ [alias] xtask = "run --package mini-lsm-xtask --" x = "run --package mini-lsm-xtask --" -test = "nextest run" diff --git a/mini-lsm-mvcc/src/block/builder.rs b/mini-lsm-mvcc/src/block/builder.rs index 4d28a75..f41eee9 100644 --- a/mini-lsm-mvcc/src/block/builder.rs +++ b/mini-lsm-mvcc/src/block/builder.rs @@ -19,10 +19,10 @@ pub struct BlockBuilder { fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize { let mut i = 0; loop { - if i >= first_key.len() || i >= key.len() { + if i >= first_key.key_len() || i >= key.key_len() { break; } - if first_key.raw_ref()[i] != key.raw_ref()[i] { + if first_key.key_ref()[i] != key.key_ref()[i] { break; } i += 1; @@ -50,7 +50,7 @@ impl BlockBuilder { #[must_use] pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool { assert!(!key.is_empty(), "key must not be empty"); - if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size + if self.estimated_size() + key.raw_len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size && !self.is_empty() { return false; @@ -61,9 +61,11 @@ impl BlockBuilder { // Encode key overlap. self.data.put_u16(overlap as u16); // Encode key length. - self.data.put_u16((key.len() - overlap) as u16); + self.data.put_u16((key.key_len() - overlap) as u16); // Encode key content. - self.data.put(&key.raw_ref()[overlap..]); + self.data.put(&key.key_ref()[overlap..]); + // Encode key ts + self.data.put_u64(key.ts()); // Encode value length. self.data.put_u16(value.len() as u16); // Encode value content. diff --git a/mini-lsm-mvcc/src/block/iterator.rs b/mini-lsm-mvcc/src/block/iterator.rs index 65b06af..5ce0b3a 100644 --- a/mini-lsm-mvcc/src/block/iterator.rs +++ b/mini-lsm-mvcc/src/block/iterator.rs @@ -27,9 +27,10 @@ impl Block { fn get_first_key(&self) -> KeyVec { let mut buf = &self.data[..]; buf.get_u16(); - let key_len = buf.get_u16(); - let key = &buf[..key_len as usize]; - KeyVec::from_vec(key.to_vec()) + let key_len = buf.get_u16() as usize; + let key = &buf[..key_len]; + buf.advance(key_len); + KeyVec::from_vec_with_ts(key.to_vec(), buf.get_u64()) } } @@ -108,11 +109,15 @@ impl BlockIterator { let key_len = entry.get_u16() as usize; let key = &entry[..key_len]; self.key.clear(); - self.key.append(&self.first_key.raw_ref()[..overlap_len]); + self.key.append(&self.first_key.key_ref()[..overlap_len]); self.key.append(key); entry.advance(key_len); + let ts = entry.get_u64(); + self.key.set_ts(ts); let value_len = entry.get_u16() as usize; - let value_offset_begin = offset + SIZEOF_U16 + SIZEOF_U16 + key_len + SIZEOF_U16; + // REMEMBER TO CHANGE THIS every time you change the encoding! + let value_offset_begin = + offset + SIZEOF_U16 + SIZEOF_U16 + std::mem::size_of::() + key_len + SIZEOF_U16; let value_offset_end = value_offset_begin + value_len; self.value_range = (value_offset_begin, value_offset_end); entry.advance(value_len); diff --git a/mini-lsm-mvcc/src/key.rs b/mini-lsm-mvcc/src/key.rs index 34922fa..49b2e99 100644 --- a/mini-lsm-mvcc/src/key.rs +++ b/mini-lsm-mvcc/src/key.rs @@ -8,8 +8,11 @@ pub type KeySlice<'a> = Key<&'a [u8]>; pub type KeyVec = Key>; pub type KeyBytes = Key; +/// For testing purpose, should not use anywhere in your implementation. +pub const TS_ENABLED: bool = true; + /// Temporary, should remove after implementing full week 3 day 1 + 2. -pub const TS_DEFAULT: u64 = std::u64::MAX; +pub const TS_DEFAULT: u64 = 0; pub const TS_MAX: u64 = std::u64::MAX; pub const TS_MIN: u64 = std::u64::MIN; @@ -21,10 +24,14 @@ impl> Key { self.0 } - pub fn len(&self) -> usize { + pub fn key_len(&self) -> usize { self.0.as_ref().len() } + pub fn raw_len(&self) -> usize { + self.0.as_ref().len() + std::mem::size_of::() + } + pub fn is_empty(&self) -> bool { self.0.as_ref().is_empty() } @@ -32,14 +39,10 @@ impl> Key { impl Key> { pub fn new() -> Self { - Self(Vec::new(), 0) + Self(Vec::new(), TS_DEFAULT) } - pub fn from_vec(key: Vec) -> Self { - unimplemented!() - } - - /// Create a `KeyVec` from a `Vec`. Will be removed in week 3. + /// Create a `KeyVec` from a `Vec` and a ts. Will be removed in week 3. pub fn from_vec_with_ts(key: Vec, ts: u64) -> Self { Self(key, ts) } @@ -54,10 +57,15 @@ impl Key> { self.0.extend(data) } - /// Set the key from a slice without re-allocating. The signature will change in week 3. + pub fn set_ts(&mut self, ts: u64) { + self.1 = ts; + } + + /// Set the key from a slice without re-allocating. pub fn set_from_slice(&mut self, key_slice: KeySlice) { self.0.clear(); self.0.extend(key_slice.0); + self.1 = key_slice.1; } pub fn as_key_slice(&self) -> KeySlice { @@ -68,17 +76,20 @@ impl Key> { Key(self.0.into(), self.1) } - /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. - pub fn raw_ref(&self) -> &[u8] { + pub fn key_ref(&self) -> &[u8] { self.0.as_ref() } + pub fn ts(&self) -> u64 { + self.1 + } + pub fn for_testing_key_ref(&self) -> &[u8] { self.0.as_ref() } pub fn for_testing_from_vec_no_ts(key: Vec) -> Self { - Self(key, 0) + Self(key, TS_DEFAULT) } } @@ -87,23 +98,21 @@ impl Key { Key(&self.0, self.1) } - /// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3. - pub fn from_bytes(bytes: Bytes) -> KeyBytes { - unimplemented!() - } - - /// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3. + /// Create a `KeyBytes` from a `Bytes` and a ts. pub fn from_bytes_with_ts(bytes: Bytes, ts: u64) -> KeyBytes { Key(bytes, ts) } - /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. - pub fn raw_ref(&self) -> &[u8] { + pub fn key_ref(&self) -> &[u8] { self.0.as_ref() } + pub fn ts(&self) -> u64 { + self.1 + } + pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes { - Key(bytes, 0) + Key(bytes, TS_DEFAULT) } pub fn for_testing_key_ref(&self) -> &[u8] { @@ -121,17 +130,20 @@ impl<'a> Key<&'a [u8]> { Self(slice, ts) } - /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. - pub fn raw_ref(self) -> &'a [u8] { + pub fn key_ref(self) -> &'a [u8] { self.0 } + pub fn ts(&self) -> u64 { + self.1 + } + pub fn for_testing_key_ref(self) -> &'a [u8] { self.0 } pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self { - Self(slice, 0) + Self(slice, TS_DEFAULT) } } @@ -143,7 +155,7 @@ impl + Debug> Debug for Key { impl + Default> Default for Key { fn default() -> Self { - Self(T::default(), 0) + Self(T::default(), TS_DEFAULT) } } diff --git a/mini-lsm-mvcc/src/lsm_iterator.rs b/mini-lsm-mvcc/src/lsm_iterator.rs index 044769c..6b84147 100644 --- a/mini-lsm-mvcc/src/lsm_iterator.rs +++ b/mini-lsm-mvcc/src/lsm_iterator.rs @@ -41,8 +41,8 @@ impl LsmIterator { } match self.end_bound.as_ref() { Bound::Unbounded => {} - Bound::Included(key) => self.is_valid = self.inner.key().raw_ref() <= key.as_ref(), - Bound::Excluded(key) => self.is_valid = self.inner.key().raw_ref() < key.as_ref(), + Bound::Included(key) => self.is_valid = self.inner.key().key_ref() <= key.as_ref(), + Bound::Excluded(key) => self.is_valid = self.inner.key().key_ref() < key.as_ref(), } Ok(()) } @@ -63,7 +63,7 @@ impl StorageIterator for LsmIterator { } fn key(&self) -> &[u8] { - self.inner.key().raw_ref() + self.inner.key().key_ref() } fn value(&self) -> &[u8] { diff --git a/mini-lsm-mvcc/src/lsm_storage.rs b/mini-lsm-mvcc/src/lsm_storage.rs index 10218f2..1bc5320 100644 --- a/mini-lsm-mvcc/src/lsm_storage.rs +++ b/mini-lsm-mvcc/src/lsm_storage.rs @@ -103,19 +103,19 @@ fn range_overlap( table_end: KeySlice, ) -> bool { match user_end { - Bound::Excluded(key) if key <= table_begin.raw_ref() => { + Bound::Excluded(key) if key <= table_begin.key_ref() => { return false; } - Bound::Included(key) if key < table_begin.raw_ref() => { + Bound::Included(key) if key < table_begin.key_ref() => { return false; } _ => {} } match user_begin { - Bound::Excluded(key) if key >= table_end.raw_ref() => { + Bound::Excluded(key) if key >= table_end.key_ref() => { return false; } - Bound::Included(key) if key > table_end.raw_ref() => { + Bound::Included(key) if key > table_end.key_ref() => { return false; } _ => {} @@ -124,7 +124,7 @@ fn range_overlap( } fn key_within(user_key: &[u8], table_begin: KeySlice, table_end: KeySlice) -> bool { - table_begin.raw_ref() <= user_key && user_key <= table_end.raw_ref() + table_begin.key_ref() <= user_key && user_key <= table_end.key_ref() } /// The storage interface of the LSM tree. @@ -470,7 +470,7 @@ impl LsmStorageInner { let iter = TwoMergeIterator::create(l0_iter, MergeIterator::create(level_iters))?; - if iter.is_valid() && iter.key().raw_ref() == key && !iter.value().is_empty() { + if iter.is_valid() && iter.key().key_ref() == key && !iter.value().is_empty() { return Ok(Some(Bytes::copy_from_slice(iter.value()))); } Ok(None) @@ -678,7 +678,7 @@ impl LsmStorageInner { table, KeySlice::from_slice(key, key::TS_DEFAULT), )?; - if iter.is_valid() && iter.key().raw_ref() == key { + if iter.is_valid() && iter.key().key_ref() == key { iter.next()?; } iter @@ -716,7 +716,7 @@ impl LsmStorageInner { level_ssts, KeySlice::from_slice(key, key::TS_DEFAULT), )?; - if iter.is_valid() && iter.key().raw_ref() == key { + if iter.is_valid() && iter.key().key_ref() == key { iter.next()?; } iter diff --git a/mini-lsm-mvcc/src/table.rs b/mini-lsm-mvcc/src/table.rs index 99fa818..1dfda93 100644 --- a/mini-lsm-mvcc/src/table.rs +++ b/mini-lsm-mvcc/src/table.rs @@ -37,11 +37,11 @@ impl BlockMeta { // The size of key length estimated_size += std::mem::size_of::(); // The size of actual key - estimated_size += meta.first_key.len(); + estimated_size += meta.first_key.raw_len(); // The size of key length estimated_size += std::mem::size_of::(); // The size of actual key - estimated_size += meta.last_key.len(); + estimated_size += meta.last_key.raw_len(); } // Reserve the space to improve performance, especially when the size of incoming data is // large @@ -49,10 +49,12 @@ impl BlockMeta { let original_len = buf.len(); for meta in block_meta { buf.put_u32(meta.offset as u32); - buf.put_u16(meta.first_key.len() as u16); - buf.put_slice(meta.first_key.raw_ref()); - buf.put_u16(meta.last_key.len() as u16); - buf.put_slice(meta.last_key.raw_ref()); + buf.put_u16(meta.first_key.key_len() as u16); + buf.put_slice(meta.first_key.key_ref()); + buf.put_u64(meta.first_key.ts()); + buf.put_u16(meta.last_key.key_len() as u16); + buf.put_slice(meta.last_key.key_ref()); + buf.put_u64(meta.last_key.ts()); } assert_eq!(estimated_size, buf.len() - original_len); } @@ -63,9 +65,11 @@ impl BlockMeta { while buf.has_remaining() { let offset = buf.get_u32() as usize; let first_key_len = buf.get_u16() as usize; - let first_key = KeyBytes::from_bytes(buf.copy_to_bytes(first_key_len)); + let first_key = + KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(first_key_len), buf.get_u64()); let last_key_len: usize = buf.get_u16() as usize; - let last_key = KeyBytes::from_bytes(buf.copy_to_bytes(last_key_len)); + let last_key = + KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(last_key_len), buf.get_u64()); block_meta.push(BlockMeta { offset, first_key, diff --git a/mini-lsm-mvcc/src/table/builder.rs b/mini-lsm-mvcc/src/table/builder.rs index a8753d1..a25bcb4 100644 --- a/mini-lsm-mvcc/src/table/builder.rs +++ b/mini-lsm-mvcc/src/table/builder.rs @@ -41,7 +41,7 @@ impl SsTableBuilder { self.first_key.set_from_slice(key); } - self.key_hashes.push(farmhash::fingerprint32(key.raw_ref())); + self.key_hashes.push(farmhash::fingerprint32(key.key_ref())); if self.builder.add(key, value) { self.last_key.set_from_slice(key); diff --git a/mini-lsm-starter/src/key.rs b/mini-lsm-starter/src/key.rs index edf6342..00898b1 100644 --- a/mini-lsm-starter/src/key.rs +++ b/mini-lsm-starter/src/key.rs @@ -2,6 +2,8 @@ use std::fmt::Debug; use bytes::Bytes; +pub const TS_ENABLED: bool = false; + pub struct Key>(T); pub type KeySlice<'a> = Key<&'a [u8]>; diff --git a/mini-lsm/src/key.rs b/mini-lsm/src/key.rs index edf6342..00898b1 100644 --- a/mini-lsm/src/key.rs +++ b/mini-lsm/src/key.rs @@ -2,6 +2,8 @@ use std::fmt::Debug; use bytes::Bytes; +pub const TS_ENABLED: bool = false; + pub struct Key>(T); pub type KeySlice<'a> = Key<&'a [u8]>; diff --git a/mini-lsm/src/tests/week1_day7.rs b/mini-lsm/src/tests/week1_day7.rs index ef29b6f..d510a57 100644 --- a/mini-lsm/src/tests/week1_day7.rs +++ b/mini-lsm/src/tests/week1_day7.rs @@ -1,7 +1,7 @@ use tempfile::tempdir; use crate::{ - key::KeySlice, + key::{KeySlice, TS_ENABLED}, table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder}, }; @@ -75,9 +75,17 @@ fn test_task3_block_key_compression() { let dir = tempdir().unwrap(); let path = dir.path().join("1.sst"); let sst = builder.build_for_test(path).unwrap(); - assert!( - sst.block_meta.len() <= 25, - "you have {} blocks, expect 25", - sst.block_meta.len() - ); + if TS_ENABLED { + assert!( + sst.block_meta.len() <= 34, + "you have {} blocks, expect 34", + sst.block_meta.len() + ); + } else { + assert!( + sst.block_meta.len() <= 25, + "you have {} blocks, expect 25", + sst.block_meta.len() + ); + } }