make mvcc impl compile with new key module

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi
2024-01-25 12:27:16 +08:00
committed by Alex Chi Z
parent 753e6d4f9e
commit 971d0b1c81
11 changed files with 96 additions and 62 deletions

View File

@@ -1,4 +1,3 @@
[alias] [alias]
xtask = "run --package mini-lsm-xtask --" xtask = "run --package mini-lsm-xtask --"
x = "run --package mini-lsm-xtask --" x = "run --package mini-lsm-xtask --"
test = "nextest run"

View File

@@ -19,10 +19,10 @@ pub struct BlockBuilder {
fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize { fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize {
let mut i = 0; let mut i = 0;
loop { loop {
if i >= first_key.len() || i >= key.len() { if i >= first_key.key_len() || i >= key.key_len() {
break; break;
} }
if first_key.raw_ref()[i] != key.raw_ref()[i] { if first_key.key_ref()[i] != key.key_ref()[i] {
break; break;
} }
i += 1; i += 1;
@@ -50,7 +50,7 @@ impl BlockBuilder {
#[must_use] #[must_use]
pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool { pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
assert!(!key.is_empty(), "key must not be empty"); assert!(!key.is_empty(), "key must not be empty");
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size if self.estimated_size() + key.raw_len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size
&& !self.is_empty() && !self.is_empty()
{ {
return false; return false;
@@ -61,9 +61,11 @@ impl BlockBuilder {
// Encode key overlap. // Encode key overlap.
self.data.put_u16(overlap as u16); self.data.put_u16(overlap as u16);
// Encode key length. // Encode key length.
self.data.put_u16((key.len() - overlap) as u16); self.data.put_u16((key.key_len() - overlap) as u16);
// Encode key content. // Encode key content.
self.data.put(&key.raw_ref()[overlap..]); self.data.put(&key.key_ref()[overlap..]);
// Encode key timestamp.
self.data.put_u64(key.ts());
// Encode value length. // Encode value length.
self.data.put_u16(value.len() as u16); self.data.put_u16(value.len() as u16);
// Encode value content. // Encode value content.

View File

@@ -27,9 +27,10 @@ impl Block {
fn get_first_key(&self) -> KeyVec { fn get_first_key(&self) -> KeyVec {
let mut buf = &self.data[..]; let mut buf = &self.data[..];
buf.get_u16(); buf.get_u16();
let key_len = buf.get_u16(); let key_len = buf.get_u16() as usize;
let key = &buf[..key_len as usize]; let key = &buf[..key_len];
KeyVec::from_vec(key.to_vec()) buf.advance(key_len);
KeyVec::from_vec_with_ts(key.to_vec(), buf.get_u64())
} }
} }
@@ -108,11 +109,15 @@ impl BlockIterator {
let key_len = entry.get_u16() as usize; let key_len = entry.get_u16() as usize;
let key = &entry[..key_len]; let key = &entry[..key_len];
self.key.clear(); self.key.clear();
self.key.append(&self.first_key.raw_ref()[..overlap_len]); self.key.append(&self.first_key.key_ref()[..overlap_len]);
self.key.append(key); self.key.append(key);
entry.advance(key_len); entry.advance(key_len);
let ts = entry.get_u64();
self.key.set_ts(ts);
let value_len = entry.get_u16() as usize; let value_len = entry.get_u16() as usize;
let value_offset_begin = offset + SIZEOF_U16 + SIZEOF_U16 + key_len + SIZEOF_U16; // REMEMBER TO CHANGE THIS every time you change the encoding!
let value_offset_begin =
offset + SIZEOF_U16 + SIZEOF_U16 + std::mem::size_of::<u64>() + key_len + SIZEOF_U16;
let value_offset_end = value_offset_begin + value_len; let value_offset_end = value_offset_begin + value_len;
self.value_range = (value_offset_begin, value_offset_end); self.value_range = (value_offset_begin, value_offset_end);
entry.advance(value_len); entry.advance(value_len);

View File

@@ -8,8 +8,11 @@ pub type KeySlice<'a> = Key<&'a [u8]>;
pub type KeyVec = Key<Vec<u8>>; pub type KeyVec = Key<Vec<u8>>;
pub type KeyBytes = Key<Bytes>; pub type KeyBytes = Key<Bytes>;
/// For testing purposes only; do not use this anywhere in your implementation.
pub const TS_ENABLED: bool = true;
/// Temporary, should remove after implementing full week 3 day 1 + 2. /// Temporary, should remove after implementing full week 3 day 1 + 2.
pub const TS_DEFAULT: u64 = std::u64::MAX; pub const TS_DEFAULT: u64 = 0;
pub const TS_MAX: u64 = std::u64::MAX; pub const TS_MAX: u64 = std::u64::MAX;
pub const TS_MIN: u64 = std::u64::MIN; pub const TS_MIN: u64 = std::u64::MIN;
@@ -21,10 +24,14 @@ impl<T: AsRef<[u8]>> Key<T> {
self.0 self.0
} }
pub fn len(&self) -> usize { pub fn key_len(&self) -> usize {
self.0.as_ref().len() self.0.as_ref().len()
} }
pub fn raw_len(&self) -> usize {
self.0.as_ref().len() + std::mem::size_of::<u64>()
}
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.0.as_ref().is_empty() self.0.as_ref().is_empty()
} }
@@ -32,14 +39,10 @@ impl<T: AsRef<[u8]>> Key<T> {
impl Key<Vec<u8>> { impl Key<Vec<u8>> {
pub fn new() -> Self { pub fn new() -> Self {
Self(Vec::new(), 0) Self(Vec::new(), TS_DEFAULT)
} }
pub fn from_vec(key: Vec<u8>) -> Self { /// Create a `KeyVec` from a `Vec<u8>` and a ts. Will be removed in week 3.
unimplemented!()
}
/// Create a `KeyVec` from a `Vec<u8>`. Will be removed in week 3.
pub fn from_vec_with_ts(key: Vec<u8>, ts: u64) -> Self { pub fn from_vec_with_ts(key: Vec<u8>, ts: u64) -> Self {
Self(key, ts) Self(key, ts)
} }
@@ -54,10 +57,15 @@ impl Key<Vec<u8>> {
self.0.extend(data) self.0.extend(data)
} }
/// Set the key from a slice without re-allocating. The signature will change in week 3. pub fn set_ts(&mut self, ts: u64) {
self.1 = ts;
}
/// Set the key from a slice without re-allocating.
pub fn set_from_slice(&mut self, key_slice: KeySlice) { pub fn set_from_slice(&mut self, key_slice: KeySlice) {
self.0.clear(); self.0.clear();
self.0.extend(key_slice.0); self.0.extend(key_slice.0);
self.1 = key_slice.1;
} }
pub fn as_key_slice(&self) -> KeySlice { pub fn as_key_slice(&self) -> KeySlice {
@@ -68,17 +76,20 @@ impl Key<Vec<u8>> {
Key(self.0.into(), self.1) Key(self.0.into(), self.1)
} }
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. pub fn key_ref(&self) -> &[u8] {
pub fn raw_ref(&self) -> &[u8] {
self.0.as_ref() self.0.as_ref()
} }
pub fn ts(&self) -> u64 {
self.1
}
pub fn for_testing_key_ref(&self) -> &[u8] { pub fn for_testing_key_ref(&self) -> &[u8] {
self.0.as_ref() self.0.as_ref()
} }
pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self { pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self {
Self(key, 0) Self(key, TS_DEFAULT)
} }
} }
@@ -87,23 +98,21 @@ impl Key<Bytes> {
Key(&self.0, self.1) Key(&self.0, self.1)
} }
/// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3. /// Create a `KeyBytes` from a `Bytes` and a ts.
pub fn from_bytes(bytes: Bytes) -> KeyBytes {
unimplemented!()
}
/// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
pub fn from_bytes_with_ts(bytes: Bytes, ts: u64) -> KeyBytes { pub fn from_bytes_with_ts(bytes: Bytes, ts: u64) -> KeyBytes {
Key(bytes, ts) Key(bytes, ts)
} }
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. pub fn key_ref(&self) -> &[u8] {
pub fn raw_ref(&self) -> &[u8] {
self.0.as_ref() self.0.as_ref()
} }
pub fn ts(&self) -> u64 {
self.1
}
pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes { pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes {
Key(bytes, 0) Key(bytes, TS_DEFAULT)
} }
pub fn for_testing_key_ref(&self) -> &[u8] { pub fn for_testing_key_ref(&self) -> &[u8] {
@@ -121,17 +130,20 @@ impl<'a> Key<&'a [u8]> {
Self(slice, ts) Self(slice, ts)
} }
/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3. pub fn key_ref(self) -> &'a [u8] {
pub fn raw_ref(self) -> &'a [u8] {
self.0 self.0
} }
pub fn ts(&self) -> u64 {
self.1
}
pub fn for_testing_key_ref(self) -> &'a [u8] { pub fn for_testing_key_ref(self) -> &'a [u8] {
self.0 self.0
} }
pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self { pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self {
Self(slice, 0) Self(slice, TS_DEFAULT)
} }
} }
@@ -143,7 +155,7 @@ impl<T: AsRef<[u8]> + Debug> Debug for Key<T> {
impl<T: AsRef<[u8]> + Default> Default for Key<T> { impl<T: AsRef<[u8]> + Default> Default for Key<T> {
fn default() -> Self { fn default() -> Self {
Self(T::default(), 0) Self(T::default(), TS_DEFAULT)
} }
} }

View File

@@ -41,8 +41,8 @@ impl LsmIterator {
} }
match self.end_bound.as_ref() { match self.end_bound.as_ref() {
Bound::Unbounded => {} Bound::Unbounded => {}
Bound::Included(key) => self.is_valid = self.inner.key().raw_ref() <= key.as_ref(), Bound::Included(key) => self.is_valid = self.inner.key().key_ref() <= key.as_ref(),
Bound::Excluded(key) => self.is_valid = self.inner.key().raw_ref() < key.as_ref(), Bound::Excluded(key) => self.is_valid = self.inner.key().key_ref() < key.as_ref(),
} }
Ok(()) Ok(())
} }
@@ -63,7 +63,7 @@ impl StorageIterator for LsmIterator {
} }
fn key(&self) -> &[u8] { fn key(&self) -> &[u8] {
self.inner.key().raw_ref() self.inner.key().key_ref()
} }
fn value(&self) -> &[u8] { fn value(&self) -> &[u8] {

View File

@@ -103,19 +103,19 @@ fn range_overlap(
table_end: KeySlice, table_end: KeySlice,
) -> bool { ) -> bool {
match user_end { match user_end {
Bound::Excluded(key) if key <= table_begin.raw_ref() => { Bound::Excluded(key) if key <= table_begin.key_ref() => {
return false; return false;
} }
Bound::Included(key) if key < table_begin.raw_ref() => { Bound::Included(key) if key < table_begin.key_ref() => {
return false; return false;
} }
_ => {} _ => {}
} }
match user_begin { match user_begin {
Bound::Excluded(key) if key >= table_end.raw_ref() => { Bound::Excluded(key) if key >= table_end.key_ref() => {
return false; return false;
} }
Bound::Included(key) if key > table_end.raw_ref() => { Bound::Included(key) if key > table_end.key_ref() => {
return false; return false;
} }
_ => {} _ => {}
@@ -124,7 +124,7 @@ fn range_overlap(
} }
fn key_within(user_key: &[u8], table_begin: KeySlice, table_end: KeySlice) -> bool { fn key_within(user_key: &[u8], table_begin: KeySlice, table_end: KeySlice) -> bool {
table_begin.raw_ref() <= user_key && user_key <= table_end.raw_ref() table_begin.key_ref() <= user_key && user_key <= table_end.key_ref()
} }
/// The storage interface of the LSM tree. /// The storage interface of the LSM tree.
@@ -470,7 +470,7 @@ impl LsmStorageInner {
let iter = TwoMergeIterator::create(l0_iter, MergeIterator::create(level_iters))?; let iter = TwoMergeIterator::create(l0_iter, MergeIterator::create(level_iters))?;
if iter.is_valid() && iter.key().raw_ref() == key && !iter.value().is_empty() { if iter.is_valid() && iter.key().key_ref() == key && !iter.value().is_empty() {
return Ok(Some(Bytes::copy_from_slice(iter.value()))); return Ok(Some(Bytes::copy_from_slice(iter.value())));
} }
Ok(None) Ok(None)
@@ -678,7 +678,7 @@ impl LsmStorageInner {
table, table,
KeySlice::from_slice(key, key::TS_DEFAULT), KeySlice::from_slice(key, key::TS_DEFAULT),
)?; )?;
if iter.is_valid() && iter.key().raw_ref() == key { if iter.is_valid() && iter.key().key_ref() == key {
iter.next()?; iter.next()?;
} }
iter iter
@@ -716,7 +716,7 @@ impl LsmStorageInner {
level_ssts, level_ssts,
KeySlice::from_slice(key, key::TS_DEFAULT), KeySlice::from_slice(key, key::TS_DEFAULT),
)?; )?;
if iter.is_valid() && iter.key().raw_ref() == key { if iter.is_valid() && iter.key().key_ref() == key {
iter.next()?; iter.next()?;
} }
iter iter

View File

@@ -37,11 +37,11 @@ impl BlockMeta {
// The size of key length // The size of key length
estimated_size += std::mem::size_of::<u16>(); estimated_size += std::mem::size_of::<u16>();
// The size of actual key // The size of actual key
estimated_size += meta.first_key.len(); estimated_size += meta.first_key.raw_len();
// The size of key length // The size of key length
estimated_size += std::mem::size_of::<u16>(); estimated_size += std::mem::size_of::<u16>();
// The size of actual key // The size of actual key
estimated_size += meta.last_key.len(); estimated_size += meta.last_key.raw_len();
} }
// Reserve the space to improve performance, especially when the size of incoming data is // Reserve the space to improve performance, especially when the size of incoming data is
// large // large
@@ -49,10 +49,12 @@ impl BlockMeta {
let original_len = buf.len(); let original_len = buf.len();
for meta in block_meta { for meta in block_meta {
buf.put_u32(meta.offset as u32); buf.put_u32(meta.offset as u32);
buf.put_u16(meta.first_key.len() as u16); buf.put_u16(meta.first_key.key_len() as u16);
buf.put_slice(meta.first_key.raw_ref()); buf.put_slice(meta.first_key.key_ref());
buf.put_u16(meta.last_key.len() as u16); buf.put_u64(meta.first_key.ts());
buf.put_slice(meta.last_key.raw_ref()); buf.put_u16(meta.last_key.key_len() as u16);
buf.put_slice(meta.last_key.key_ref());
buf.put_u64(meta.last_key.ts());
} }
assert_eq!(estimated_size, buf.len() - original_len); assert_eq!(estimated_size, buf.len() - original_len);
} }
@@ -63,9 +65,11 @@ impl BlockMeta {
while buf.has_remaining() { while buf.has_remaining() {
let offset = buf.get_u32() as usize; let offset = buf.get_u32() as usize;
let first_key_len = buf.get_u16() as usize; let first_key_len = buf.get_u16() as usize;
let first_key = KeyBytes::from_bytes(buf.copy_to_bytes(first_key_len)); let first_key =
KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(first_key_len), buf.get_u64());
let last_key_len: usize = buf.get_u16() as usize; let last_key_len: usize = buf.get_u16() as usize;
let last_key = KeyBytes::from_bytes(buf.copy_to_bytes(last_key_len)); let last_key =
KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(last_key_len), buf.get_u64());
block_meta.push(BlockMeta { block_meta.push(BlockMeta {
offset, offset,
first_key, first_key,

View File

@@ -41,7 +41,7 @@ impl SsTableBuilder {
self.first_key.set_from_slice(key); self.first_key.set_from_slice(key);
} }
self.key_hashes.push(farmhash::fingerprint32(key.raw_ref())); self.key_hashes.push(farmhash::fingerprint32(key.key_ref()));
if self.builder.add(key, value) { if self.builder.add(key, value) {
self.last_key.set_from_slice(key); self.last_key.set_from_slice(key);

View File

@@ -2,6 +2,8 @@ use std::fmt::Debug;
use bytes::Bytes; use bytes::Bytes;
pub const TS_ENABLED: bool = false;
pub struct Key<T: AsRef<[u8]>>(T); pub struct Key<T: AsRef<[u8]>>(T);
pub type KeySlice<'a> = Key<&'a [u8]>; pub type KeySlice<'a> = Key<&'a [u8]>;

View File

@@ -2,6 +2,8 @@ use std::fmt::Debug;
use bytes::Bytes; use bytes::Bytes;
pub const TS_ENABLED: bool = false;
pub struct Key<T: AsRef<[u8]>>(T); pub struct Key<T: AsRef<[u8]>>(T);
pub type KeySlice<'a> = Key<&'a [u8]>; pub type KeySlice<'a> = Key<&'a [u8]>;

View File

@@ -1,7 +1,7 @@
use tempfile::tempdir; use tempfile::tempdir;
use crate::{ use crate::{
key::KeySlice, key::{KeySlice, TS_ENABLED},
table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder}, table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder},
}; };
@@ -75,9 +75,17 @@ fn test_task3_block_key_compression() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let path = dir.path().join("1.sst"); let path = dir.path().join("1.sst");
let sst = builder.build_for_test(path).unwrap(); let sst = builder.build_for_test(path).unwrap();
if TS_ENABLED {
assert!(
sst.block_meta.len() <= 34,
"you have {} blocks, expect 34",
sst.block_meta.len()
);
} else {
assert!( assert!(
sst.block_meta.len() <= 25, sst.block_meta.len() <= 25,
"you have {} blocks, expect 25", "you have {} blocks, expect 25",
sst.block_meta.len() sst.block_meta.len()
); );
} }
}