commit 971d0b1c81 (parent 753e6d4f9e)
Author: Alex Chi
Date:   2024-01-25 12:27:16 +08:00
Committed by: Alex Chi Z

make mvcc impl compile with new key module

Signed-off-by: Alex Chi <iskyzh@gmail.com>

11 changed files with 96 additions and 62 deletions

----------------------------------------

@@ -1,4 +1,3 @@
 [alias]
 xtask = "run --package mini-lsm-xtask --"
 x = "run --package mini-lsm-xtask --"
-test = "nextest run"

----------------------------------------

@@ -19,10 +19,10 @@ pub struct BlockBuilder {
 fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize {
     let mut i = 0;
     loop {
-        if i >= first_key.len() || i >= key.len() {
+        if i >= first_key.key_len() || i >= key.key_len() {
             break;
         }
-        if first_key.raw_ref()[i] != key.raw_ref()[i] {
+        if first_key.key_ref()[i] != key.key_ref()[i] {
             break;
         }
         i += 1;
@@ -50,7 +50,7 @@ impl BlockBuilder {
     #[must_use]
     pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
         assert!(!key.is_empty(), "key must not be empty");
-        if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size
+        if self.estimated_size() + key.raw_len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size
             && !self.is_empty()
         {
             return false;
@@ -61,9 +61,11 @@
         // Encode key overlap.
         self.data.put_u16(overlap as u16);
         // Encode key length.
-        self.data.put_u16((key.len() - overlap) as u16);
+        self.data.put_u16((key.key_len() - overlap) as u16);
         // Encode key content.
-        self.data.put(&key.raw_ref()[overlap..]);
+        self.data.put(&key.key_ref()[overlap..]);
+        // Encode key ts
+        self.data.put_u64(key.ts());
         // Encode value length.
         self.data.put_u16(value.len() as u16);
         // Encode value content.
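Taken together, the two hunks above change the per-entry block layout to `overlap_len (u16) | rest_key_len (u16) | key suffix | ts (u64) | value_len (u16) | value`. A minimal decoding sketch of that layout (a hypothetical helper using the `bytes` crate; it mirrors the encoder but is not part of this commit):

```rust
use bytes::Buf;

// Decode one entry as written by `BlockBuilder::add` above.
// `first_key` supplies the shared prefix for key compression.
fn decode_entry(mut entry: &[u8], first_key: &[u8]) -> (Vec<u8>, u64, Vec<u8>) {
    let overlap_len = entry.get_u16() as usize;
    let rest_key_len = entry.get_u16() as usize;
    let mut key = first_key[..overlap_len].to_vec();
    key.extend_from_slice(&entry[..rest_key_len]);
    entry.advance(rest_key_len);
    let ts = entry.get_u64(); // the timestamp now sits right after the key bytes
    let value_len = entry.get_u16() as usize;
    let value = entry[..value_len].to_vec();
    (key, ts, value)
}
```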

----------------------------------------

@@ -27,9 +27,10 @@ impl Block {
     fn get_first_key(&self) -> KeyVec {
         let mut buf = &self.data[..];
         buf.get_u16();
-        let key_len = buf.get_u16();
-        let key = &buf[..key_len as usize];
-        KeyVec::from_vec(key.to_vec())
+        let key_len = buf.get_u16() as usize;
+        let key = &buf[..key_len];
+        buf.advance(key_len);
+        KeyVec::from_vec_with_ts(key.to_vec(), buf.get_u64())
     }
 }
@@ -108,11 +109,15 @@ impl BlockIterator {
         let key_len = entry.get_u16() as usize;
         let key = &entry[..key_len];
         self.key.clear();
-        self.key.append(&self.first_key.raw_ref()[..overlap_len]);
+        self.key.append(&self.first_key.key_ref()[..overlap_len]);
         self.key.append(key);
         entry.advance(key_len);
+        let ts = entry.get_u64();
+        self.key.set_ts(ts);
         let value_len = entry.get_u16() as usize;
-        let value_offset_begin = offset + SIZEOF_U16 + SIZEOF_U16 + key_len + SIZEOF_U16;
+        // REMEMBER TO CHANGE THIS every time you change the encoding!
+        let value_offset_begin =
+            offset + SIZEOF_U16 + SIZEOF_U16 + std::mem::size_of::<u64>() + key_len + SIZEOF_U16;
         let value_offset_end = value_offset_begin + value_len;
         self.value_range = (value_offset_begin, value_offset_end);
         entry.advance(value_len);
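The "REMEMBER TO CHANGE THIS" expression must track the entry encoding by hand. One way to make it self-documenting is to name each term; a hypothetical refactor equivalent to the expression above (not part of the commit):

```rust
const SIZEOF_U16: usize = std::mem::size_of::<u16>();
const SIZEOF_U64: usize = std::mem::size_of::<u64>();

// Offset of the value bytes within the block, given the entry offset
// and the length of the non-overlapping key suffix.
fn value_offset_begin(offset: usize, key_len: usize) -> usize {
    offset
        + SIZEOF_U16 // key overlap length
        + SIZEOF_U16 // rest-of-key length
        + key_len    // key suffix bytes
        + SIZEOF_U64 // timestamp
        + SIZEOF_U16 // value length
}
```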

----------------------------------------

@@ -8,8 +8,11 @@ pub type KeySlice<'a> = Key<&'a [u8]>;
 pub type KeyVec = Key<Vec<u8>>;
 pub type KeyBytes = Key<Bytes>;
 
+/// For testing purpose, should not use anywhere in your implementation.
+pub const TS_ENABLED: bool = true;
+
 /// Temporary, should remove after implementing full week 3 day 1 + 2.
-pub const TS_DEFAULT: u64 = std::u64::MAX;
+pub const TS_DEFAULT: u64 = 0;
+pub const TS_MAX: u64 = std::u64::MAX;
+pub const TS_MIN: u64 = std::u64::MIN;
@@ -21,10 +24,14 @@ impl<T: AsRef<[u8]>> Key<T> {
         self.0
     }
 
-    pub fn len(&self) -> usize {
+    pub fn key_len(&self) -> usize {
         self.0.as_ref().len()
     }
 
+    pub fn raw_len(&self) -> usize {
+        self.0.as_ref().len() + std::mem::size_of::<u64>()
+    }
+
     pub fn is_empty(&self) -> bool {
         self.0.as_ref().is_empty()
     }
@@ -32,14 +39,10 @@ impl<T: AsRef<[u8]>> Key<T> {
 impl Key<Vec<u8>> {
     pub fn new() -> Self {
-        Self(Vec::new(), 0)
+        Self(Vec::new(), TS_DEFAULT)
     }
 
-    pub fn from_vec(key: Vec<u8>) -> Self {
-        unimplemented!()
-    }
-
-    /// Create a `KeyVec` from a `Vec<u8>`. Will be removed in week 3.
+    /// Create a `KeyVec` from a `Vec<u8>` and a ts. Will be removed in week 3.
     pub fn from_vec_with_ts(key: Vec<u8>, ts: u64) -> Self {
         Self(key, ts)
     }
@@ -54,10 +57,15 @@ impl Key<Vec<u8>> {
         self.0.extend(data)
     }
 
-    /// Set the key from a slice without re-allocating. The signature will change in week 3.
+    pub fn set_ts(&mut self, ts: u64) {
+        self.1 = ts;
+    }
+
+    /// Set the key from a slice without re-allocating.
     pub fn set_from_slice(&mut self, key_slice: KeySlice) {
         self.0.clear();
         self.0.extend(key_slice.0);
+        self.1 = key_slice.1;
     }
 
     pub fn as_key_slice(&self) -> KeySlice {
@@ -68,17 +76,20 @@ impl Key<Vec<u8>> {
         Key(self.0.into(), self.1)
     }
 
-    /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
-    pub fn raw_ref(&self) -> &[u8] {
+    pub fn key_ref(&self) -> &[u8] {
         self.0.as_ref()
     }
 
+    pub fn ts(&self) -> u64 {
+        self.1
+    }
+
     pub fn for_testing_key_ref(&self) -> &[u8] {
         self.0.as_ref()
     }
 
     pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self {
-        Self(key, 0)
+        Self(key, TS_DEFAULT)
     }
 }
@@ -87,23 +98,21 @@ impl Key<Bytes> {
         Key(&self.0, self.1)
     }
 
-    /// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
-    pub fn from_bytes(bytes: Bytes) -> KeyBytes {
-        unimplemented!()
-    }
-
-    /// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
+    /// Create a `KeyBytes` from a `Bytes` and a ts.
     pub fn from_bytes_with_ts(bytes: Bytes, ts: u64) -> KeyBytes {
         Key(bytes, ts)
     }
 
-    /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
-    pub fn raw_ref(&self) -> &[u8] {
+    pub fn key_ref(&self) -> &[u8] {
         self.0.as_ref()
     }
 
+    pub fn ts(&self) -> u64 {
+        self.1
+    }
+
     pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes {
-        Key(bytes, 0)
+        Key(bytes, TS_DEFAULT)
     }
 
     pub fn for_testing_key_ref(&self) -> &[u8] {
@@ -121,17 +130,20 @@ impl<'a> Key<&'a [u8]> {
         Self(slice, ts)
     }
 
-    /// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
-    pub fn raw_ref(self) -> &'a [u8] {
+    pub fn key_ref(self) -> &'a [u8] {
         self.0
     }
 
+    pub fn ts(&self) -> u64 {
+        self.1
+    }
+
     pub fn for_testing_key_ref(self) -> &'a [u8] {
         self.0
     }
 
     pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self {
-        Self(slice, 0)
+        Self(slice, TS_DEFAULT)
     }
 }
@@ -143,7 +155,7 @@ impl<T: AsRef<[u8]> + Debug> Debug for Key<T> {
 impl<T: AsRef<[u8]> + Default> Default for Key<T> {
     fn default() -> Self {
-        Self(T::default(), 0)
+        Self(T::default(), TS_DEFAULT)
     }
 }
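A short usage sketch of the reworked `Key` API, built only from names in the diff above (assumes it runs inside the same crate so `crate::key` resolves):

```rust
use bytes::Bytes;

use crate::key::{KeyBytes, KeyVec, TS_DEFAULT};

fn key_api_demo() {
    // Every key now carries a timestamp next to its bytes.
    let mut key = KeyVec::from_vec_with_ts(b"hello".to_vec(), 42);
    assert_eq!(key.key_ref(), b"hello"); // replaces raw_ref()
    assert_eq!(key.ts(), 42);
    assert_eq!(key.key_len(), 5);        // user key bytes only
    assert_eq!(key.raw_len(), 5 + 8);    // plus the u64 timestamp
    key.set_ts(7);
    assert_eq!(key.ts(), 7);

    let key = KeyBytes::from_bytes_with_ts(Bytes::from_static(b"hello"), TS_DEFAULT);
    assert_eq!(key.ts(), TS_DEFAULT);
}
```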

----------------------------------------

@@ -41,8 +41,8 @@ impl LsmIterator {
         }
         match self.end_bound.as_ref() {
             Bound::Unbounded => {}
-            Bound::Included(key) => self.is_valid = self.inner.key().raw_ref() <= key.as_ref(),
-            Bound::Excluded(key) => self.is_valid = self.inner.key().raw_ref() < key.as_ref(),
+            Bound::Included(key) => self.is_valid = self.inner.key().key_ref() <= key.as_ref(),
+            Bound::Excluded(key) => self.is_valid = self.inner.key().key_ref() < key.as_ref(),
         }
         Ok(())
     }
@@ -63,7 +63,7 @@ impl StorageIterator for LsmIterator {
     }
 
     fn key(&self) -> &[u8] {
-        self.inner.key().raw_ref()
+        self.inner.key().key_ref()
     }
 
     fn value(&self) -> &[u8] {

----------------------------------------

@@ -103,19 +103,19 @@ fn range_overlap(
     table_end: KeySlice,
 ) -> bool {
     match user_end {
-        Bound::Excluded(key) if key <= table_begin.raw_ref() => {
+        Bound::Excluded(key) if key <= table_begin.key_ref() => {
             return false;
         }
-        Bound::Included(key) if key < table_begin.raw_ref() => {
+        Bound::Included(key) if key < table_begin.key_ref() => {
             return false;
         }
         _ => {}
     }
     match user_begin {
-        Bound::Excluded(key) if key >= table_end.raw_ref() => {
+        Bound::Excluded(key) if key >= table_end.key_ref() => {
             return false;
         }
-        Bound::Included(key) if key > table_end.raw_ref() => {
+        Bound::Included(key) if key > table_end.key_ref() => {
             return false;
         }
         _ => {}
@@ -124,7 +124,7 @@ fn range_overlap(
 }
 
 fn key_within(user_key: &[u8], table_begin: KeySlice, table_end: KeySlice) -> bool {
-    table_begin.raw_ref() <= user_key && user_key <= table_end.raw_ref()
+    table_begin.key_ref() <= user_key && user_key <= table_end.key_ref()
 }
 
 /// The storage interface of the LSM tree.
@@ -470,7 +470,7 @@ impl LsmStorageInner {
         let iter = TwoMergeIterator::create(l0_iter, MergeIterator::create(level_iters))?;
 
-        if iter.is_valid() && iter.key().raw_ref() == key && !iter.value().is_empty() {
+        if iter.is_valid() && iter.key().key_ref() == key && !iter.value().is_empty() {
             return Ok(Some(Bytes::copy_from_slice(iter.value())));
         }
         Ok(None)
@@ -678,7 +678,7 @@
                 table,
                 KeySlice::from_slice(key, key::TS_DEFAULT),
             )?;
-            if iter.is_valid() && iter.key().raw_ref() == key {
+            if iter.is_valid() && iter.key().key_ref() == key {
                 iter.next()?;
             }
             iter
@@ -716,7 +716,7 @@
                 level_ssts,
                 KeySlice::from_slice(key, key::TS_DEFAULT),
             )?;
-            if iter.is_valid() && iter.key().raw_ref() == key {
+            if iter.is_valid() && iter.key().key_ref() == key {
                 iter.next()?;
             }
             iter
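The `range_overlap` checks above implement the standard interval-disjointness test: the user range misses a table only if it ends before `table_begin` or starts after `table_end`. A condensed sketch of the same rule (hypothetical, operating on plain byte slices rather than `KeySlice`):

```rust
use std::ops::Bound;

// True iff the user range intersects [table_begin, table_end].
fn overlaps(
    user_begin: Bound<&[u8]>,
    user_end: Bound<&[u8]>,
    table_begin: &[u8],
    table_end: &[u8],
) -> bool {
    let ends_before = match user_end {
        Bound::Excluded(key) => key <= table_begin,
        Bound::Included(key) => key < table_begin,
        Bound::Unbounded => false,
    };
    let starts_after = match user_begin {
        Bound::Excluded(key) => key >= table_end,
        Bound::Included(key) => key > table_end,
        Bound::Unbounded => false,
    };
    !ends_before && !starts_after
}
```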

----------------------------------------

@@ -37,11 +37,11 @@ impl BlockMeta {
             // The size of key length
             estimated_size += std::mem::size_of::<u16>();
             // The size of actual key
-            estimated_size += meta.first_key.len();
+            estimated_size += meta.first_key.raw_len();
             // The size of key length
             estimated_size += std::mem::size_of::<u16>();
             // The size of actual key
-            estimated_size += meta.last_key.len();
+            estimated_size += meta.last_key.raw_len();
         }
         // Reserve the space to improve performance, especially when the size of incoming data is
         // large
@@ -49,10 +49,12 @@ impl BlockMeta {
         let original_len = buf.len();
         for meta in block_meta {
             buf.put_u32(meta.offset as u32);
-            buf.put_u16(meta.first_key.len() as u16);
-            buf.put_slice(meta.first_key.raw_ref());
-            buf.put_u16(meta.last_key.len() as u16);
-            buf.put_slice(meta.last_key.raw_ref());
+            buf.put_u16(meta.first_key.key_len() as u16);
+            buf.put_slice(meta.first_key.key_ref());
+            buf.put_u64(meta.first_key.ts());
+            buf.put_u16(meta.last_key.key_len() as u16);
+            buf.put_slice(meta.last_key.key_ref());
+            buf.put_u64(meta.last_key.ts());
         }
         assert_eq!(estimated_size, buf.len() - original_len);
     }
@@ -63,9 +65,11 @@ impl BlockMeta {
         while buf.has_remaining() {
             let offset = buf.get_u32() as usize;
             let first_key_len = buf.get_u16() as usize;
-            let first_key = KeyBytes::from_bytes(buf.copy_to_bytes(first_key_len));
+            let first_key =
+                KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(first_key_len), buf.get_u64());
             let last_key_len: usize = buf.get_u16() as usize;
-            let last_key = KeyBytes::from_bytes(buf.copy_to_bytes(last_key_len));
+            let last_key =
+                KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(last_key_len), buf.get_u64());
             block_meta.push(BlockMeta {
                 offset,
                 first_key,
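Each meta entry is thus encoded as `offset (u32) | first_key_len (u16) | first_key | first_key ts (u64) | last_key_len (u16) | last_key | last_key ts (u64)`. A hypothetical helper mirroring the `estimated_size` bookkeeping for a single entry:

```rust
use std::mem::size_of;

// Encoded size of one `BlockMeta` entry under the new layout.
fn meta_entry_size(first_key_len: usize, last_key_len: usize) -> usize {
    size_of::<u32>()       // block offset
        + size_of::<u16>() // first key length
        + first_key_len    // first key bytes
        + size_of::<u64>() // first key timestamp
        + size_of::<u16>() // last key length
        + last_key_len     // last key bytes
        + size_of::<u64>() // last key timestamp
}
```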

----------------------------------------

@@ -41,7 +41,7 @@ impl SsTableBuilder {
             self.first_key.set_from_slice(key);
         }
 
-        self.key_hashes.push(farmhash::fingerprint32(key.raw_ref()));
+        self.key_hashes.push(farmhash::fingerprint32(key.key_ref()));
 
         if self.builder.add(key, value) {
             self.last_key.set_from_slice(key);
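Note that the bloom filter still hashes only the user key (`key_ref()`), not the timestamp, so every version of a key shares one fingerprint and a point lookup can consult the filter before knowing which version it wants. A hypothetical illustration:

```rust
use crate::key::KeySlice;

// Two versions of the same user key hash identically, because only
// key_ref() feeds the bloom filter.
fn bloom_hash_ignores_ts() {
    let v1 = KeySlice::from_slice(b"key1", 1);
    let v2 = KeySlice::from_slice(b"key1", 2);
    assert_eq!(
        farmhash::fingerprint32(v1.key_ref()),
        farmhash::fingerprint32(v2.key_ref())
    );
}
```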

----------------------------------------

@@ -2,6 +2,8 @@ use std::fmt::Debug;
 use bytes::Bytes;
 
+pub const TS_ENABLED: bool = false;
+
 pub struct Key<T: AsRef<[u8]>>(T);
 
 pub type KeySlice<'a> = Key<&'a [u8]>;

----------------------------------------

@@ -2,6 +2,8 @@ use std::fmt::Debug;
 use bytes::Bytes;
 
+pub const TS_ENABLED: bool = false;
+
 pub struct Key<T: AsRef<[u8]>>(T);
 
 pub type KeySlice<'a> = Key<&'a [u8]>;

----------------------------------------

@@ -1,7 +1,7 @@
 use tempfile::tempdir;
 
 use crate::{
-    key::KeySlice,
+    key::{KeySlice, TS_ENABLED},
     table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder},
 };
@@ -75,9 +75,17 @@ fn test_task3_block_key_compression() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("1.sst");
     let sst = builder.build_for_test(path).unwrap();
-    assert!(
-        sst.block_meta.len() <= 25,
-        "you have {} blocks, expect 25",
-        sst.block_meta.len()
-    );
+    if TS_ENABLED {
+        assert!(
+            sst.block_meta.len() <= 34,
+            "you have {} blocks, expect 34",
+            sst.block_meta.len()
+        );
+    } else {
+        assert!(
+            sst.block_meta.len() <= 25,
+            "you have {} blocks, expect 25",
+            sst.block_meta.len()
+        );
+    }
 }
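The two limits differ because every entry now carries an extra 8-byte timestamp, so the same test payload spills across more blocks once TS_ENABLED is true.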