make mvcc impl compile with new key module
Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
@@ -1,4 +1,3 @@
|
||||
[alias]
|
||||
xtask = "run --package mini-lsm-xtask --"
|
||||
x = "run --package mini-lsm-xtask --"
|
||||
test = "nextest run"
|
||||
|
||||
@@ -19,10 +19,10 @@ pub struct BlockBuilder {
|
||||
fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize {
|
||||
let mut i = 0;
|
||||
loop {
|
||||
if i >= first_key.len() || i >= key.len() {
|
||||
if i >= first_key.key_len() || i >= key.key_len() {
|
||||
break;
|
||||
}
|
||||
if first_key.raw_ref()[i] != key.raw_ref()[i] {
|
||||
if first_key.key_ref()[i] != key.key_ref()[i] {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
@@ -50,7 +50,7 @@ impl BlockBuilder {
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
|
||||
assert!(!key.is_empty(), "key must not be empty");
|
||||
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size
|
||||
if self.estimated_size() + key.raw_len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size
|
||||
&& !self.is_empty()
|
||||
{
|
||||
return false;
|
||||
@@ -61,9 +61,11 @@ impl BlockBuilder {
|
||||
// Encode key overlap.
|
||||
self.data.put_u16(overlap as u16);
|
||||
// Encode key length.
|
||||
self.data.put_u16((key.len() - overlap) as u16);
|
||||
self.data.put_u16((key.key_len() - overlap) as u16);
|
||||
// Encode key content.
|
||||
self.data.put(&key.raw_ref()[overlap..]);
|
||||
self.data.put(&key.key_ref()[overlap..]);
|
||||
// Encode key ts
|
||||
self.data.put_u64(key.ts());
|
||||
// Encode value length.
|
||||
self.data.put_u16(value.len() as u16);
|
||||
// Encode value content.
|
||||
|
||||
@@ -27,9 +27,10 @@ impl Block {
|
||||
fn get_first_key(&self) -> KeyVec {
|
||||
let mut buf = &self.data[..];
|
||||
buf.get_u16();
|
||||
let key_len = buf.get_u16();
|
||||
let key = &buf[..key_len as usize];
|
||||
KeyVec::from_vec(key.to_vec())
|
||||
let key_len = buf.get_u16() as usize;
|
||||
let key = &buf[..key_len];
|
||||
buf.advance(key_len);
|
||||
KeyVec::from_vec_with_ts(key.to_vec(), buf.get_u64())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,11 +109,15 @@ impl BlockIterator {
|
||||
let key_len = entry.get_u16() as usize;
|
||||
let key = &entry[..key_len];
|
||||
self.key.clear();
|
||||
self.key.append(&self.first_key.raw_ref()[..overlap_len]);
|
||||
self.key.append(&self.first_key.key_ref()[..overlap_len]);
|
||||
self.key.append(key);
|
||||
entry.advance(key_len);
|
||||
let ts = entry.get_u64();
|
||||
self.key.set_ts(ts);
|
||||
let value_len = entry.get_u16() as usize;
|
||||
let value_offset_begin = offset + SIZEOF_U16 + SIZEOF_U16 + key_len + SIZEOF_U16;
|
||||
// REMEMBER TO CHANGE THIS every time you change the encoding!
|
||||
let value_offset_begin =
|
||||
offset + SIZEOF_U16 + SIZEOF_U16 + std::mem::size_of::<u64>() + key_len + SIZEOF_U16;
|
||||
let value_offset_end = value_offset_begin + value_len;
|
||||
self.value_range = (value_offset_begin, value_offset_end);
|
||||
entry.advance(value_len);
|
||||
|
||||
@@ -8,8 +8,11 @@ pub type KeySlice<'a> = Key<&'a [u8]>;
|
||||
pub type KeyVec = Key<Vec<u8>>;
|
||||
pub type KeyBytes = Key<Bytes>;
|
||||
|
||||
/// For testing purposes only; do not use this anywhere in your implementation.
|
||||
pub const TS_ENABLED: bool = true;
|
||||
|
||||
/// Temporary; should be removed after fully implementing week 3 day 1 + 2.
|
||||
pub const TS_DEFAULT: u64 = std::u64::MAX;
|
||||
pub const TS_DEFAULT: u64 = 0;
|
||||
|
||||
pub const TS_MAX: u64 = std::u64::MAX;
|
||||
pub const TS_MIN: u64 = std::u64::MIN;
|
||||
@@ -21,10 +24,14 @@ impl<T: AsRef<[u8]>> Key<T> {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
pub fn key_len(&self) -> usize {
|
||||
self.0.as_ref().len()
|
||||
}
|
||||
|
||||
pub fn raw_len(&self) -> usize {
|
||||
self.0.as_ref().len() + std::mem::size_of::<u64>()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.as_ref().is_empty()
|
||||
}
|
||||
@@ -32,14 +39,10 @@ impl<T: AsRef<[u8]>> Key<T> {
|
||||
|
||||
impl Key<Vec<u8>> {
|
||||
pub fn new() -> Self {
|
||||
Self(Vec::new(), 0)
|
||||
Self(Vec::new(), TS_DEFAULT)
|
||||
}
|
||||
|
||||
pub fn from_vec(key: Vec<u8>) -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Create a `KeyVec` from a `Vec<u8>`. Will be removed in week 3.
|
||||
/// Create a `KeyVec` from a `Vec<u8>` and a ts. Will be removed in week 3.
|
||||
pub fn from_vec_with_ts(key: Vec<u8>, ts: u64) -> Self {
|
||||
Self(key, ts)
|
||||
}
|
||||
@@ -54,10 +57,15 @@ impl Key<Vec<u8>> {
|
||||
self.0.extend(data)
|
||||
}
|
||||
|
||||
/// Set the key from a slice without re-allocating. The signature will change in week 3.
|
||||
pub fn set_ts(&mut self, ts: u64) {
|
||||
self.1 = ts;
|
||||
}
|
||||
|
||||
/// Set the key from a slice without re-allocating.
|
||||
pub fn set_from_slice(&mut self, key_slice: KeySlice) {
|
||||
self.0.clear();
|
||||
self.0.extend(key_slice.0);
|
||||
self.1 = key_slice.1;
|
||||
}
|
||||
|
||||
pub fn as_key_slice(&self) -> KeySlice {
|
||||
@@ -68,17 +76,20 @@ impl Key<Vec<u8>> {
|
||||
Key(self.0.into(), self.1)
|
||||
}
|
||||
|
||||
/// Always use `key_ref` (formerly `raw_ref`) to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(&self) -> &[u8] {
|
||||
pub fn key_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn ts(&self) -> u64 {
|
||||
self.1
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self {
|
||||
Self(key, 0)
|
||||
Self(key, TS_DEFAULT)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,23 +98,21 @@ impl Key<Bytes> {
|
||||
Key(&self.0, self.1)
|
||||
}
|
||||
|
||||
/// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
|
||||
pub fn from_bytes(bytes: Bytes) -> KeyBytes {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
|
||||
/// Create a `KeyBytes` from a `Bytes` and a ts.
|
||||
pub fn from_bytes_with_ts(bytes: Bytes, ts: u64) -> KeyBytes {
|
||||
Key(bytes, ts)
|
||||
}
|
||||
|
||||
/// Always use `key_ref` (formerly `raw_ref`) to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(&self) -> &[u8] {
|
||||
pub fn key_ref(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
pub fn ts(&self) -> u64 {
|
||||
self.1
|
||||
}
|
||||
|
||||
pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes {
|
||||
Key(bytes, 0)
|
||||
Key(bytes, TS_DEFAULT)
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(&self) -> &[u8] {
|
||||
@@ -121,17 +130,20 @@ impl<'a> Key<&'a [u8]> {
|
||||
Self(slice, ts)
|
||||
}
|
||||
|
||||
/// Always use `key_ref` (formerly `raw_ref`) to access the key in week 1 + 2. This function will be removed in week 3.
|
||||
pub fn raw_ref(self) -> &'a [u8] {
|
||||
pub fn key_ref(self) -> &'a [u8] {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn ts(&self) -> u64 {
|
||||
self.1
|
||||
}
|
||||
|
||||
pub fn for_testing_key_ref(self) -> &'a [u8] {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self {
|
||||
Self(slice, 0)
|
||||
Self(slice, TS_DEFAULT)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,7 +155,7 @@ impl<T: AsRef<[u8]> + Debug> Debug for Key<T> {
|
||||
|
||||
impl<T: AsRef<[u8]> + Default> Default for Key<T> {
|
||||
fn default() -> Self {
|
||||
Self(T::default(), 0)
|
||||
Self(T::default(), TS_DEFAULT)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -41,8 +41,8 @@ impl LsmIterator {
|
||||
}
|
||||
match self.end_bound.as_ref() {
|
||||
Bound::Unbounded => {}
|
||||
Bound::Included(key) => self.is_valid = self.inner.key().raw_ref() <= key.as_ref(),
|
||||
Bound::Excluded(key) => self.is_valid = self.inner.key().raw_ref() < key.as_ref(),
|
||||
Bound::Included(key) => self.is_valid = self.inner.key().key_ref() <= key.as_ref(),
|
||||
Bound::Excluded(key) => self.is_valid = self.inner.key().key_ref() < key.as_ref(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -63,7 +63,7 @@ impl StorageIterator for LsmIterator {
|
||||
}
|
||||
|
||||
fn key(&self) -> &[u8] {
|
||||
self.inner.key().raw_ref()
|
||||
self.inner.key().key_ref()
|
||||
}
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
|
||||
@@ -103,19 +103,19 @@ fn range_overlap(
|
||||
table_end: KeySlice,
|
||||
) -> bool {
|
||||
match user_end {
|
||||
Bound::Excluded(key) if key <= table_begin.raw_ref() => {
|
||||
Bound::Excluded(key) if key <= table_begin.key_ref() => {
|
||||
return false;
|
||||
}
|
||||
Bound::Included(key) if key < table_begin.raw_ref() => {
|
||||
Bound::Included(key) if key < table_begin.key_ref() => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
match user_begin {
|
||||
Bound::Excluded(key) if key >= table_end.raw_ref() => {
|
||||
Bound::Excluded(key) if key >= table_end.key_ref() => {
|
||||
return false;
|
||||
}
|
||||
Bound::Included(key) if key > table_end.raw_ref() => {
|
||||
Bound::Included(key) if key > table_end.key_ref() => {
|
||||
return false;
|
||||
}
|
||||
_ => {}
|
||||
@@ -124,7 +124,7 @@ fn range_overlap(
|
||||
}
|
||||
|
||||
fn key_within(user_key: &[u8], table_begin: KeySlice, table_end: KeySlice) -> bool {
|
||||
table_begin.raw_ref() <= user_key && user_key <= table_end.raw_ref()
|
||||
table_begin.key_ref() <= user_key && user_key <= table_end.key_ref()
|
||||
}
|
||||
|
||||
/// The storage interface of the LSM tree.
|
||||
@@ -470,7 +470,7 @@ impl LsmStorageInner {
|
||||
|
||||
let iter = TwoMergeIterator::create(l0_iter, MergeIterator::create(level_iters))?;
|
||||
|
||||
if iter.is_valid() && iter.key().raw_ref() == key && !iter.value().is_empty() {
|
||||
if iter.is_valid() && iter.key().key_ref() == key && !iter.value().is_empty() {
|
||||
return Ok(Some(Bytes::copy_from_slice(iter.value())));
|
||||
}
|
||||
Ok(None)
|
||||
@@ -678,7 +678,7 @@ impl LsmStorageInner {
|
||||
table,
|
||||
KeySlice::from_slice(key, key::TS_DEFAULT),
|
||||
)?;
|
||||
if iter.is_valid() && iter.key().raw_ref() == key {
|
||||
if iter.is_valid() && iter.key().key_ref() == key {
|
||||
iter.next()?;
|
||||
}
|
||||
iter
|
||||
@@ -716,7 +716,7 @@ impl LsmStorageInner {
|
||||
level_ssts,
|
||||
KeySlice::from_slice(key, key::TS_DEFAULT),
|
||||
)?;
|
||||
if iter.is_valid() && iter.key().raw_ref() == key {
|
||||
if iter.is_valid() && iter.key().key_ref() == key {
|
||||
iter.next()?;
|
||||
}
|
||||
iter
|
||||
|
||||
@@ -37,11 +37,11 @@ impl BlockMeta {
|
||||
// The size of key length
|
||||
estimated_size += std::mem::size_of::<u16>();
|
||||
// The size of actual key
|
||||
estimated_size += meta.first_key.len();
|
||||
estimated_size += meta.first_key.raw_len();
|
||||
// The size of key length
|
||||
estimated_size += std::mem::size_of::<u16>();
|
||||
// The size of actual key
|
||||
estimated_size += meta.last_key.len();
|
||||
estimated_size += meta.last_key.raw_len();
|
||||
}
|
||||
// Reserve the space to improve performance, especially when the size of incoming data is
|
||||
// large
|
||||
@@ -49,10 +49,12 @@ impl BlockMeta {
|
||||
let original_len = buf.len();
|
||||
for meta in block_meta {
|
||||
buf.put_u32(meta.offset as u32);
|
||||
buf.put_u16(meta.first_key.len() as u16);
|
||||
buf.put_slice(meta.first_key.raw_ref());
|
||||
buf.put_u16(meta.last_key.len() as u16);
|
||||
buf.put_slice(meta.last_key.raw_ref());
|
||||
buf.put_u16(meta.first_key.key_len() as u16);
|
||||
buf.put_slice(meta.first_key.key_ref());
|
||||
buf.put_u64(meta.first_key.ts());
|
||||
buf.put_u16(meta.last_key.key_len() as u16);
|
||||
buf.put_slice(meta.last_key.key_ref());
|
||||
buf.put_u64(meta.last_key.ts());
|
||||
}
|
||||
assert_eq!(estimated_size, buf.len() - original_len);
|
||||
}
|
||||
@@ -63,9 +65,11 @@ impl BlockMeta {
|
||||
while buf.has_remaining() {
|
||||
let offset = buf.get_u32() as usize;
|
||||
let first_key_len = buf.get_u16() as usize;
|
||||
let first_key = KeyBytes::from_bytes(buf.copy_to_bytes(first_key_len));
|
||||
let first_key =
|
||||
KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(first_key_len), buf.get_u64());
|
||||
let last_key_len: usize = buf.get_u16() as usize;
|
||||
let last_key = KeyBytes::from_bytes(buf.copy_to_bytes(last_key_len));
|
||||
let last_key =
|
||||
KeyBytes::from_bytes_with_ts(buf.copy_to_bytes(last_key_len), buf.get_u64());
|
||||
block_meta.push(BlockMeta {
|
||||
offset,
|
||||
first_key,
|
||||
|
||||
@@ -41,7 +41,7 @@ impl SsTableBuilder {
|
||||
self.first_key.set_from_slice(key);
|
||||
}
|
||||
|
||||
self.key_hashes.push(farmhash::fingerprint32(key.raw_ref()));
|
||||
self.key_hashes.push(farmhash::fingerprint32(key.key_ref()));
|
||||
|
||||
if self.builder.add(key, value) {
|
||||
self.last_key.set_from_slice(key);
|
||||
|
||||
@@ -2,6 +2,8 @@ use std::fmt::Debug;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
pub const TS_ENABLED: bool = false;
|
||||
|
||||
pub struct Key<T: AsRef<[u8]>>(T);
|
||||
|
||||
pub type KeySlice<'a> = Key<&'a [u8]>;
|
||||
|
||||
@@ -2,6 +2,8 @@ use std::fmt::Debug;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
pub const TS_ENABLED: bool = false;
|
||||
|
||||
pub struct Key<T: AsRef<[u8]>>(T);
|
||||
|
||||
pub type KeySlice<'a> = Key<&'a [u8]>;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::{
|
||||
key::KeySlice,
|
||||
key::{KeySlice, TS_ENABLED},
|
||||
table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder},
|
||||
};
|
||||
|
||||
@@ -75,9 +75,17 @@ fn test_task3_block_key_compression() {
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("1.sst");
|
||||
let sst = builder.build_for_test(path).unwrap();
|
||||
if TS_ENABLED {
|
||||
assert!(
|
||||
sst.block_meta.len() <= 34,
|
||||
"you have {} blocks, expect 34",
|
||||
sst.block_meta.len()
|
||||
);
|
||||
} else {
|
||||
assert!(
|
||||
sst.block_meta.len() <= 25,
|
||||
"you have {} blocks, expect 25",
|
||||
sst.block_meta.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user