checkin initial MVCC codebase
Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
94
mini-lsm-mvcc/src/block/builder.rs
Normal file
94
mini-lsm-mvcc/src/block/builder.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
use bytes::BufMut;
|
||||
|
||||
use crate::key::{KeySlice, KeyVec};
|
||||
|
||||
use super::{Block, SIZEOF_U16};
|
||||
|
||||
/// Builds a block.
|
||||
pub struct BlockBuilder {
|
||||
/// Offsets of each key-value entries.
|
||||
offsets: Vec<u16>,
|
||||
/// All serialized key-value pairs in the block.
|
||||
data: Vec<u8>,
|
||||
/// The expected block size.
|
||||
block_size: usize,
|
||||
/// The first key in the block
|
||||
first_key: KeyVec,
|
||||
}
|
||||
|
||||
/// Returns how many leading bytes `first_key` and `key` have in common.
///
/// The comparison stops at the first differing byte or at the end of the
/// shorter key, whichever comes first.
fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize {
    // Never look past the end of the shorter key.
    let limit = first_key.len().min(key.len());
    (0..limit)
        .take_while(|&pos| first_key.raw_ref()[pos] == key.raw_ref()[pos])
        .count()
}
|
||||
|
||||
impl BlockBuilder {
|
||||
/// Creates a new block builder.
|
||||
pub fn new(block_size: usize) -> Self {
|
||||
Self {
|
||||
offsets: Vec::new(),
|
||||
data: Vec::new(),
|
||||
block_size,
|
||||
first_key: KeyVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn estimated_size(&self) -> usize {
|
||||
SIZEOF_U16 /* number of key-value pairs in the block */ + self.offsets.len() * SIZEOF_U16 /* offsets */ + self.data.len()
|
||||
// key-value pairs
|
||||
}
|
||||
|
||||
/// Adds a key-value pair to the block. Returns false when the block is full.
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
|
||||
assert!(!key.is_empty(), "key must not be empty");
|
||||
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 /* key_len, value_len and offset */ > self.block_size
|
||||
&& !self.is_empty()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Add the offset of the data into the offset array.
|
||||
self.offsets.push(self.data.len() as u16);
|
||||
let overlap = compute_overlap(self.first_key.as_key_slice(), key);
|
||||
// Encode key overlap.
|
||||
self.data.put_u16(overlap as u16);
|
||||
// Encode key length.
|
||||
self.data.put_u16((key.len() - overlap) as u16);
|
||||
// Encode key content.
|
||||
self.data.put(&key.raw_ref()[overlap..]);
|
||||
// Encode value length.
|
||||
self.data.put_u16(value.len() as u16);
|
||||
// Encode value content.
|
||||
self.data.put(value);
|
||||
|
||||
if self.first_key.is_empty() {
|
||||
self.first_key = key.to_key_vec();
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Check if there are no key-value pairs in the block.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.offsets.is_empty()
|
||||
}
|
||||
|
||||
/// Finalize the block.
|
||||
pub fn build(self) -> Block {
|
||||
if self.is_empty() {
|
||||
panic!("block should not be empty");
|
||||
}
|
||||
Block {
|
||||
data: self.data,
|
||||
offsets: self.offsets,
|
||||
}
|
||||
}
|
||||
}
|
||||
137
mini-lsm-mvcc/src/block/iterator.rs
Normal file
137
mini-lsm-mvcc/src/block/iterator.rs
Normal file
@@ -0,0 +1,137 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Buf;
|
||||
|
||||
use crate::{
|
||||
block::SIZEOF_U16,
|
||||
key::{KeySlice, KeyVec},
|
||||
};
|
||||
|
||||
use super::Block;
|
||||
|
||||
/// Iterates on a block.
|
||||
pub struct BlockIterator {
|
||||
/// reference to the block
|
||||
block: Arc<Block>,
|
||||
/// the current key at the iterator position
|
||||
key: KeyVec,
|
||||
/// the value range from the block
|
||||
value_range: (usize, usize),
|
||||
/// the current index at the iterator position
|
||||
idx: usize,
|
||||
/// the first key in the block
|
||||
first_key: KeyVec,
|
||||
}
|
||||
|
||||
impl Block {
|
||||
fn get_first_key(&self) -> KeyVec {
|
||||
let mut buf = &self.data[..];
|
||||
buf.get_u16();
|
||||
let key_len = buf.get_u16();
|
||||
let key = &buf[..key_len as usize];
|
||||
KeyVec::from_vec(key.to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockIterator {
|
||||
fn new(block: Arc<Block>) -> Self {
|
||||
Self {
|
||||
first_key: block.get_first_key(),
|
||||
block,
|
||||
key: KeyVec::new(),
|
||||
value_range: (0, 0),
|
||||
idx: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a block iterator and seek to the first entry.
|
||||
pub fn create_and_seek_to_first(block: Arc<Block>) -> Self {
|
||||
let mut iter = Self::new(block);
|
||||
iter.seek_to_first();
|
||||
iter
|
||||
}
|
||||
|
||||
/// Creates a block iterator and seek to the first key that >= `key`.
|
||||
pub fn create_and_seek_to_key(block: Arc<Block>, key: KeySlice) -> Self {
|
||||
let mut iter = Self::new(block);
|
||||
iter.seek_to_key(key);
|
||||
iter
|
||||
}
|
||||
|
||||
/// Returns the key of the current entry.
|
||||
pub fn key(&self) -> KeySlice {
|
||||
debug_assert!(!self.key.is_empty(), "invalid iterator");
|
||||
self.key.as_key_slice()
|
||||
}
|
||||
|
||||
/// Returns the value of the current entry.
|
||||
pub fn value(&self) -> &[u8] {
|
||||
debug_assert!(!self.key.is_empty(), "invalid iterator");
|
||||
&self.block.data[self.value_range.0..self.value_range.1]
|
||||
}
|
||||
|
||||
/// Returns true if the iterator is valid.
|
||||
pub fn is_valid(&self) -> bool {
|
||||
!self.key.is_empty()
|
||||
}
|
||||
|
||||
/// Seeks to the first key in the block.
|
||||
pub fn seek_to_first(&mut self) {
|
||||
self.seek_to(0);
|
||||
}
|
||||
|
||||
/// Seeks to the idx-th key in the block.
|
||||
fn seek_to(&mut self, idx: usize) {
|
||||
if idx >= self.block.offsets.len() {
|
||||
self.key.clear();
|
||||
self.value_range = (0, 0);
|
||||
return;
|
||||
}
|
||||
let offset = self.block.offsets[idx] as usize;
|
||||
self.seek_to_offset(offset);
|
||||
self.idx = idx;
|
||||
}
|
||||
|
||||
/// Move to the next key in the block.
|
||||
pub fn next(&mut self) {
|
||||
self.idx += 1;
|
||||
self.seek_to(self.idx);
|
||||
}
|
||||
|
||||
/// Seek to the specified position and update the current `key` and `value`
|
||||
/// Index update will be handled by caller
|
||||
fn seek_to_offset(&mut self, offset: usize) {
|
||||
let mut entry = &self.block.data[offset..];
|
||||
// Since `get_u16()` will automatically move the ptr 2 bytes ahead here,
|
||||
// we don't need to manually advance it
|
||||
let overlap_len = entry.get_u16() as usize;
|
||||
let key_len = entry.get_u16() as usize;
|
||||
let key = &entry[..key_len];
|
||||
self.key.clear();
|
||||
self.key.append(&self.first_key.raw_ref()[..overlap_len]);
|
||||
self.key.append(key);
|
||||
entry.advance(key_len);
|
||||
let value_len = entry.get_u16() as usize;
|
||||
let value_offset_begin = offset + SIZEOF_U16 + SIZEOF_U16 + key_len + SIZEOF_U16;
|
||||
let value_offset_end = value_offset_begin + value_len;
|
||||
self.value_range = (value_offset_begin, value_offset_end);
|
||||
entry.advance(value_len);
|
||||
}
|
||||
|
||||
/// Seek to the first key that is >= `key`.
|
||||
pub fn seek_to_key(&mut self, key: KeySlice) {
|
||||
let mut low = 0;
|
||||
let mut high = self.block.offsets.len();
|
||||
while low < high {
|
||||
let mid = low + (high - low) / 2;
|
||||
self.seek_to(mid);
|
||||
assert!(self.is_valid());
|
||||
match self.key().cmp(&key) {
|
||||
std::cmp::Ordering::Less => low = mid + 1,
|
||||
std::cmp::Ordering::Greater => high = mid,
|
||||
std::cmp::Ordering::Equal => return,
|
||||
}
|
||||
}
|
||||
self.seek_to(low);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user