checkin initial MVCC codebase

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi
2024-01-25 12:07:53 +08:00
committed by Alex Chi Z
parent 3211af8d74
commit 753e6d4f9e
43 changed files with 3889 additions and 2 deletions

View File

@@ -0,0 +1,94 @@
use bytes::BufMut;
use crate::key::{KeySlice, KeyVec};
use super::{Block, SIZEOF_U16};
/// Incrementally assembles a block out of key-value entries.
///
/// Entries are appended with `add` and the finished block is produced by
/// `build`. Keys are stored relative to the first key of the block: only the
/// bytes that differ from the shared prefix are written.
pub struct BlockBuilder {
/// Byte offset into `data` at which each key-value entry starts, in insertion order.
offsets: Vec<u16>,
/// All serialized key-value entries of the block, concatenated.
data: Vec<u8>,
/// The target block size in bytes. `add` rejects an entry that would push the
/// estimated size past this value, unless the block is still empty.
block_size: usize,
/// The first key added to the block (empty until the first `add`); later keys
/// are encoded as their overlap with this key plus the remaining bytes.
first_key: KeyVec,
}
/// Counts how many leading bytes `key` has in common with `first_key`.
///
/// The scan stops at the first position where the two keys differ, or as soon
/// as either key runs out of bytes, so the result never exceeds the length of
/// the shorter key.
fn compute_overlap(first_key: KeySlice, key: KeySlice) -> usize {
    // Only positions that exist in both keys can be part of the common prefix.
    let limit = first_key.len().min(key.len());
    let (lhs, rhs) = (first_key.raw_ref(), key.raw_ref());
    (0..limit).take_while(|&pos| lhs[pos] == rhs[pos]).count()
}
impl BlockBuilder {
    /// Creates an empty builder that aims for blocks of about `block_size` bytes.
    pub fn new(block_size: usize) -> Self {
        Self {
            offsets: Vec::new(),
            data: Vec::new(),
            block_size,
            first_key: KeyVec::new(),
        }
    }

    /// Estimated size in bytes of the block built so far: one `u16` for the
    /// number of entries, one `u16` offset per entry, plus the entry data.
    fn estimated_size(&self) -> usize {
        SIZEOF_U16 /* number of key-value pairs in the block */
            + self.offsets.len() * SIZEOF_U16 /* offsets */
            + self.data.len() /* key-value pairs */
    }

    /// Narrows `n` to the `u16` used by the block format.
    ///
    /// # Panics
    ///
    /// Panics if `n` is larger than `u16::MAX`. A bare `as u16` cast would
    /// silently drop the high bits and write a corrupted block instead.
    fn to_u16_field(n: usize, what: &str) -> u16 {
        assert!(
            n <= u16::MAX as usize,
            "{} ({}) does not fit in a u16 block field",
            what,
            n
        );
        n as u16
    }

    /// Adds a key-value pair to the block. Returns false when the block is full.
    ///
    /// An entry is always accepted into an empty block, even if it is larger
    /// than the target block size. Each entry is laid out as
    /// `overlap_len (u16) | rest_key_len (u16) | rest_key | value_len (u16) | value`,
    /// where `overlap_len` is the number of leading bytes shared with the first
    /// key of the block and `rest_key` is the remainder of the key.
    ///
    /// # Panics
    ///
    /// Panics if `key` is empty, or if the entry offset, the key length or the
    /// value length cannot be represented as a `u16`.
    #[must_use]
    pub fn add(&mut self, key: KeySlice, value: &[u8]) -> bool {
        assert!(!key.is_empty(), "key must not be empty");

        // The size check is an estimate: it counts the full key length and
        // three u16 fields (key_len, value_len and offset) for the new entry.
        let projected = self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3;
        if projected > self.block_size && !self.is_empty() {
            return false;
        }

        // Validate every narrowing before touching any state. The first entry
        // of a block skips the size check above, so oversized keys or values
        // can reach this point and must not be truncated silently.
        let entry_offset = Self::to_u16_field(self.data.len(), "entry offset");
        let overlap = compute_overlap(self.first_key.as_key_slice(), key);
        let overlap_len = Self::to_u16_field(overlap, "key overlap length");
        let rest_key_len = Self::to_u16_field(key.len() - overlap, "key suffix length");
        let value_len = Self::to_u16_field(value.len(), "value length");

        // Add the offset of the data into the offset array.
        self.offsets.push(entry_offset);
        // Encode key overlap.
        self.data.put_u16(overlap_len);
        // Encode the length of the non-shared part of the key.
        self.data.put_u16(rest_key_len);
        // Encode the non-shared part of the key.
        self.data.put(&key.raw_ref()[overlap..]);
        // Encode value length.
        self.data.put_u16(value_len);
        // Encode value content.
        self.data.put(value);

        // The first key added becomes the reference for prefix compression.
        if self.first_key.is_empty() {
            self.first_key = key.to_key_vec();
        }
        true
    }

    /// Check if there are no key-value pairs in the block.
    pub fn is_empty(&self) -> bool {
        self.offsets.is_empty()
    }

    /// Finalize the block.
    ///
    /// # Panics
    ///
    /// Panics if no entry has been added.
    pub fn build(self) -> Block {
        if self.is_empty() {
            panic!("block should not be empty");
        }
        Block {
            data: self.data,
            offsets: self.offsets,
        }
    }
}

View File

@@ -0,0 +1,137 @@
use std::sync::Arc;
use bytes::Buf;
use crate::{
block::SIZEOF_U16,
key::{KeySlice, KeyVec},
};
use super::Block;
/// Iterates over the key-value entries of a single block.
///
/// The iterator is considered valid while `key` is non-empty; seeking past the
/// last entry clears `key`.
pub struct BlockIterator {
/// Shared reference to the block being iterated.
block: Arc<Block>,
/// The full key at the current iterator position (empty when the iterator is invalid).
key: KeyVec,
/// The `(begin, end)` byte range of the current value inside the block's data.
value_range: (usize, usize),
/// The index of the current entry in the block's offset array.
idx: usize,
/// The first key in the block, used to restore the prefix shared by later keys.
first_key: KeyVec,
}
impl Block {
    /// Decodes the key of the first entry stored in this block.
    ///
    /// The leading `u16` of the entry (the overlap field) is read and
    /// discarded, the following `u16` gives the key length, and that many
    /// bytes are copied into a new `KeyVec`.
    fn get_first_key(&self) -> KeyVec {
        let mut cursor = &self.data[..];
        // Skip the overlap field; only the key length and bytes are needed.
        let _overlap = cursor.get_u16();
        let len = cursor.get_u16() as usize;
        let first = cursor[..len].to_vec();
        KeyVec::from_vec(first)
    }
}
impl BlockIterator {
    /// Builds an iterator over `block` that is not yet positioned on an entry.
    fn new(block: Arc<Block>) -> Self {
        // Decode the first key before `block` is moved into the struct.
        let first_key = block.get_first_key();
        Self {
            block,
            key: KeyVec::new(),
            value_range: (0, 0),
            idx: 0,
            first_key,
        }
    }

    /// Creates a block iterator positioned on the first entry.
    pub fn create_and_seek_to_first(block: Arc<Block>) -> Self {
        let mut it = Self::new(block);
        it.seek_to_first();
        it
    }

    /// Creates a block iterator positioned on the first key that is >= `key`.
    pub fn create_and_seek_to_key(block: Arc<Block>, key: KeySlice) -> Self {
        let mut it = Self::new(block);
        it.seek_to_key(key);
        it
    }

    /// Returns the key of the current entry.
    pub fn key(&self) -> KeySlice {
        debug_assert!(self.is_valid(), "invalid iterator");
        self.key.as_key_slice()
    }

    /// Returns the value of the current entry, borrowed from the block data.
    pub fn value(&self) -> &[u8] {
        debug_assert!(self.is_valid(), "invalid iterator");
        let (begin, end) = self.value_range;
        &self.block.data[begin..end]
    }

    /// Returns true if the iterator is valid, i.e. its current key is non-empty.
    pub fn is_valid(&self) -> bool {
        !self.key.is_empty()
    }

    /// Seeks to the first key in the block.
    pub fn seek_to_first(&mut self) {
        self.seek_to(0);
    }

    /// Seeks to the idx-th key in the block.
    ///
    /// An out-of-range `idx` invalidates the iterator by clearing the current
    /// key and value range; the stored index is left untouched in that case.
    fn seek_to(&mut self, idx: usize) {
        let slot = self.block.offsets.get(idx).copied();
        if let Some(offset) = slot {
            self.seek_to_offset(offset as usize);
            self.idx = idx;
        } else {
            self.key.clear();
            self.value_range = (0, 0);
        }
    }

    /// Move to the next key in the block.
    pub fn next(&mut self) {
        let following = self.idx + 1;
        // Record the new index first: `seek_to` does not update it when the
        // position is past the end of the block.
        self.idx = following;
        self.seek_to(following);
    }

    /// Decodes the entry starting at byte `offset` and makes it the current
    /// `key` / `value`. Updating the index is left to the caller.
    fn seek_to_offset(&mut self, offset: usize) {
        let mut cursor = &self.block.data[offset..];
        // Each `get_u16()` consumes two bytes, so the cursor moves on its own.
        let shared_len = cursor.get_u16() as usize;
        let suffix_len = cursor.get_u16() as usize;
        let suffix = &cursor[..suffix_len];
        // Rebuild the full key: prefix shared with the first key + stored suffix.
        self.key.clear();
        self.key.append(&self.first_key.raw_ref()[..shared_len]);
        self.key.append(suffix);
        cursor.advance(suffix_len);
        let value_len = cursor.get_u16() as usize;
        // The value starts after the three u16 fields and the key suffix.
        let value_begin = offset + SIZEOF_U16 * 3 + suffix_len;
        self.value_range = (value_begin, value_begin + value_len);
        // Step over the value bytes; the cursor is local, so this does not
        // change any iterator state.
        cursor.advance(value_len);
    }

    /// Seek to the first key that is >= `key`.
    ///
    /// Binary-searches the entries of the block. If no such key exists the
    /// iterator ends up invalid.
    pub fn seek_to_key(&mut self, key: KeySlice) {
        let (mut lo, mut hi) = (0, self.block.offsets.len());
        while lo < hi {
            let probe = lo + (hi - lo) / 2;
            self.seek_to(probe);
            assert!(self.is_valid());
            let ordering = self.key().cmp(&key);
            if ordering == std::cmp::Ordering::Equal {
                // Exact match: the iterator already points at it.
                return;
            }
            if ordering == std::cmp::Ordering::Less {
                lo = probe + 1;
            } else {
                hi = probe;
            }
        }
        self.seek_to(lo);
    }
}