checkin initial MVCC codebase

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi
2024-01-25 12:07:53 +08:00
committed by Alex Chi Z
parent 3211af8d74
commit 753e6d4f9e
43 changed files with 3889 additions and 2 deletions

View File

@@ -0,0 +1,113 @@
// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0.
use bytes::{BufMut, Bytes, BytesMut};
/// A bloom filter: a bit array plus the number of probes made per key hash.
///
/// `encode` writes the filter bytes followed by one trailing byte holding `k`;
/// `decode` reads that same layout back.
pub struct Bloom {
/// The filter's bit array. Bit `i` is stored in byte `i / 8` at bit position
/// `i % 8` (least-significant bit first), as addressed by `BitSlice::get_bit`.
pub(crate) filter: Bytes,
/// Number of bit positions probed per key hash. `build_from_key_hashes`
/// keeps this in `1..=30`; `may_contain` answers `true` for any value above 30.
pub(crate) k: u8,
}
/// Read-only, bit-level view over a byte container.
///
/// Bit `idx` lives in byte `idx / 8`, at bit position `idx % 8` counted from
/// the least-significant bit.
pub trait BitSlice {
    /// Returns whether bit `idx` is set. Panics if `idx / 8` is out of bounds.
    fn get_bit(&self, idx: usize) -> bool;
    /// Returns the number of addressable bits (eight per byte).
    fn bit_len(&self) -> usize;
}

/// Mutable counterpart of `BitSlice`, using the same bit numbering.
pub trait BitSliceMut {
    /// Sets bit `idx` when `val` is `true` and clears it otherwise.
    /// Panics if `idx / 8` is out of bounds.
    fn set_bit(&mut self, idx: usize, val: bool);
}

impl<T: AsRef<[u8]>> BitSlice for T {
    fn get_bit(&self, idx: usize) -> bool {
        let byte = self.as_ref()[idx / 8];
        let mask = 1u8 << (idx % 8);
        byte & mask != 0
    }

    fn bit_len(&self) -> usize {
        8 * self.as_ref().len()
    }
}

impl<T: AsMut<[u8]>> BitSliceMut for T {
    fn set_bit(&mut self, idx: usize, val: bool) {
        let mask = 1u8 << (idx % 8);
        let byte = &mut self.as_mut()[idx / 8];
        if val {
            *byte |= mask;
        } else {
            *byte &= !mask;
        }
    }
}
impl Bloom {
/// Decode a bloom filter
pub fn decode(buf: &[u8]) -> Self {
let filter = &buf[..buf.len() - 1];
let k = buf[buf.len() - 1];
Self {
filter: filter.to_vec().into(),
k,
}
}
/// Encode a bloom filter
pub fn encode(&self, buf: &mut Vec<u8>) {
buf.extend(&self.filter);
buf.put_u8(self.k);
}
/// Get bloom filter bits per key from entries count and FPR
pub fn bloom_bits_per_key(entries: usize, false_positive_rate: f64) -> usize {
let size =
-1.0 * (entries as f64) * false_positive_rate.ln() / std::f64::consts::LN_2.powi(2);
let locs = (size / (entries as f64)).ceil();
locs as usize
}
/// Build bloom filter from key hashes
pub fn build_from_key_hashes(keys: &[u32], bits_per_key: usize) -> Self {
let k = (bits_per_key as f64 * 0.69) as u32;
let k = k.min(30).max(1);
let nbits = (keys.len() * bits_per_key).max(64);
let nbytes = (nbits + 7) / 8;
let nbits = nbytes * 8;
let mut filter = BytesMut::with_capacity(nbytes);
filter.resize(nbytes, 0);
for h in keys {
let mut h = *h;
let delta = (h >> 17) | (h << 15);
for _ in 0..k {
let bit_pos = (h as usize) % nbits;
filter.set_bit(bit_pos, true);
h = h.wrapping_add(delta);
}
}
Self {
filter: filter.freeze(),
k: k as u8,
}
}
/// Check if a bloom filter may contain some data
pub fn may_contain(&self, mut h: u32) -> bool {
if self.k > 30 {
// potential new encoding for short bloom filters
true
} else {
let nbits = self.filter.bit_len();
let delta = (h >> 17) | (h << 15);
for _ in 0..self.k {
let bit_pos = h % (nbits as u32);
if !self.filter.get_bit(bit_pos as usize) {
return false;
}
h = h.wrapping_add(delta);
}
true
}
}
}

View File

@@ -0,0 +1,112 @@
use std::path::Path;
use std::sync::Arc;
use anyhow::Result;
use bytes::BufMut;
use super::bloom::Bloom;
use super::{BlockMeta, FileObject, SsTable};
use crate::block::BlockBuilder;
use crate::key::{KeySlice, KeyVec};
use crate::lsm_storage::BlockCache;
/// Builds an SSTable from key-value pairs.
///
/// Entries are accumulated into the current block builder; each finished block
/// is appended to `data` in encoded form and described by one entry in `meta`.
pub struct SsTableBuilder {
/// Builder for the block currently being filled.
builder: BlockBuilder,
/// First key of the block currently being filled (empty until a key is added).
first_key: KeyVec,
/// Last key added to the block currently being filled.
last_key: KeyVec,
/// Encoded bytes of all blocks finished so far.
data: Vec<u8>,
/// One `BlockMeta` (offset, first key, last key) per finished block.
pub(crate) meta: Vec<BlockMeta>,
/// Block size passed to every `BlockBuilder` this builder creates.
block_size: usize,
/// `farmhash::fingerprint32` of every key added; input to the bloom filter.
key_hashes: Vec<u32>,
}
impl SsTableBuilder {
/// Create a builder based on target block size.
pub fn new(block_size: usize) -> Self {
Self {
data: Vec::new(),
meta: Vec::new(),
first_key: KeyVec::new(),
last_key: KeyVec::new(),
block_size,
builder: BlockBuilder::new(block_size),
key_hashes: Vec::new(),
}
}
/// Adds a key-value pair to SSTable
pub fn add(&mut self, key: KeySlice, value: &[u8]) {
if self.first_key.is_empty() {
self.first_key.set_from_slice(key);
}
self.key_hashes.push(farmhash::fingerprint32(key.raw_ref()));
if self.builder.add(key, value) {
self.last_key.set_from_slice(key);
return;
}
// create a new block builder and append block data
self.finish_block();
// add the key-value pair to the next block
assert!(self.builder.add(key, value));
self.first_key.set_from_slice(key);
self.last_key.set_from_slice(key);
}
/// Get the estimated size of the SSTable.
pub fn estimated_size(&self) -> usize {
self.data.len()
}
fn finish_block(&mut self) {
let builder = std::mem::replace(&mut self.builder, BlockBuilder::new(self.block_size));
let encoded_block = builder.build().encode();
self.meta.push(BlockMeta {
offset: self.data.len(),
first_key: std::mem::take(&mut self.first_key).into_key_bytes(),
last_key: std::mem::take(&mut self.last_key).into_key_bytes(),
});
self.data.extend(encoded_block);
}
/// Builds the SSTable and writes it to the given path. Use the `FileObject` structure to manipulate the disk objects.
pub fn build(
mut self,
id: usize,
block_cache: Option<Arc<BlockCache>>,
path: impl AsRef<Path>,
) -> Result<SsTable> {
self.finish_block();
let mut buf = self.data;
let meta_offset = buf.len();
BlockMeta::encode_block_meta(&self.meta, &mut buf);
buf.put_u32(meta_offset as u32);
let bloom = Bloom::build_from_key_hashes(
&self.key_hashes,
Bloom::bloom_bits_per_key(self.key_hashes.len(), 0.01),
);
let bloom_offset = buf.len();
bloom.encode(&mut buf);
buf.put_u32(bloom_offset as u32);
let file = FileObject::create(path.as_ref(), buf)?;
Ok(SsTable {
id,
file,
first_key: self.meta.first().unwrap().first_key.clone(),
last_key: self.meta.last().unwrap().last_key.clone(),
block_meta: self.meta,
block_meta_offset: meta_offset,
block_cache,
bloom: Some(bloom),
})
}
#[cfg(test)]
pub(crate) fn build_for_test(self, path: impl AsRef<Path>) -> Result<SsTable> {
self.build(0, None, path)
}
}

View File

@@ -0,0 +1,105 @@
use std::sync::Arc;
use anyhow::Result;
use super::SsTable;
use crate::block::BlockIterator;
use crate::iterators::StorageIterator;
use crate::key::KeySlice;
/// An iterator over the contents of an SSTable.
///
/// It holds an iterator over one block at a time and loads the next block
/// when the current one is exhausted.
pub struct SsTableIterator {
/// Table being iterated; blocks are fetched via `read_block_cached`.
table: Arc<SsTable>,
/// Iterator over the most recently loaded block.
blk_iter: BlockIterator,
/// Index of the current block. It can reach `num_of_blocks()` once iteration
/// runs past the last block, in which case `blk_iter` is left exhausted.
blk_idx: usize,
}
impl SsTableIterator {
    /// Loads block 0 and returns its index with an iterator at its first entry.
    fn seek_to_first_inner(table: &Arc<SsTable>) -> Result<(usize, BlockIterator)> {
        let first_block = table.read_block_cached(0)?;
        Ok((0, BlockIterator::create_and_seek_to_first(first_block)))
    }

    /// Create a new iterator and seek to the first key-value pair.
    pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> {
        let (blk_idx, blk_iter) = Self::seek_to_first_inner(&table)?;
        Ok(Self {
            table,
            blk_iter,
            blk_idx,
        })
    }

    /// Seek to the first key-value pair.
    pub fn seek_to_first(&mut self) -> Result<()> {
        (self.blk_idx, self.blk_iter) = Self::seek_to_first_inner(&self.table)?;
        Ok(())
    }

    /// Finds the block chosen by `find_block_idx` for `key` and seeks inside it.
    ///
    /// If that block has no entry at or after `key`, the position moves to the
    /// start of the following block when one exists. Otherwise the exhausted
    /// iterator is returned together with an index one past the searched block.
    fn seek_to_key_inner(table: &Arc<SsTable>, key: KeySlice) -> Result<(usize, BlockIterator)> {
        let candidate = table.find_block_idx(key);
        let iter = BlockIterator::create_and_seek_to_key(table.read_block_cached(candidate)?, key);
        if iter.is_valid() {
            return Ok((candidate, iter));
        }

        let following = candidate + 1;
        if following < table.num_of_blocks() {
            let block = table.read_block_cached(following)?;
            return Ok((following, BlockIterator::create_and_seek_to_first(block)));
        }
        Ok((following, iter))
    }

    /// Create a new iterator and seek to the first key-value pair which >= `key`.
    pub fn create_and_seek_to_key(table: Arc<SsTable>, key: KeySlice) -> Result<Self> {
        let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?;
        Ok(Self {
            table,
            blk_iter,
            blk_idx,
        })
    }

    /// Seek to the first key-value pair which >= `key`.
    pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> {
        (self.blk_idx, self.blk_iter) = Self::seek_to_key_inner(&self.table, key)?;
        Ok(())
    }
}
impl StorageIterator for SsTableIterator {
    type KeyType<'a> = KeySlice<'a>;

    /// Value at the current position, delegated to the block iterator.
    fn value(&self) -> &[u8] {
        self.blk_iter.value()
    }

    /// Key at the current position, delegated to the block iterator.
    fn key(&self) -> KeySlice {
        self.blk_iter.key()
    }

    /// The iterator is valid exactly when the current block iterator is.
    fn is_valid(&self) -> bool {
        self.blk_iter.is_valid()
    }

    /// Advances by one entry, loading the next block when the current block
    /// runs out. Past the last block the exhausted block iterator is kept.
    fn next(&mut self) -> Result<()> {
        self.blk_iter.next();
        if self.blk_iter.is_valid() {
            return Ok(());
        }

        // Current block is exhausted: move on to the following one, if any.
        self.blk_idx += 1;
        if self.blk_idx >= self.table.num_of_blocks() {
            return Ok(());
        }
        let block = self.table.read_block_cached(self.blk_idx)?;
        self.blk_iter = BlockIterator::create_and_seek_to_first(block);
        Ok(())
    }
}