checkin initial MVCC codebase
Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
113
mini-lsm-mvcc/src/table/bloom.rs
Normal file
113
mini-lsm-mvcc/src/table/bloom.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0.
|
||||
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
|
||||
/// Implements a bloom filter
|
||||
pub struct Bloom {
|
||||
/// data of filter in bits
|
||||
pub(crate) filter: Bytes,
|
||||
/// number of hash functions
|
||||
pub(crate) k: u8,
|
||||
}
|
||||
|
||||
/// Read-only, bit-granular view over a byte container.
///
/// Bit `idx` lives in byte `idx / 8`, at position `idx % 8` counted from the
/// least significant bit.
pub trait BitSlice {
    /// Returns whether bit `idx` is set. Panics if `idx` is past the last byte.
    fn get_bit(&self, idx: usize) -> bool;
    /// Returns the total number of addressable bits (eight per byte).
    fn bit_len(&self) -> usize;
}

/// Mutable, bit-granular view over a byte container.
pub trait BitSliceMut {
    /// Sets (`val == true`) or clears (`val == false`) bit `idx`.
    /// Panics if `idx` is past the last byte.
    fn set_bit(&mut self, idx: usize, val: bool);
}

impl<T: AsRef<[u8]>> BitSlice for T {
    fn get_bit(&self, idx: usize) -> bool {
        let bytes: &[u8] = self.as_ref();
        // Move the requested bit down to position 0 and test it.
        ((bytes[idx / 8] >> (idx % 8)) & 1) == 1
    }

    fn bit_len(&self) -> usize {
        let bytes: &[u8] = self.as_ref();
        bytes.len() * 8
    }
}

impl<T: AsMut<[u8]>> BitSliceMut for T {
    fn set_bit(&mut self, idx: usize, val: bool) {
        let mask = 1u8 << (idx % 8);
        let bytes: &mut [u8] = self.as_mut();
        let slot = &mut bytes[idx / 8];
        if val {
            *slot |= mask;
        } else {
            *slot &= !mask;
        }
    }
}
|
||||
|
||||
impl Bloom {
|
||||
/// Decode a bloom filter
|
||||
pub fn decode(buf: &[u8]) -> Self {
|
||||
let filter = &buf[..buf.len() - 1];
|
||||
let k = buf[buf.len() - 1];
|
||||
Self {
|
||||
filter: filter.to_vec().into(),
|
||||
k,
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode a bloom filter
|
||||
pub fn encode(&self, buf: &mut Vec<u8>) {
|
||||
buf.extend(&self.filter);
|
||||
buf.put_u8(self.k);
|
||||
}
|
||||
|
||||
/// Get bloom filter bits per key from entries count and FPR
|
||||
pub fn bloom_bits_per_key(entries: usize, false_positive_rate: f64) -> usize {
|
||||
let size =
|
||||
-1.0 * (entries as f64) * false_positive_rate.ln() / std::f64::consts::LN_2.powi(2);
|
||||
let locs = (size / (entries as f64)).ceil();
|
||||
locs as usize
|
||||
}
|
||||
|
||||
/// Build bloom filter from key hashes
|
||||
pub fn build_from_key_hashes(keys: &[u32], bits_per_key: usize) -> Self {
|
||||
let k = (bits_per_key as f64 * 0.69) as u32;
|
||||
let k = k.min(30).max(1);
|
||||
let nbits = (keys.len() * bits_per_key).max(64);
|
||||
let nbytes = (nbits + 7) / 8;
|
||||
let nbits = nbytes * 8;
|
||||
let mut filter = BytesMut::with_capacity(nbytes);
|
||||
filter.resize(nbytes, 0);
|
||||
for h in keys {
|
||||
let mut h = *h;
|
||||
let delta = (h >> 17) | (h << 15);
|
||||
for _ in 0..k {
|
||||
let bit_pos = (h as usize) % nbits;
|
||||
filter.set_bit(bit_pos, true);
|
||||
h = h.wrapping_add(delta);
|
||||
}
|
||||
}
|
||||
Self {
|
||||
filter: filter.freeze(),
|
||||
k: k as u8,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a bloom filter may contain some data
|
||||
pub fn may_contain(&self, mut h: u32) -> bool {
|
||||
if self.k > 30 {
|
||||
// potential new encoding for short bloom filters
|
||||
true
|
||||
} else {
|
||||
let nbits = self.filter.bit_len();
|
||||
let delta = (h >> 17) | (h << 15);
|
||||
for _ in 0..self.k {
|
||||
let bit_pos = h % (nbits as u32);
|
||||
if !self.filter.get_bit(bit_pos as usize) {
|
||||
return false;
|
||||
}
|
||||
h = h.wrapping_add(delta);
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
112
mini-lsm-mvcc/src/table/builder.rs
Normal file
112
mini-lsm-mvcc/src/table/builder.rs
Normal file
@@ -0,0 +1,112 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use bytes::BufMut;
|
||||
|
||||
use super::bloom::Bloom;
|
||||
use super::{BlockMeta, FileObject, SsTable};
|
||||
use crate::block::BlockBuilder;
|
||||
use crate::key::{KeySlice, KeyVec};
|
||||
use crate::lsm_storage::BlockCache;
|
||||
|
||||
/// Builds an SSTable from key-value pairs.
|
||||
pub struct SsTableBuilder {
|
||||
builder: BlockBuilder,
|
||||
first_key: KeyVec,
|
||||
last_key: KeyVec,
|
||||
data: Vec<u8>,
|
||||
pub(crate) meta: Vec<BlockMeta>,
|
||||
block_size: usize,
|
||||
key_hashes: Vec<u32>,
|
||||
}
|
||||
|
||||
impl SsTableBuilder {
|
||||
/// Create a builder based on target block size.
|
||||
pub fn new(block_size: usize) -> Self {
|
||||
Self {
|
||||
data: Vec::new(),
|
||||
meta: Vec::new(),
|
||||
first_key: KeyVec::new(),
|
||||
last_key: KeyVec::new(),
|
||||
block_size,
|
||||
builder: BlockBuilder::new(block_size),
|
||||
key_hashes: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds a key-value pair to SSTable
|
||||
pub fn add(&mut self, key: KeySlice, value: &[u8]) {
|
||||
if self.first_key.is_empty() {
|
||||
self.first_key.set_from_slice(key);
|
||||
}
|
||||
|
||||
self.key_hashes.push(farmhash::fingerprint32(key.raw_ref()));
|
||||
|
||||
if self.builder.add(key, value) {
|
||||
self.last_key.set_from_slice(key);
|
||||
return;
|
||||
}
|
||||
|
||||
// create a new block builder and append block data
|
||||
self.finish_block();
|
||||
|
||||
// add the key-value pair to the next block
|
||||
assert!(self.builder.add(key, value));
|
||||
self.first_key.set_from_slice(key);
|
||||
self.last_key.set_from_slice(key);
|
||||
}
|
||||
|
||||
/// Get the estimated size of the SSTable.
|
||||
pub fn estimated_size(&self) -> usize {
|
||||
self.data.len()
|
||||
}
|
||||
|
||||
fn finish_block(&mut self) {
|
||||
let builder = std::mem::replace(&mut self.builder, BlockBuilder::new(self.block_size));
|
||||
let encoded_block = builder.build().encode();
|
||||
self.meta.push(BlockMeta {
|
||||
offset: self.data.len(),
|
||||
first_key: std::mem::take(&mut self.first_key).into_key_bytes(),
|
||||
last_key: std::mem::take(&mut self.last_key).into_key_bytes(),
|
||||
});
|
||||
self.data.extend(encoded_block);
|
||||
}
|
||||
|
||||
/// Builds the SSTable and writes it to the given path. Use the `FileObject` structure to manipulate the disk objects.
|
||||
pub fn build(
|
||||
mut self,
|
||||
id: usize,
|
||||
block_cache: Option<Arc<BlockCache>>,
|
||||
path: impl AsRef<Path>,
|
||||
) -> Result<SsTable> {
|
||||
self.finish_block();
|
||||
let mut buf = self.data;
|
||||
let meta_offset = buf.len();
|
||||
BlockMeta::encode_block_meta(&self.meta, &mut buf);
|
||||
buf.put_u32(meta_offset as u32);
|
||||
let bloom = Bloom::build_from_key_hashes(
|
||||
&self.key_hashes,
|
||||
Bloom::bloom_bits_per_key(self.key_hashes.len(), 0.01),
|
||||
);
|
||||
let bloom_offset = buf.len();
|
||||
bloom.encode(&mut buf);
|
||||
buf.put_u32(bloom_offset as u32);
|
||||
let file = FileObject::create(path.as_ref(), buf)?;
|
||||
Ok(SsTable {
|
||||
id,
|
||||
file,
|
||||
first_key: self.meta.first().unwrap().first_key.clone(),
|
||||
last_key: self.meta.last().unwrap().last_key.clone(),
|
||||
block_meta: self.meta,
|
||||
block_meta_offset: meta_offset,
|
||||
block_cache,
|
||||
bloom: Some(bloom),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn build_for_test(self, path: impl AsRef<Path>) -> Result<SsTable> {
|
||||
self.build(0, None, path)
|
||||
}
|
||||
}
|
||||
105
mini-lsm-mvcc/src/table/iterator.rs
Normal file
105
mini-lsm-mvcc/src/table/iterator.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use super::SsTable;
|
||||
use crate::block::BlockIterator;
|
||||
use crate::iterators::StorageIterator;
|
||||
use crate::key::KeySlice;
|
||||
|
||||
/// An iterator over the contents of an SSTable.
|
||||
pub struct SsTableIterator {
|
||||
table: Arc<SsTable>,
|
||||
blk_iter: BlockIterator,
|
||||
blk_idx: usize,
|
||||
}
|
||||
|
||||
impl SsTableIterator {
|
||||
fn seek_to_first_inner(table: &Arc<SsTable>) -> Result<(usize, BlockIterator)> {
|
||||
Ok((
|
||||
0,
|
||||
BlockIterator::create_and_seek_to_first(table.read_block_cached(0)?),
|
||||
))
|
||||
}
|
||||
|
||||
/// Create a new iterator and seek to the first key-value pair.
|
||||
pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_first_inner(&table)?;
|
||||
let iter = Self {
|
||||
blk_iter,
|
||||
table,
|
||||
blk_idx,
|
||||
};
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
/// Seek to the first key-value pair.
|
||||
pub fn seek_to_first(&mut self) -> Result<()> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_first_inner(&self.table)?;
|
||||
self.blk_idx = blk_idx;
|
||||
self.blk_iter = blk_iter;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn seek_to_key_inner(table: &Arc<SsTable>, key: KeySlice) -> Result<(usize, BlockIterator)> {
|
||||
let mut blk_idx = table.find_block_idx(key);
|
||||
let mut blk_iter =
|
||||
BlockIterator::create_and_seek_to_key(table.read_block_cached(blk_idx)?, key);
|
||||
if !blk_iter.is_valid() {
|
||||
blk_idx += 1;
|
||||
if blk_idx < table.num_of_blocks() {
|
||||
blk_iter =
|
||||
BlockIterator::create_and_seek_to_first(table.read_block_cached(blk_idx)?);
|
||||
}
|
||||
}
|
||||
Ok((blk_idx, blk_iter))
|
||||
}
|
||||
|
||||
/// Create a new iterator and seek to the first key-value pair which >= `key`.
|
||||
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: KeySlice) -> Result<Self> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?;
|
||||
let iter = Self {
|
||||
blk_iter,
|
||||
table,
|
||||
blk_idx,
|
||||
};
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
/// Seek to the first key-value pair which >= `key`.
|
||||
pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&self.table, key)?;
|
||||
self.blk_iter = blk_iter;
|
||||
self.blk_idx = blk_idx;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageIterator for SsTableIterator {
|
||||
type KeyType<'a> = KeySlice<'a>;
|
||||
|
||||
fn value(&self) -> &[u8] {
|
||||
self.blk_iter.value()
|
||||
}
|
||||
|
||||
fn key(&self) -> KeySlice {
|
||||
self.blk_iter.key()
|
||||
}
|
||||
|
||||
fn is_valid(&self) -> bool {
|
||||
self.blk_iter.is_valid()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Result<()> {
|
||||
self.blk_iter.next();
|
||||
if !self.blk_iter.is_valid() {
|
||||
self.blk_idx += 1;
|
||||
if self.blk_idx < self.table.num_of_blocks() {
|
||||
self.blk_iter = BlockIterator::create_and_seek_to_first(
|
||||
self.table.read_block_cached(self.blk_idx)?,
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user