@@ -18,6 +18,7 @@ rand = "0.8.5"
|
||||
crossbeam-channel = "0.5.11"
|
||||
serde_json = { version = "1.0" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
farmhash = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
|
||||
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
|
||||
|
||||
pub(crate) mod bloom;
|
||||
mod builder;
|
||||
mod iterator;
|
||||
|
||||
@@ -16,6 +17,8 @@ pub use iterator::SsTableIterator;
|
||||
use crate::block::Block;
|
||||
use crate::lsm_storage::BlockCache;
|
||||
|
||||
use self::bloom::Bloom;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct BlockMeta {
|
||||
/// Offset of this data block.
|
||||
@@ -91,6 +94,7 @@ pub struct SsTable {
|
||||
block_cache: Option<Arc<BlockCache>>,
|
||||
first_key: Bytes,
|
||||
last_key: Bytes,
|
||||
pub(crate) bloom: Option<Bloom>,
|
||||
}
|
||||
|
||||
impl SsTable {
|
||||
@@ -114,6 +118,7 @@ impl SsTable {
|
||||
block_cache: None,
|
||||
first_key,
|
||||
last_key,
|
||||
bloom: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
103
mini-lsm-starter/src/table/bloom.rs
Normal file
103
mini-lsm-starter/src/table/bloom.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0.
|
||||
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
|
||||
/// Implements a bloom filter
|
||||
pub struct Bloom {
|
||||
/// data of filter in bits
|
||||
pub(crate) filter: Bytes,
|
||||
/// number of hash functions
|
||||
pub(crate) k: u8,
|
||||
}
|
||||
|
||||
/// Read-only, bit-level view over a byte container.
pub trait BitSlice {
    /// Returns whether the bit at position `idx` is set.
    fn get_bit(&self, idx: usize) -> bool;

    /// Returns the total number of addressable bits.
    fn bit_len(&self) -> usize;
}
|
||||
|
||||
/// Mutable, bit-level view over a byte container.
pub trait BitSliceMut {
    /// Sets the bit at position `idx` when `val` is `true`, clears it otherwise.
    fn set_bit(&mut self, idx: usize, val: bool);
}
|
||||
|
||||
impl<T: AsRef<[u8]>> BitSlice for T {
|
||||
fn get_bit(&self, idx: usize) -> bool {
|
||||
let pos = idx / 8;
|
||||
let offset = idx % 8;
|
||||
(self.as_ref()[pos] & (1 << offset)) != 0
|
||||
}
|
||||
|
||||
fn bit_len(&self) -> usize {
|
||||
self.as_ref().len() * 8
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsMut<[u8]>> BitSliceMut for T {
|
||||
fn set_bit(&mut self, idx: usize, val: bool) {
|
||||
let pos = idx / 8;
|
||||
let offset = idx % 8;
|
||||
if val {
|
||||
self.as_mut()[pos] |= 1 << offset;
|
||||
} else {
|
||||
self.as_mut()[pos] &= !(1 << offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Bloom {
|
||||
/// Decode a bloom filter
|
||||
pub fn decode(buf: &[u8]) -> Self {
|
||||
let filter = &buf[..buf.len() - 1];
|
||||
let k = buf[buf.len() - 1];
|
||||
Self {
|
||||
filter: filter.to_vec().into(),
|
||||
k,
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode a bloom filter
|
||||
pub fn encode(&self, buf: &mut Vec<u8>) {
|
||||
buf.extend(&self.filter);
|
||||
buf.put_u8(self.k);
|
||||
}
|
||||
|
||||
/// Get bloom filter bits per key from entries count and FPR
|
||||
pub fn bloom_bits_per_key(entries: usize, false_positive_rate: f64) -> usize {
|
||||
let size =
|
||||
-1.0 * (entries as f64) * false_positive_rate.ln() / std::f64::consts::LN_2.powi(2);
|
||||
let locs = (size / (entries as f64)).ceil();
|
||||
locs as usize
|
||||
}
|
||||
|
||||
/// Build bloom filter from key hashes
|
||||
pub fn build_from_key_hashes(keys: &[u32], bits_per_key: usize) -> Self {
|
||||
let k = (bits_per_key as f64 * 0.69) as u32;
|
||||
let k = k.min(30).max(1);
|
||||
let nbits = (keys.len() * bits_per_key).max(64);
|
||||
let nbytes = (nbits + 7) / 8;
|
||||
let nbits = nbytes * 8;
|
||||
let mut filter = BytesMut::with_capacity(nbytes);
|
||||
filter.resize(nbytes, 0);
|
||||
|
||||
// TODO: build the bloom filter
|
||||
|
||||
Self {
|
||||
filter: filter.freeze(),
|
||||
k: k as u8,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a bloom filter may contain some data
|
||||
pub fn may_contain(&self, h: u32) -> bool {
|
||||
if self.k > 30 {
|
||||
// potential new encoding for short bloom filters
|
||||
true
|
||||
} else {
|
||||
let nbits = self.filter.bit_len();
|
||||
let delta = (h >> 17) | (h << 15);
|
||||
|
||||
// TODO: probe the bloom filter
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user