From e8601433d61bc92ddb37e74c74abc5f001f4f4a5 Mon Sep 17 00:00:00 2001 From: Alex Chi Date: Sun, 21 Jan 2024 19:33:05 +0800 Subject: [PATCH] finish week 1 day 7 Signed-off-by: Alex Chi --- Cargo.lock | 7 ++ README.md | 2 +- .../src/week1-07-sst-optimizations.md | 95 +++++++++++++++- mini-lsm-starter/Cargo.toml | 1 + mini-lsm-starter/src/table.rs | 5 + mini-lsm-starter/src/table/bloom.rs | 103 ++++++++++++++++++ mini-lsm/Cargo.toml | 8 ++ mini-lsm/src/tests/week1_day7.rs | 80 ++++++++++++++ 8 files changed, 299 insertions(+), 2 deletions(-) create mode 100644 mini-lsm-starter/src/table/bloom.rs create mode 100644 mini-lsm/src/tests/week1_day7.rs diff --git a/Cargo.lock b/Cargo.lock index 9684f2c..9615396 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -274,6 +274,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "farmhash" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f35ce9c8fb9891c75ceadbc330752951a4e369b50af10775955aeb9af3eee34b" + [[package]] name = "fastrand" version = "1.8.0" @@ -434,6 +440,7 @@ dependencies = [ "crossbeam-channel", "crossbeam-epoch", "crossbeam-skiplist", + "farmhash", "moka", "ouroboros 0.18.2", "parking_lot", diff --git a/README.md b/README.md index 5aecd2a..eae2e4d 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ We are working on a new version of the mini-lsm tutorial that is split into 3 we | 1.4 | Table Format | ✅ | ✅ | ✅ | | 1.5 | Storage Engine - Read Path | ✅ | ✅ | ✅ | | 1.6 | Storage Engine - Write Path | ✅ | ✅ | ✅ | -| 1.7 | Bloom Filter and Key Compression | | | | +| 1.7 | Bloom Filter and Key Compression | ✅ | ✅ | ✅ | | 2.1 | Compaction Implementation | ✅ | 🚧 | 🚧 | | 2.2 | Compaction Strategy - Simple | ✅ | 🚧 | 🚧 | | 2.3 | Compaction Strategy - Tiered | ✅ | 🚧 | | diff --git a/mini-lsm-book/src/week1-07-sst-optimizations.md b/mini-lsm-book/src/week1-07-sst-optimizations.md index 91f637e..ea87eb1 100644 --- a/mini-lsm-book/src/week1-07-sst-optimizations.md +++ 
b/mini-lsm-book/src/week1-07-sst-optimizations.md @@ -19,14 +19,107 @@ cargo x scheck ## Task 1: Bloom Filters +Bloom filters are probabilistic data structures that maintain a set of keys. You can add keys to a bloom filter, and you can query whether a key may exist or must not exist in the set of keys that have been added to the bloom filter. + +You usually need to have a hash function in order to construct a bloom filter, and a key can have multiple hashes. Let us take a look at the example below. Assume that we already have hashes of some keys and the bloom filter has 7 bits. + +```plaintext +hash1 = ((character - a) * 13) % 7 +hash2 = ((character - a) * 11) % 7 +b -> 6 4 +c -> 5 1 +d -> 4 5 +e -> 3 2 +g -> 1 3 +h -> 0 0 +``` + +If we insert b, c, d into the 7-bit bloom filter, we will get: + +``` + bit 0123456 +insert b 1 1 +insert c 1 1 +insert d 11 +result 0101111 +``` + +When probing the bloom filter, we generate the hashes for a key, and see if the corresponding bits have been set. If all of them are set to true, then the key may exist in the bloom filter. Otherwise, the key must NOT exist in the bloom filter. + +For `e -> 3 2`, as bit 2 is not set, it should not be in the original set. For `g -> 1 3`, because both bits are set, it may or may not exist in the set. For `h -> 0 0`, both of the bits (actually it's one bit) are not set, and therefore it should not be in the original set. + +``` +b -> maybe (actual: yes) +c -> maybe (actual: yes) +d -> maybe (actual: yes) +e -> MUST not (actual: no) +g -> maybe (actual: no) +h -> MUST not (actual: no) +``` + +Remember that at the end of the last chapter, we implemented SST filtering based on key range. Now, on the `get` read path, we can also use the bloom filter to ignore SSTs that do not contain the key that the user wants to look up, therefore reducing the number of files to be read from the disk. 
+ +In this task, you will need to modify: + +``` +src/table/bloom.rs +``` + +In the implementation, you will build a bloom filter from key hashes (which are u32 numbers). For each hash, you will need to set `k` bits. The bits are computed by: + +```rust,no_run +let delta = (h >> 17) | (h << 15); // h is the key hash +for _ in 0..k { + // TODO: use the hash to set the corresponding bit + h = h.wrapping_add(delta); +} +``` + +We provide all the skeleton code for doing the magic mathematics. You only need to implement the procedure of building a bloom filter and probing a bloom filter. + ## Task 2: Integrate Bloom Filter on the Read Path +In this task, you will need to modify: + +``` +src/table/builder.rs +src/table.rs +src/lsm_storage.rs +``` + +For the bloom filter encoding, you can append the bloom filter to the end of your SST file. You will need to store the bloom filter offset at the end of the file, and compute meta offsets accordingly. + +We use the `farmhash` crate to compute the hashes of the keys. When building the SST, you will also need to build the bloom filter by computing the key hash using `farmhash::fingerprint32`. You will need to encode/decode the bloom filters with the block meta. You can choose a false positive rate of 0.01 for your bloom filter. You may need to add new fields to the structures apart from the ones provided in the starter code as necessary. + +After that, you can modify the `get` read path to filter SSTs based on bloom filters. + +We do not have integration tests for this part, so you will need to ensure that your implementation still passes all previous chapter tests. + ## Task 3: Key Compression Encoding + Decoding +In this task, you will need to modify: + +``` +src/block/builder.rs +src/block/iterator.rs +``` + +As the SST file stores keys in order, it is possible that the user stores keys with the same prefix, and we can compress the prefix in the SST encoding so as to save space. 
+ +We compare the current key with the first key in the block. We store the key as follows: + +``` +key_overlap_len (u16) | rest_key_len (u16) | key (rest_key_len) +``` + +The `key_overlap_len` indicates how many bytes are the same as the first key in the block. For example, if we see a record: `5|3|LSM`, where the first key in the block is `mini-something`, we can recover the current key to `mini-LSM`. + +After you finish the encoding, you will also need to implement decoding in the block iterator. You may need to add new fields to the structures apart from the ones provided in the starter code as necessary. + ## Test Your Understanding * How does the bloom filter help with the SST filtering process? What kind of information can it tell you about a key? (may not exist/may exist/must exist/must not exist) -* Consider the case that we need a backward iterator. How does key compression affect backward iterators? Any way to improve it? +* Consider the case that we need a backward iterator. Does our key compression affect backward iterators? * Can you use bloom filters on scan? We do not provide reference answers to the questions, and feel free to discuss about them in the Discord community. 
diff --git a/mini-lsm-starter/Cargo.toml b/mini-lsm-starter/Cargo.toml index 611436d..5a2fe17 100644 --- a/mini-lsm-starter/Cargo.toml +++ b/mini-lsm-starter/Cargo.toml @@ -18,6 +18,7 @@ rand = "0.8.5" crossbeam-channel = "0.5.11" serde_json = { version = "1.0" } serde = { version = "1.0", features = ["derive"] } +farmhash = "1" [dev-dependencies] tempfile = "3" diff --git a/mini-lsm-starter/src/table.rs b/mini-lsm-starter/src/table.rs index 7cc6e84..e5241dd 100644 --- a/mini-lsm-starter/src/table.rs +++ b/mini-lsm-starter/src/table.rs @@ -1,6 +1,7 @@ #![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod #![allow(dead_code)] // TODO(you): remove this lint after implementing this mod +pub(crate) mod bloom; mod builder; mod iterator; @@ -16,6 +17,8 @@ pub use iterator::SsTableIterator; use crate::block::Block; use crate::lsm_storage::BlockCache; +use self::bloom::Bloom; + #[derive(Clone, Debug, PartialEq, Eq)] pub struct BlockMeta { /// Offset of this data block. @@ -91,6 +94,7 @@ pub struct SsTable { block_cache: Option<Arc<BlockCache>>, first_key: Bytes, last_key: Bytes, + pub(crate) bloom: Option<Bloom>, } impl SsTable { @@ -114,6 +118,7 @@ impl SsTable { block_cache: None, first_key, last_key, + bloom: None, } } diff --git a/mini-lsm-starter/src/table/bloom.rs b/mini-lsm-starter/src/table/bloom.rs new file mode 100644 index 0000000..0ab9796 --- /dev/null +++ b/mini-lsm-starter/src/table/bloom.rs @@ -0,0 +1,103 @@ +// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use bytes::{BufMut, Bytes, BytesMut}; + +/// Implements a bloom filter +pub struct Bloom { + /// data of filter in bits + pub(crate) filter: Bytes, + /// number of hash functions + pub(crate) k: u8, +} + +pub trait BitSlice { + fn get_bit(&self, idx: usize) -> bool; + fn bit_len(&self) -> usize; +} + +pub trait BitSliceMut { + fn set_bit(&mut self, idx: usize, val: bool); +} + +impl<T: AsRef<[u8]>> BitSlice for T { + fn get_bit(&self, idx: usize) -> bool { + let pos = idx / 8; + let offset = idx % 8; + (self.as_ref()[pos] & (1 << offset)) != 0 + } + + fn bit_len(&self) -> usize { + self.as_ref().len() * 8 + } +} + +impl<T: AsMut<[u8]>> BitSliceMut for T { + fn set_bit(&mut self, idx: usize, val: bool) { + let pos = idx / 8; + let offset = idx % 8; + if val { + self.as_mut()[pos] |= 1 << offset; + } else { + self.as_mut()[pos] &= !(1 << offset); + } + } +} + +impl Bloom { + /// Decode a bloom filter + pub fn decode(buf: &[u8]) -> Self { + let filter = &buf[..buf.len() - 1]; + let k = buf[buf.len() - 1]; + Self { + filter: filter.to_vec().into(), + k, + } + } + + /// Encode a bloom filter + pub fn encode(&self, buf: &mut Vec<u8>) { + buf.extend(&self.filter); + buf.put_u8(self.k); + } + + /// Get bloom filter bits per key from entries count and FPR + pub fn bloom_bits_per_key(entries: usize, false_positive_rate: f64) -> usize { + let size = + -1.0 * (entries as f64) * false_positive_rate.ln() / std::f64::consts::LN_2.powi(2); + let locs = (size / (entries as f64)).ceil(); + locs as usize + } + + /// Build bloom filter from key hashes + pub fn build_from_key_hashes(keys: &[u32], bits_per_key: usize) -> Self { + let k = (bits_per_key as f64 * 0.69) as u32; + let k = k.min(30).max(1); + let nbits = (keys.len() * bits_per_key).max(64); + let nbytes = (nbits + 7) / 8; + let nbits = nbytes * 8; + let mut filter = BytesMut::with_capacity(nbytes); + filter.resize(nbytes, 0); + + // TODO: build the bloom filter + + Self { + filter: filter.freeze(), + k: k as u8, + } + } + + /// Check if a bloom filter may 
contain some data + pub fn may_contain(&self, h: u32) -> bool { + if self.k > 30 { + // potential new encoding for short bloom filters + true + } else { + let nbits = self.filter.bit_len(); + let delta = (h >> 17) | (h << 15); + + // TODO: probe the bloom filter + + true + } + } +} diff --git a/mini-lsm/Cargo.toml b/mini-lsm/Cargo.toml index 9788ed1..a73722a 100644 --- a/mini-lsm/Cargo.toml +++ b/mini-lsm/Cargo.toml @@ -26,3 +26,11 @@ serde = { version = "1.0", features = ["derive"] } [dev-dependencies] tempfile = "3" + +[[bin]] +name = "mini-lsm-cli-ref" +path = "src/bin/mini-lsm-cli.rs" + +[[bin]] +name = "mini-lsm-wrapper-ref" +path = "src/bin/wrapper.rs" diff --git a/mini-lsm/src/tests/week1_day7.rs b/mini-lsm/src/tests/week1_day7.rs new file mode 100644 index 0000000..54694d0 --- /dev/null +++ b/mini-lsm/src/tests/week1_day7.rs @@ -0,0 +1,80 @@ +use tempfile::tempdir; + +use crate::table::{bloom::Bloom, FileObject, SsTable, SsTableBuilder}; + +fn key_of(idx: usize) -> Vec<u8> { + format!("key_{:010}", idx * 5).into_bytes() +} + +fn value_of(idx: usize) -> Vec<u8> { + format!("value_{:010}", idx).into_bytes() +} + +fn num_of_keys() -> usize { + 100 +} + +#[test] +fn test_task1_bloom_filter() { + let mut key_hashes = Vec::new(); + for idx in 0..num_of_keys() { + let key = key_of(idx); + key_hashes.push(farmhash::fingerprint32(&key)); + } + let bits_per_key = Bloom::bloom_bits_per_key(key_hashes.len(), 0.01); + println!("bits per key: {}", bits_per_key); + let bloom = Bloom::build_from_key_hashes(&key_hashes, bits_per_key); + println!("bloom size: {}, k={}", bloom.filter.len(), bloom.k); + assert!(bloom.k < 30); + for idx in 0..num_of_keys() { + let key = key_of(idx); + assert!(bloom.may_contain(farmhash::fingerprint32(&key))); + } + let mut x = 0; + let mut cnt = 0; + for idx in num_of_keys()..(num_of_keys() * 10) { + let key = key_of(idx); + if bloom.may_contain(farmhash::fingerprint32(&key)) { + x += 1; + } + cnt += 1; + } + assert_ne!(x, cnt, "bloom filter not taking 
effect?"); + assert_ne!(x, 0, "bloom filter not taking effect?"); +} + +#[test] +fn test_task2_sst_decode() { + let mut builder = SsTableBuilder::new(128); + for idx in 0..num_of_keys() { + let key = key_of(idx); + let value = value_of(idx); + builder.add(&key[..], &value[..]); + } + let dir = tempdir().unwrap(); + let path = dir.path().join("1.sst"); + let sst = builder.build_for_test(&path).unwrap(); + let sst2 = SsTable::open(0, None, FileObject::open(&path).unwrap()).unwrap(); + let bloom_1 = sst.bloom.as_ref().unwrap(); + let bloom_2 = sst2.bloom.as_ref().unwrap(); + assert_eq!(bloom_1.k, bloom_2.k); + assert_eq!(bloom_1.filter, bloom_2.filter); +} + +#[test] +fn test_task3_block_key_compression() { + let mut builder = SsTableBuilder::new(128); + for idx in 0..num_of_keys() { + let key = key_of(idx); + let value = value_of(idx); + builder.add(&key[..], &value[..]); + } + let dir = tempdir().unwrap(); + let path = dir.path().join("1.sst"); + let sst = builder.build_for_test(&path).unwrap(); + assert!( + sst.block_meta.len() <= 25, + "you have {} blocks, expect 25", + sst.block_meta.len() + ); +}