feat(code): add sst iterator / day 2
Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
75
mini-lsm/src/table/builder.rs
Normal file
75
mini-lsm/src/table/builder.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use anyhow::Result;
|
||||
use bytes::BufMut;
|
||||
use std::path::Path;
|
||||
|
||||
use super::{BlockMeta, FileObject, SsTable};
|
||||
use crate::block::BlockBuilder;
|
||||
|
||||
pub struct SsTableBuilder {
|
||||
builder: BlockBuilder,
|
||||
first_key: Vec<u8>,
|
||||
data: Vec<u8>,
|
||||
pub(super) meta: Vec<BlockMeta>,
|
||||
target_size: usize,
|
||||
block_size: usize,
|
||||
}
|
||||
|
||||
impl SsTableBuilder {
|
||||
pub fn new(target_size: usize, block_size: usize) -> Self {
|
||||
Self {
|
||||
data: Vec::new(),
|
||||
meta: Vec::new(),
|
||||
first_key: Vec::new(),
|
||||
target_size,
|
||||
block_size,
|
||||
builder: BlockBuilder::new(block_size),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
||||
if self.data.len() > self.target_size {
|
||||
return false;
|
||||
}
|
||||
|
||||
if self.first_key.is_empty() {
|
||||
self.first_key = key.to_vec();
|
||||
}
|
||||
|
||||
if self.builder.add(key, value) {
|
||||
return true;
|
||||
}
|
||||
// create a new block builder and append block data
|
||||
self.finish_block();
|
||||
|
||||
// add the key-value pair to the next block
|
||||
assert!(self.builder.add(key, value));
|
||||
self.first_key = key.to_vec();
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
fn finish_block(&mut self) {
|
||||
let builder = std::mem::replace(&mut self.builder, BlockBuilder::new(self.block_size));
|
||||
let encoded_block = builder.build().encode();
|
||||
self.meta.push(BlockMeta {
|
||||
offset: self.data.len(),
|
||||
first_key: std::mem::take(&mut self.first_key).into(),
|
||||
});
|
||||
self.data.extend(encoded_block);
|
||||
}
|
||||
|
||||
pub fn build(mut self, path: impl AsRef<Path>) -> Result<SsTable> {
|
||||
self.finish_block();
|
||||
let mut buf = self.data;
|
||||
let meta_offset = buf.len();
|
||||
BlockMeta::encode_block_meta(&self.meta, &mut buf);
|
||||
buf.put_u32(meta_offset as u32);
|
||||
let file = FileObject::create(path.as_ref(), buf)?;
|
||||
Ok(SsTable {
|
||||
file,
|
||||
block_metas: self.meta,
|
||||
block_meta_offset: meta_offset,
|
||||
})
|
||||
}
|
||||
}
|
||||
91
mini-lsm/src/table/iterator.rs
Normal file
91
mini-lsm/src/table/iterator.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use anyhow::Result;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::SsTable;
|
||||
use crate::block::BlockIterator;
|
||||
|
||||
pub struct SsTableIterator {
|
||||
table: Arc<SsTable>,
|
||||
blk_iter: BlockIterator,
|
||||
blk_idx: usize,
|
||||
}
|
||||
|
||||
impl SsTableIterator {
|
||||
fn seek_to_first_inner(table: &Arc<SsTable>) -> Result<(usize, BlockIterator)> {
|
||||
Ok((
|
||||
0,
|
||||
BlockIterator::create_and_seek_to_first(table.read_block(0)?),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_first_inner(&table)?;
|
||||
let iter = Self {
|
||||
blk_iter,
|
||||
table,
|
||||
blk_idx,
|
||||
};
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
pub fn seek_to_first(&mut self) -> Result<()> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_first_inner(&self.table)?;
|
||||
self.blk_idx = blk_idx;
|
||||
self.blk_iter = blk_iter;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn seek_to_key_inner(table: &Arc<SsTable>, key: &[u8]) -> Result<(usize, BlockIterator)> {
|
||||
let mut blk_idx = table.find_block_idx(key);
|
||||
let mut blk_iter = BlockIterator::create_and_seek_to_key(table.read_block(blk_idx)?, key);
|
||||
if !blk_iter.is_valid() {
|
||||
blk_idx += 1;
|
||||
if blk_idx < table.num_of_blocks() {
|
||||
blk_iter = BlockIterator::create_and_seek_to_first(table.read_block(blk_idx)?);
|
||||
}
|
||||
}
|
||||
Ok((blk_idx, blk_iter))
|
||||
}
|
||||
|
||||
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: &[u8]) -> Result<Self> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?;
|
||||
let iter = Self {
|
||||
blk_iter,
|
||||
table,
|
||||
blk_idx,
|
||||
};
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
|
||||
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&self.table, key)?;
|
||||
self.blk_iter = blk_iter;
|
||||
self.blk_idx = blk_idx;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn key(&self) -> &[u8] {
|
||||
self.blk_iter.key()
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &[u8] {
|
||||
self.blk_iter.value()
|
||||
}
|
||||
|
||||
pub fn is_valid(&self) -> bool {
|
||||
self.blk_iter.is_valid()
|
||||
}
|
||||
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn next(&mut self) -> Result<()> {
|
||||
self.blk_iter.next();
|
||||
if !self.blk_iter.is_valid() {
|
||||
self.blk_idx += 1;
|
||||
if self.blk_idx < self.table.num_of_blocks() {
|
||||
self.blk_iter =
|
||||
BlockIterator::create_and_seek_to_first(self.table.read_block(self.blk_idx)?);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
134
mini-lsm/src/table/tests.rs
Normal file
134
mini-lsm/src/table/tests.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
use crate::table::SsTableBuilder;
|
||||
|
||||
use super::{SsTable, SsTableIterator};
|
||||
|
||||
/// A table holding a single key-value pair can be built.
#[test]
fn test_sst_build_single_key() {
    let mut sst_builder = SsTableBuilder::new(16, 16);
    let accepted = sst_builder.add(b"233", b"233333");
    assert!(accepted);
    sst_builder.build("").unwrap();
}
|
||||
|
||||
/// With a 16-byte block size, six small pairs must produce at least two
/// finished blocks before `build` is called.
#[test]
fn test_sst_build_two_blocks() {
    let mut sst_builder = SsTableBuilder::new(1024, 16);
    let pairs = [
        (b"11", b"11"),
        (b"22", b"22"),
        (b"33", b"11"),
        (b"44", b"22"),
        (b"55", b"11"),
        (b"66", b"22"),
    ];
    for &(key, value) in pairs.iter() {
        assert!(sst_builder.add(key, value));
    }
    assert!(sst_builder.meta.len() >= 2);
    sst_builder.build("").unwrap();
}
|
||||
|
||||
/// With a 32-byte target size the builder accepts four pairs and then
/// rejects the fifth.
#[test]
fn test_sst_build_full() {
    let mut sst_builder = SsTableBuilder::new(32, 16);
    let accepted_pairs = [
        (b"11", b"11"),
        (b"22", b"22"),
        (b"33", b"11"),
        (b"44", b"22"),
    ];
    for &(key, value) in accepted_pairs.iter() {
        assert!(sst_builder.add(key, value));
    }
    let overflow_accepted = sst_builder.add(b"55", b"11");
    assert!(!overflow_accepted);
    sst_builder.build("").unwrap();
}
|
||||
|
||||
/// Builds the test key for `idx`: `key_` followed by `idx * 5`, zero-padded
/// to at least three digits.
fn key_of(idx: usize) -> Vec<u8> {
    let scaled = idx * 5;
    let text = format!("key_{:03}", scaled);
    text.into_bytes()
}
|
||||
|
||||
/// Builds the test value for `idx`: `value_` followed by `idx`, zero-padded
/// to at least ten digits.
fn value_of(idx: usize) -> Vec<u8> {
    let text = format!("value_{:010}", idx);
    text.into_bytes()
}
|
||||
|
||||
/// Number of key-value pairs written by `generate_sst`.
fn num_of_keys() -> usize {
    const KEY_COUNT: usize = 100;
    KEY_COUNT
}
|
||||
|
||||
fn generate_sst() -> SsTable {
|
||||
let mut builder = SsTableBuilder::new(65536, 128);
|
||||
for idx in 0..num_of_keys() {
|
||||
let key = key_of(idx);
|
||||
let value = value_of(idx);
|
||||
assert!(builder.add(&key[..], &value[..]));
|
||||
}
|
||||
builder.build("").unwrap()
|
||||
}
|
||||
|
||||
/// Building the full generated table must succeed.
#[test]
fn test_sst_build_all() {
    drop(generate_sst());
}
|
||||
|
||||
/// Re-opening the file of a built table must yield the same block metadata
/// the builder produced.
#[test]
fn test_sst_decode() {
    let built = generate_sst();
    let expected_metas = built.block_metas.clone();
    let reopened = SsTable::open(built.file).unwrap();
    assert_eq!(reopened.block_metas, expected_metas);
}
|
||||
|
||||
fn as_bytes(x: &[u8]) -> Bytes {
|
||||
Bytes::copy_from_slice(x)
|
||||
}
|
||||
|
||||
/// Scans the whole generated table five times, rewinding with
/// `seek_to_first` after each pass, and checks every key and value.
#[test]
fn test_sst_iterator() {
    let table = Arc::new(generate_sst());
    let mut iter = SsTableIterator::create_and_seek_to_first(table).unwrap();
    for _pass in 0..5 {
        for i in 0..num_of_keys() {
            let expected_key = key_of(i);
            let expected_value = value_of(i);
            let key = iter.key();
            let value = iter.value();
            assert_eq!(
                key,
                expected_key,
                "expected key: {:?}, actual key: {:?}",
                as_bytes(&expected_key),
                as_bytes(key)
            );
            assert_eq!(
                value,
                expected_value,
                "expected value: {:?}, actual value: {:?}",
                as_bytes(&expected_value),
                as_bytes(value)
            );
            iter.next().unwrap();
        }
        // Rewind for the next pass.
        iter.seek_to_first().unwrap();
    }
}
|
||||
|
||||
/// Walks the generated table using only `seek_to_key`.
///
/// Stored keys are multiples of five, so probing `i * 5 + offset` for an
/// `offset` in `1..=5` is expected to land on the entry produced by
/// `key_of(i + 1)`. Each round ends by seeking to `k`, which sorts before
/// every stored key and therefore rewinds to the first entry.
#[test]
fn test_sst_seek_key() {
    let table = Arc::new(generate_sst());
    let mut iter = SsTableIterator::create_and_seek_to_key(table, &key_of(0)).unwrap();
    for offset in 1..=5 {
        for i in 0..num_of_keys() {
            let expected_key = key_of(i);
            let expected_value = value_of(i);
            let key = iter.key();
            let value = iter.value();
            assert_eq!(
                key,
                expected_key,
                "expected key: {:?}, actual key: {:?}",
                as_bytes(&expected_key),
                as_bytes(key)
            );
            assert_eq!(
                value,
                expected_value,
                "expected value: {:?}, actual value: {:?}",
                as_bytes(&expected_value),
                as_bytes(value)
            );
            let probe = format!("key_{:03}", i * 5 + offset).into_bytes();
            iter.seek_to_key(&probe).unwrap();
        }
        iter.seek_to_key(b"k").unwrap();
    }
}
|
||||
Reference in New Issue
Block a user