From 81bd37252498a83c949e37fee5ee63bb48953cab Mon Sep 17 00:00:00 2001
From: Alex Chi Z
Date: Wed, 17 Jan 2024 16:49:12 +0800
Subject: [PATCH] record last key in meta

Store the last key of every data block in BlockMeta next to the first
key. Each encoded meta entry now carries a u16 length plus the key bytes
for both keys.

SsTable keeps the first key of its first block and the last key of its
last block. SsTable::create_meta_only builds a mock table from that
metadata alone; FileObject wraps Option<File> so such a table has no
backing file.

Signed-off-by: Alex Chi Z
---
 mini-lsm/src/table.rs         | 52 +++++++++++++++++++++++++++++++----
 mini-lsm/src/table/builder.rs | 16 +++++++++--
 2 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/mini-lsm/src/table.rs b/mini-lsm/src/table.rs
index 831c65b..bd50002 100644
--- a/mini-lsm/src/table.rs
+++ b/mini-lsm/src/table.rs
@@ -10,7 +10,7 @@ pub use builder::SsTableBuilder;
 use bytes::{Buf, BufMut, Bytes};
 pub use iterator::SsTableIterator;
 
-use crate::block::Block;
+use crate::block::{self, Block};
 use crate::lsm_storage::BlockCache;
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -19,6 +19,8 @@ pub struct BlockMeta {
     pub offset: usize,
     /// The first key of the data block.
     pub first_key: Bytes,
+    /// The last key of the data block.
+    pub last_key: Bytes,
 }
 
 impl BlockMeta {
@@ -32,6 +34,10 @@ impl BlockMeta {
             estimated_size += std::mem::size_of::<u16>();
             // The size of actual key
             estimated_size += meta.first_key.len();
+            // The size of last key length
+            estimated_size += std::mem::size_of::<u16>();
+            // The size of actual last key
+            estimated_size += meta.last_key.len();
         }
         // Reserve the space to improve performance, especially when the size of incoming data is
         // large
@@ -41,6 +47,8 @@ impl BlockMeta {
             buf.put_u32(meta.offset as u32);
             buf.put_u16(meta.first_key.len() as u16);
             buf.put_slice(&meta.first_key);
+            buf.put_u16(meta.last_key.len() as u16);
+            buf.put_slice(&meta.last_key);
         }
         assert_eq!(estimated_size, buf.len() - original_len);
     }
@@ -52,7 +60,13 @@ impl BlockMeta {
             let offset = buf.get_u32() as usize;
             let first_key_len = buf.get_u16() as usize;
             let first_key = buf.copy_to_bytes(first_key_len);
-            block_meta.push(BlockMeta { offset, first_key });
+            let last_key_len = buf.get_u16() as usize;
+            let last_key = buf.copy_to_bytes(last_key_len);
+            block_meta.push(BlockMeta {
+                offset,
+                first_key,
+                last_key,
+            });
         }
         block_meta
     }
@@ -82,13 +96,16 @@ impl BlockMeta {
 ///     }
 /// }
 /// ```
-pub struct FileObject(File, u64);
+pub struct FileObject(Option<File>, u64);
 
 impl FileObject {
     pub fn read(&self, offset: u64, len: u64) -> Result<Vec<u8>> {
         use std::os::unix::fs::FileExt;
         let mut data = vec![0; len as usize];
-        self.0.read_exact_at(&mut data[..], offset)?;
+        self.0
+            .as_ref()
+            .unwrap()
+            .read_exact_at(&mut data[..], offset)?;
         Ok(data)
     }
 
@@ -100,7 +117,7 @@ impl FileObject {
     pub fn create(path: &Path, data: Vec<u8>) -> Result<Self> {
         std::fs::write(path, &data)?;
         Ok(FileObject(
-            File::options().read(true).write(false).open(path)?,
+            Some(File::options().read(true).write(false).open(path)?),
             data.len() as u64,
         ))
     }
@@ -116,6 +133,8 @@ pub struct SsTable {
     block_meta_offset: usize,
     id: usize,
     block_cache: Option<Arc<BlockCache>>,
+    first_key: Bytes,
+    last_key: Bytes,
 }
 
 impl SsTable {
@@ -130,15 +149,36 @@ impl SsTable {
         let raw_meta_offset = file.read(len - 4, 4)?;
         let block_meta_offset = (&raw_meta_offset[..]).get_u32() as u64;
         let raw_meta = file.read(block_meta_offset, len - 4 - block_meta_offset)?;
+        let block_metas = BlockMeta::decode_block_meta(&raw_meta[..]);
         Ok(Self {
             file,
-            block_metas: BlockMeta::decode_block_meta(&raw_meta[..]),
+            first_key: block_metas.first().unwrap().first_key.clone(),
+            last_key: block_metas.last().unwrap().last_key.clone(),
+            block_metas,
             block_meta_offset: block_meta_offset as usize,
             id,
             block_cache,
         })
     }
 
+    /// Create a mock SST with only first key + last key metadata
+    pub fn create_meta_only(
+        id: usize,
+        file_size: u64,
+        first_key: Bytes,
+        last_key: Bytes,
+    ) -> Result<Self> {
+        Ok(Self {
+            file: FileObject(None, file_size),
+            block_metas: vec![],
+            block_meta_offset: 0,
+            id,
+            block_cache: None,
+            first_key,
+            last_key,
+        })
+    }
+
     /// Read a block from the disk.
     pub fn read_block(&self, block_idx: usize) -> Result<Arc<Block>> {
         let offset = self.block_metas[block_idx].offset;
diff --git a/mini-lsm/src/table/builder.rs b/mini-lsm/src/table/builder.rs
index 7a35437..30ff581 100644
--- a/mini-lsm/src/table/builder.rs
+++ b/mini-lsm/src/table/builder.rs
@@ -12,6 +12,7 @@ use crate::lsm_storage::BlockCache;
 pub struct SsTableBuilder {
     builder: BlockBuilder,
     first_key: Vec<u8>,
+    last_key: Vec<u8>,
     data: Vec<u8>,
     pub(super) meta: Vec<BlockMeta>,
     block_size: usize,
@@ -24,6 +25,7 @@ impl SsTableBuilder {
             data: Vec::new(),
             meta: Vec::new(),
             first_key: Vec::new(),
+            last_key: Vec::new(),
             block_size,
             builder: BlockBuilder::new(block_size),
         }
@@ -32,18 +34,25 @@ impl SsTableBuilder {
     /// Adds a key-value pair to SSTable
     pub fn add(&mut self, key: &[u8], value: &[u8]) {
         if self.first_key.is_empty() {
-            self.first_key = key.to_vec();
+            self.first_key.clear();
+            self.first_key.extend(key);
         }
 
         if self.builder.add(key, value) {
+            self.last_key.clear();
+            self.last_key.extend(key);
             return;
         }
+
         // create a new block builder and append block data
         self.finish_block();
 
         // add the key-value pair to the next block
         assert!(self.builder.add(key, value));
-        self.first_key = key.to_vec();
+        self.first_key.clear();
+        self.first_key.extend(key);
+        self.last_key.clear();
+        self.last_key.extend(key);
     }
 
     /// Get the estimated size of the SSTable.
@@ -57,6 +66,7 @@ impl SsTableBuilder {
         self.meta.push(BlockMeta {
             offset: self.data.len(),
             first_key: std::mem::take(&mut self.first_key).into(),
+            last_key: std::mem::take(&mut self.last_key).into(),
         });
         self.data.extend(encoded_block);
     }
@@ -78,6 +88,8 @@ impl SsTableBuilder {
         Ok(SsTable {
             id,
             file,
+            first_key: self.meta.first().unwrap().first_key.clone(),
+            last_key: self.meta.last().unwrap().last_key.clone(),
             block_metas: self.meta,
             block_meta_offset: meta_offset,
             block_cache,