diff --git a/mini-lsm-starter/src/table.rs b/mini-lsm-starter/src/table.rs index 47fb4e4..ee4d9a8 100644 --- a/mini-lsm-starter/src/table.rs +++ b/mini-lsm-starter/src/table.rs @@ -19,12 +19,14 @@ use crate::lsm_storage::BlockCache; pub struct BlockMeta { /// Offset of this data block. pub offset: usize, - /// The first key of the data block. + /// The first key of the data block, mainly used for index purpose. pub first_key: Bytes, } impl BlockMeta { /// Encode block meta to a buffer. + /// You may add extra fields to the buffer, + /// in order to help keep track of `first_key` when decoding from the same buffer in the future. pub fn encode_block_meta( block_meta: &[BlockMeta], #[allow(clippy::ptr_arg)] // remove this allow after you finish @@ -61,9 +63,17 @@ impl FileObject { } } +/// ------------------------------------------------------------------------------------------------------- +/// | Data Block | Meta Block | Extra | +/// ------------------------------------------------------------------------------------------------------- +/// | Data Block #1 | ... | Data Block #N | Meta Block #1 | ... | Meta Block #N | Meta Block Offset (u32) | +/// ------------------------------------------------------------------------------------------------------- pub struct SsTable { + /// The actual storage unit of SsTable, the format is as above. file: FileObject, + /// The meta blocks that hold info for data blocks. block_metas: Vec, + /// The offset that indicates the start point of meta blocks in `file`. block_meta_offset: usize, } @@ -89,6 +99,8 @@ impl SsTable { } /// Find the block that may contain `key`. + /// Note: You may want to make use of the `first_key` stored in `BlockMeta`. + /// You may also assume the key-value pairs stored in each consecutive block are sorted. 
pub fn find_block_idx(&self, key: &[u8]) -> usize { unimplemented!() } diff --git a/mini-lsm-starter/src/table/builder.rs b/mini-lsm-starter/src/table/builder.rs index 9e53417..b89dab8 100644 --- a/mini-lsm-starter/src/table/builder.rs +++ b/mini-lsm-starter/src/table/builder.rs @@ -21,12 +21,14 @@ impl SsTableBuilder { unimplemented!() } - /// Adds a key-value pair to SSTable + /// Adds a key-value pair to SSTable. + /// Note: You should start a new block when the current block is full (`std::mem::replace` may be of help here). pub fn add(&mut self, key: &[u8], value: &[u8]) { unimplemented!() } /// Get the estimated size of the SSTable. + /// Since the data blocks contain much more data than meta blocks, just return the size of data blocks here. pub fn estimated_size(&self) -> usize { unimplemented!() } diff --git a/mini-lsm-starter/src/table/iterator.rs b/mini-lsm-starter/src/table/iterator.rs index ae3f66c..7c7ae4d 100644 --- a/mini-lsm-starter/src/table/iterator.rs +++ b/mini-lsm-starter/src/table/iterator.rs @@ -12,12 +12,12 @@ use crate::iterators::StorageIterator; pub struct SsTableIterator {} impl SsTableIterator { - /// Create a new iterator and seek to the first key-value pair. + /// Create a new iterator and seek to the first key-value pair in the first data block. pub fn create_and_seek_to_first(table: Arc) -> Result { unimplemented!() } - /// Seek to the first key-value pair. + /// Seek to the first key-value pair in the first data block. pub fn seek_to_first(&mut self) -> Result<()> { unimplemented!() } @@ -28,24 +28,30 @@ impl SsTableIterator { } /// Seek to the first key-value pair which >= `key`. + /// Note: You probably want to review the handout for a detailed explanation when implementing this function. pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> { unimplemented!() } } impl StorageIterator for SsTableIterator { - fn value(&self) -> &[u8] { - unimplemented!() - } - + /// Return the `key` that's held by the underlying block iterator. 
fn key(&self) -> &[u8] { unimplemented!() } + /// Return the `value` that's held by the underlying block iterator. + fn value(&self) -> &[u8] { + unimplemented!() + } + + /// Return whether the current block iterator is valid or not. fn is_valid(&self) -> bool { unimplemented!() } + /// Move to the next `key` in the block. + /// Note: You may want to check if the current block iterator is valid after the move. fn next(&mut self) -> Result<()> { unimplemented!() } diff --git a/mini-lsm/src/table.rs b/mini-lsm/src/table.rs index 4349ca7..05a3be6 100644 --- a/mini-lsm/src/table.rs +++ b/mini-lsm/src/table.rs @@ -26,10 +26,14 @@ impl BlockMeta { pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec) { let mut estimated_size = 0; for meta in block_meta { + // The size of offset estimated_size += std::mem::size_of::(); + // The size of key length estimated_size += std::mem::size_of::(); + // The size of actual key estimated_size += meta.first_key.len(); } + // Reserve the space to improve performance, especially when the size of incoming data is large buf.reserve(estimated_size); let original_len = buf.len(); for meta in block_meta {