docs: update solution & starter code documentation for day 2 (#20)

* feat(docs): update day 2 starter code documentation

* feat(docs): update day 2 solution code documentation
This commit is contained in:
Xu
2023-07-11 12:05:40 +08:00
committed by GitHub
parent de7f2ec263
commit a5ac71c99f
4 changed files with 32 additions and 8 deletions

View File

@@ -19,12 +19,14 @@ use crate::lsm_storage::BlockCache;
pub struct BlockMeta { pub struct BlockMeta {
/// Offset of this data block. /// Offset of this data block.
pub offset: usize, pub offset: usize,
/// The first key of the data block. /// The first key of the data block, mainly used for indexing purposes.
pub first_key: Bytes, pub first_key: Bytes,
} }
impl BlockMeta { impl BlockMeta {
/// Encode block meta to a buffer. /// Encode block meta to a buffer.
/// You may add extra fields to the buffer,
/// in order to help keep track of `first_key` when decoding from the same buffer in the future.
pub fn encode_block_meta( pub fn encode_block_meta(
block_meta: &[BlockMeta], block_meta: &[BlockMeta],
#[allow(clippy::ptr_arg)] // remove this allow after you finish #[allow(clippy::ptr_arg)] // remove this allow after you finish
@@ -61,9 +63,17 @@ impl FileObject {
} }
} }
/// -------------------------------------------------------------------------------------------------------
/// | Data Block | Meta Block | Extra |
/// -------------------------------------------------------------------------------------------------------
/// | Data Block #1 | ... | Data Block #N | Meta Block #1 | ... | Meta Block #N | Meta Block Offset (u32) |
/// -------------------------------------------------------------------------------------------------------
pub struct SsTable { pub struct SsTable {
/// The actual storage unit of SsTable, the format is as above.
file: FileObject, file: FileObject,
/// The meta blocks that hold info for data blocks.
block_metas: Vec<BlockMeta>, block_metas: Vec<BlockMeta>,
/// The offset that indicates the start point of meta blocks in `file`.
block_meta_offset: usize, block_meta_offset: usize,
} }
@@ -89,6 +99,8 @@ impl SsTable {
} }
/// Find the block that may contain `key`. /// Find the block that may contain `key`.
/// Note: You may want to make use of the `first_key` stored in `BlockMeta`.
/// You may also assume the key-value pairs stored in each consecutive block are sorted.
pub fn find_block_idx(&self, key: &[u8]) -> usize { pub fn find_block_idx(&self, key: &[u8]) -> usize {
unimplemented!() unimplemented!()
} }

View File

@@ -21,12 +21,14 @@ impl SsTableBuilder {
unimplemented!() unimplemented!()
} }
/// Adds a key-value pair to SSTable /// Adds a key-value pair to SSTable.
/// Note: You should start a new block when the current block is full (`std::mem::replace` may be of help here).
pub fn add(&mut self, key: &[u8], value: &[u8]) { pub fn add(&mut self, key: &[u8], value: &[u8]) {
unimplemented!() unimplemented!()
} }
/// Get the estimated size of the SSTable. /// Get the estimated size of the SSTable.
/// Since the data blocks contain much more data than meta blocks, just return the size of data blocks here.
pub fn estimated_size(&self) -> usize { pub fn estimated_size(&self) -> usize {
unimplemented!() unimplemented!()
} }

View File

@@ -12,12 +12,12 @@ use crate::iterators::StorageIterator;
pub struct SsTableIterator {} pub struct SsTableIterator {}
impl SsTableIterator { impl SsTableIterator {
/// Create a new iterator and seek to the first key-value pair. /// Create a new iterator and seek to the first key-value pair in the first data block.
pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> { pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> {
unimplemented!() unimplemented!()
} }
/// Seek to the first key-value pair. /// Seek to the first key-value pair in the first data block.
pub fn seek_to_first(&mut self) -> Result<()> { pub fn seek_to_first(&mut self) -> Result<()> {
unimplemented!() unimplemented!()
} }
@@ -28,24 +28,30 @@ impl SsTableIterator {
} }
/// Seek to the first key-value pair which >= `key`. /// Seek to the first key-value pair which >= `key`.
/// Note: You probably want to review the handout for a detailed explanation when implementing this function.
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> { pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
unimplemented!() unimplemented!()
} }
} }
impl StorageIterator for SsTableIterator { impl StorageIterator for SsTableIterator {
fn value(&self) -> &[u8] { /// Return the `key` that's held by the underlying block iterator.
unimplemented!()
}
fn key(&self) -> &[u8] { fn key(&self) -> &[u8] {
unimplemented!() unimplemented!()
} }
/// Return the `value` that's held by the underlying block iterator.
fn value(&self) -> &[u8] {
unimplemented!()
}
/// Return whether the current block iterator is valid or not.
fn is_valid(&self) -> bool { fn is_valid(&self) -> bool {
unimplemented!() unimplemented!()
} }
/// Move to the next `key` in the block.
/// Note: You may want to check if the current block iterator is valid after the move.
fn next(&mut self) -> Result<()> { fn next(&mut self) -> Result<()> {
unimplemented!() unimplemented!()
} }

View File

@@ -26,10 +26,14 @@ impl BlockMeta {
pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec<u8>) { pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec<u8>) {
let mut estimated_size = 0; let mut estimated_size = 0;
for meta in block_meta { for meta in block_meta {
// The size of offset
estimated_size += std::mem::size_of::<u32>(); estimated_size += std::mem::size_of::<u32>();
// The size of key length
estimated_size += std::mem::size_of::<u16>(); estimated_size += std::mem::size_of::<u16>();
// The size of actual key
estimated_size += meta.first_key.len(); estimated_size += meta.first_key.len();
} }
// Reserve the space to improve performance, especially when the size of incoming data is large
buf.reserve(estimated_size); buf.reserve(estimated_size);
let original_len = buf.len(); let original_len = buf.len();
for meta in block_meta { for meta in block_meta {