docs: add comments & hints for day one starter and reference code (#18)
* feat(docs): Improve/Add comments & some hints for day one starter code * feat(docs): Add comments for day one solution code * feat(docs): Add figure for block storage format in starter code (block.rs)
This commit is contained in:
@@ -5,21 +5,32 @@ mod builder;
|
||||
mod iterator;
|
||||
|
||||
pub use builder::BlockBuilder;
|
||||
/// You may want to check out `bytes::BufMut` when manipulating contiguous chunks of memory
|
||||
use bytes::Bytes;
|
||||
pub use iterator::BlockIterator;
|
||||
|
||||
/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted
|
||||
/// key-value pairs.
|
||||
/// A block is the smallest unit of read and caching in LSM tree.
|
||||
/// It is a collection of sorted key-value pairs.
|
||||
/// The actual storage format is as follows (after `Block::encode`):
|
||||
///
|
||||
/// ----------------------------------------------------------------------------------------------------
|
||||
/// | Data Section | Offset Section | Extra |
|
||||
/// ----------------------------------------------------------------------------------------------------
|
||||
/// | Entry #1 | Entry #2 | ... | Entry #N | Offset #1 | Offset #2 | ... | Offset #N | num_of_elements |
|
||||
/// ----------------------------------------------------------------------------------------------------
|
||||
pub struct Block {
|
||||
/// Raw bytes of the data section: the entries stored back to back.
/// NOTE(review): per-entry layout (u16 key length, key, ...) is inferred from the iterator; confirm.
data: Vec<u8>,
|
||||
/// One `u16` per entry; presumably the starting byte position of that entry inside `data`.
offsets: Vec<u16>,
|
||||
}
|
||||
|
||||
impl Block {
|
||||
/// Encodes the internal data into the data layout illustrated in the tutorial.
|
||||
/// Note: double-check that none of the expected fields is missing from your output.
|
||||
/// Starter stub: panics via `unimplemented!()` until it is filled in.
pub fn encode(&self) -> Bytes {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Decodes a single `Block` from `data`, which is expected to be in the encoded data layout.
|
||||
/// Starter stub: panics via `unimplemented!()` until it is filled in.
pub fn decode(data: &[u8]) -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -7,9 +7,13 @@ use super::Block;
|
||||
|
||||
/// Iterates over the key-value pairs of a single block.
|
||||
pub struct BlockIterator {
|
||||
/// The block being iterated, held behind an `Arc`.
|
||||
block: Arc<Block>,
|
||||
/// The current key; an empty key means the iterator is invalid.
|
||||
key: Vec<u8>,
|
||||
/// The value that corresponds to the current key; it may be empty.
|
||||
value: Vec<u8>,
|
||||
/// Index of the current key-value pair; should be in the range [0, num_of_elements).
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
@@ -44,6 +48,7 @@ impl BlockIterator {
|
||||
}
|
||||
|
||||
/// Returns true if the iterator is valid.
|
||||
/// Note: you may want to make use of `key` (an empty key marks an invalid iterator).
|
||||
/// Starter stub: panics via `unimplemented!()` until it is filled in.
pub fn is_valid(&self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
@@ -59,6 +64,7 @@ impl BlockIterator {
|
||||
}
|
||||
|
||||
/// Seeks to the first key that is >= `key`.
|
||||
/// Note: you should assume that callers added the key-value pairs to the block in sorted order.
|
||||
/// Starter stub: panics via `unimplemented!()` until it is filled in.
pub fn seek_to_key(&mut self, key: &[u8]) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ impl Block {
|
||||
for offset in &self.offsets {
|
||||
buf.put_u16(*offset);
|
||||
}
|
||||
// Adds number of elements at the end of the block
|
||||
buf.put_u16(offsets_len as u16);
|
||||
buf.into()
|
||||
}
|
||||
|
||||
@@ -30,11 +30,13 @@ impl BlockBuilder {
|
||||
#[must_use]
|
||||
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
||||
assert!(!key.is_empty(), "key must not be empty");
|
||||
// The overhead here is `key_len` + `val_len` + `offset`, each is of type `u16`
|
||||
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.block_size
|
||||
&& !self.is_empty()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Record the offset first, before the entry is appended, so that it points at the start of the new entry
|
||||
self.offsets.push(self.data.len() as u16);
|
||||
self.data.put_u16(key.len() as u16);
|
||||
self.data.put(key);
|
||||
|
||||
@@ -76,8 +76,12 @@ impl BlockIterator {
|
||||
self.seek_to(self.idx);
|
||||
}
|
||||
|
||||
/// Seek to the specified position and update the current `key` and `value`
|
||||
/// Index update will be handled by caller
|
||||
fn seek_to_offset(&mut self, offset: usize) {
|
||||
let mut entry = &self.block.data[offset..];
|
||||
// Since `get_u16()` will automatically move the ptr 2 bytes ahead here,
|
||||
// we don't need to manually advance it
|
||||
let key_len = entry.get_u16() as usize;
|
||||
let key = entry[..key_len].to_vec();
|
||||
entry.advance(key_len);
|
||||
@@ -90,7 +94,7 @@ impl BlockIterator {
|
||||
self.value.extend(value);
|
||||
}
|
||||
|
||||
/// Seek to the first key that >= `key`.
|
||||
/// Seek to the first key that is >= `key`.
|
||||
pub fn seek_to_key(&mut self, key: &[u8]) {
|
||||
let mut low = 0;
|
||||
let mut high = self.block.offsets.len();
|
||||
|
||||
Reference in New Issue
Block a user