docs: add comments & hints for day one starter and reference code (#18)
* feat(docs): Improve/Add comments & some hints for day one starter code
* feat(docs): Add comments for day one solution code
* feat(docs): Add figure for block storage format in starter code (block.rs)
This commit is contained in:
@@ -5,21 +5,32 @@ mod builder;
|
|||||||
mod iterator;
|
mod iterator;
|
||||||
|
|
||||||
pub use builder::BlockBuilder;
|
pub use builder::BlockBuilder;
|
||||||
|
/// You may want to check out `bytes::BufMut` when manipulating contiguous chunks of memory
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
pub use iterator::BlockIterator;
|
pub use iterator::BlockIterator;
|
||||||
|
|
||||||
/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted
|
/// A block is the smallest unit of read and caching in LSM tree.
|
||||||
/// key-value pairs.
|
/// It is a collection of sorted key-value pairs.
|
||||||
|
/// The actual storage format is shown below (after `Block::encode`):
|
||||||
|
///
|
||||||
|
/// ----------------------------------------------------------------------------------------------------
|
||||||
|
/// |             Data Section             |             Offset Section              |      Extra      |
|
||||||
|
/// ----------------------------------------------------------------------------------------------------
|
||||||
|
/// | Entry #1 | Entry #2 | ... | Entry #N | Offset #1 | Offset #2 | ... | Offset #N | num_of_elements |
|
||||||
|
/// ----------------------------------------------------------------------------------------------------
|
||||||
pub struct Block {
|
pub struct Block {
|
||||||
data: Vec<u8>,
|
data: Vec<u8>,
|
||||||
offsets: Vec<u16>,
|
offsets: Vec<u16>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Block {
|
impl Block {
|
||||||
|
/// Encode the internal data to the data layout illustrated in the tutorial
|
||||||
|
/// Note: You may want to double-check that none of the expected fields are missing from your output
|
||||||
pub fn encode(&self) -> Bytes {
|
pub fn encode(&self) -> Bytes {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decode from the data layout, transform the input `data` to a single `Block`
|
||||||
pub fn decode(data: &[u8]) -> Self {
|
pub fn decode(data: &[u8]) -> Self {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,9 +7,13 @@ use super::Block;
|
|||||||
|
|
||||||
/// Iterates on a block.
|
/// Iterates on a block.
|
||||||
pub struct BlockIterator {
|
pub struct BlockIterator {
|
||||||
|
/// The internal `Block`, wrapped by an `Arc`
|
||||||
block: Arc<Block>,
|
block: Arc<Block>,
|
||||||
|
/// The current key; an empty key means the iterator is invalid
|
||||||
key: Vec<u8>,
|
key: Vec<u8>,
|
||||||
|
/// The corresponding value, can be empty
|
||||||
value: Vec<u8>,
|
value: Vec<u8>,
|
||||||
|
/// Index of the current key-value pair; should be in the range [0, num_of_elements)
|
||||||
idx: usize,
|
idx: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -44,6 +48,7 @@ impl BlockIterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if the iterator is valid.
|
/// Returns true if the iterator is valid.
|
||||||
|
/// Note: You may want to make use of `key`
|
||||||
pub fn is_valid(&self) -> bool {
|
pub fn is_valid(&self) -> bool {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
@@ -59,6 +64,7 @@ impl BlockIterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Seek to the first key that is >= `key`.
|
/// Seek to the first key that is >= `key`.
|
||||||
|
/// Note: You may assume that callers add key-value pairs to the block in sorted order.
|
||||||
pub fn seek_to_key(&mut self, key: &[u8]) {
|
pub fn seek_to_key(&mut self, key: &[u8]) {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ impl Block {
|
|||||||
for offset in &self.offsets {
|
for offset in &self.offsets {
|
||||||
buf.put_u16(*offset);
|
buf.put_u16(*offset);
|
||||||
}
|
}
|
||||||
|
// Adds number of elements at the end of the block
|
||||||
buf.put_u16(offsets_len as u16);
|
buf.put_u16(offsets_len as u16);
|
||||||
buf.into()
|
buf.into()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,11 +30,13 @@ impl BlockBuilder {
|
|||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
|
||||||
assert!(!key.is_empty(), "key must not be empty");
|
assert!(!key.is_empty(), "key must not be empty");
|
||||||
|
// The overhead here is `key_len` + `val_len` + `offset`, each of type `u16`
|
||||||
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.block_size
|
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.block_size
|
||||||
&& !self.is_empty()
|
&& !self.is_empty()
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Record the offset first, before appending to `data`, so that it points at the start of this entry
|
||||||
self.offsets.push(self.data.len() as u16);
|
self.offsets.push(self.data.len() as u16);
|
||||||
self.data.put_u16(key.len() as u16);
|
self.data.put_u16(key.len() as u16);
|
||||||
self.data.put(key);
|
self.data.put(key);
|
||||||
|
|||||||
@@ -76,8 +76,12 @@ impl BlockIterator {
|
|||||||
self.seek_to(self.idx);
|
self.seek_to(self.idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Seek to the specified position and update the current `key` and `value`
|
||||||
|
/// Index update will be handled by caller
|
||||||
fn seek_to_offset(&mut self, offset: usize) {
|
fn seek_to_offset(&mut self, offset: usize) {
|
||||||
let mut entry = &self.block.data[offset..];
|
let mut entry = &self.block.data[offset..];
|
||||||
|
// Since `get_u16()` will automatically move the ptr 2 bytes ahead here,
|
||||||
|
// we don't need to manually advance it
|
||||||
let key_len = entry.get_u16() as usize;
|
let key_len = entry.get_u16() as usize;
|
||||||
let key = entry[..key_len].to_vec();
|
let key = entry[..key_len].to_vec();
|
||||||
entry.advance(key_len);
|
entry.advance(key_len);
|
||||||
@@ -90,7 +94,7 @@ impl BlockIterator {
|
|||||||
self.value.extend(value);
|
self.value.extend(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Seek to the first key that >= `key`.
|
/// Seek to the first key that is >= `key`.
|
||||||
pub fn seek_to_key(&mut self, key: &[u8]) {
|
pub fn seek_to_key(&mut self, key: &[u8]) {
|
||||||
let mut low = 0;
|
let mut low = 0;
|
||||||
let mut high = self.block.offsets.len();
|
let mut high = self.block.offsets.len();
|
||||||
|
|||||||
Reference in New Issue
Block a user