diff --git a/.gitignore b/.gitignore index de358ff..87bc59f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target .vscode/ +sync-tmp/ diff --git a/Cargo.lock b/Cargo.lock index b4d1676..54ba756 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -183,6 +183,7 @@ dependencies = [ name = "mini-lsm-starter" version = "0.1.0" dependencies = [ + "anyhow", "bytes", ] diff --git a/Cargo.toml b/Cargo.toml index 8cf7351..1814864 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,8 @@ [workspace] members = [ "mini-lsm", + "xtask", "mini-lsm-starter", - "xtask" ] [workspace.package] diff --git a/mini-lsm-starter/Cargo.toml b/mini-lsm-starter/Cargo.toml index 5900389..7846954 100644 --- a/mini-lsm-starter/Cargo.toml +++ b/mini-lsm-starter/Cargo.toml @@ -1,12 +1,9 @@ [package] name = "mini-lsm-starter" -version = { workspace = true } -edition = { workspace = true } -homepage = { workspace = true } -keywords = { workspace = true } -license = { workspace = true } -repository = { workspace = true } +version = "0.1.0" +edition = "2021" publish = false [dependencies] +anyhow = "1" bytes = "1" diff --git a/mini-lsm-starter/src/block/builder.rs b/mini-lsm-starter/src/block/builder.rs index 3c2570e..d14fabe 100644 --- a/mini-lsm-starter/src/block/builder.rs +++ b/mini-lsm-starter/src/block/builder.rs @@ -3,26 +3,27 @@ use super::Block; -/// Builds a block +/// Builds a block. pub struct BlockBuilder {} impl BlockBuilder { - /// Creates a new block builder - pub fn new(target_size: usize) -> Self { + /// Creates a new block builder. + pub fn new(block_size: usize) -> Self { unimplemented!() } - /// Adds a key-value pair to the block + /// Adds a key-value pair to the block. Returns false when the block is full. #[must_use] pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { unimplemented!() } + /// Check if there is no key-value pair in the block. pub fn is_empty(&self) -> bool { unimplemented!() } - /// Builds a block + /// Finalize the block. pub fn build(self) -> Block { unimplemented!() } diff --git a/mini-lsm-starter/src/block/iterator.rs b/mini-lsm-starter/src/block/iterator.rs index a099708..cef570d 100644 --- a/mini-lsm-starter/src/block/iterator.rs +++ b/mini-lsm-starter/src/block/iterator.rs @@ -5,57 +5,60 @@ use std::sync::Arc; use super::Block; -pub struct BlockIterator {} +/// Iterates on a block. +pub struct BlockIterator { + block: Arc, + key: Vec, + value: Vec, + idx: usize, +} impl BlockIterator { fn new(block: Arc) -> Self { - unimplemented!() + Self { + block, + key: Vec::new(), + value: Vec::new(), + idx: 0, + } } + /// Creates a block iterator and seek to the first entry. pub fn create_and_seek_to_first(block: Arc) -> Self { unimplemented!() } + /// Creates a block iterator and seek to the first key that >= `key`. pub fn create_and_seek_to_key(block: Arc, key: &[u8]) -> Self { unimplemented!() } + /// Returns the key of the current entry. pub fn key(&self) -> &[u8] { unimplemented!() } + /// Returns the value of the current entry. pub fn value(&self) -> &[u8] { unimplemented!() } + /// Returns true if the iterator is valid. pub fn is_valid(&self) -> bool { unimplemented!() } + /// Seeks to the first key in the block. pub fn seek_to_first(&mut self) { unimplemented!() } - pub fn seek_to_last(&mut self) { - unimplemented!() - } - - pub fn len(&self) -> usize { - unimplemented!() - } - - pub fn is_empty(&self) -> bool { - unimplemented!() - } - - pub fn seek_to(&mut self, idx: usize) { - unimplemented!() - } - + /// Move to the next key in the block. pub fn next(&mut self) { unimplemented!() } + /// Seek to the first key that >= `key`. pub fn seek_to_key(&mut self, key: &[u8]) { unimplemented!() } diff --git a/mini-lsm-starter/src/lib.rs b/mini-lsm-starter/src/lib.rs index cd9e46c..3ed909f 100644 --- a/mini-lsm-starter/src/lib.rs +++ b/mini-lsm-starter/src/lib.rs @@ -1,2 +1,3 @@ pub mod block; +pub mod storage; pub mod table; diff --git a/mini-lsm-starter/src/storage.rs b/mini-lsm-starter/src/storage.rs new file mode 100644 index 0000000..8a0cef5 --- /dev/null +++ b/mini-lsm-starter/src/storage.rs @@ -0,0 +1 @@ +pub struct Storage {} diff --git a/mini-lsm-starter/src/table.rs b/mini-lsm-starter/src/table.rs index 8b13789..cf3bf04 100644 --- a/mini-lsm-starter/src/table.rs +++ b/mini-lsm-starter/src/table.rs @@ -1 +1,85 @@ +#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod +#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod +mod builder; +mod iterator; + +use std::{path::Path, sync::Arc}; + +pub use builder::SsTableBuilder; +use bytes::{Buf, Bytes}; +pub use iterator::SsTableIterator; + +use crate::block::Block; +use anyhow::Result; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct BlockMeta { + /// Offset of this data block. + pub offset: usize, + /// The first key of the data block. + pub first_key: Bytes, +} + +impl BlockMeta { + /// Encode block meta to a buffer. + pub fn encode_block_meta( + block_meta: &[BlockMeta], + #[allow(clippy::ptr_arg)] buf: &mut Vec, + ) { + unimplemented!() + } + + /// Decode block meta from a buffer. + pub fn decode_block_meta(buf: impl Buf) -> Vec { + unimplemented!() + } +} + +/// A file object. +pub struct FileObject(Bytes); + +impl FileObject { + pub fn read(&self, offset: u64, len: u64) -> Result> { + Ok(self.0[offset as usize..(offset + len) as usize].to_vec()) + } + + pub fn size(&self) -> u64 { + self.0.len() as u64 + } + + pub fn create(path: &Path, data: Vec) -> Result { + unimplemented!() + } + + pub fn open(path: &Path) -> Result { + unimplemented!() + } +} + +pub struct SsTable {} + +impl SsTable { + /// Open SSTable from a file. + pub fn open(file: FileObject) -> Result { + unimplemented!() + } + + /// Read a block from the disk. + pub fn read_block(&self, block_idx: usize) -> Result> { + unimplemented!() + } + + /// Find the block that may contain `key`. + pub fn find_block_idx(&self, key: &[u8]) -> usize { + unimplemented!() + } + + /// Get number of data blocks. + pub fn num_of_blocks(&self) -> usize { + unimplemented!() + } +} + +#[cfg(test)] +mod tests; diff --git a/mini-lsm-starter/src/table/builder.rs b/mini-lsm-starter/src/table/builder.rs index e69de29..e37d204 100644 --- a/mini-lsm-starter/src/table/builder.rs +++ b/mini-lsm-starter/src/table/builder.rs @@ -0,0 +1,30 @@ +#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod +#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod + +use anyhow::Result; +use std::path::Path; + +use super::{BlockMeta, SsTable}; + +/// Builds an SSTable from key-value pairs. +pub struct SsTableBuilder { + pub(super) meta: Vec, +} + +impl SsTableBuilder { + /// Create a builder based on target SST size and target block size. + pub fn new(target_size: usize, block_size: usize) -> Self { + unimplemented!() + } + + /// Adds a key-value pair to SSTable, return false when SST full. + #[must_use] + pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { + unimplemented!() + } + + /// Builds the SSTable and writes it to the given path. No need to actually write to disk until chapter 4 block cache. + pub fn build(self, path: impl AsRef) -> Result { + unimplemented!() + } +} diff --git a/mini-lsm-starter/src/table/iterator.rs b/mini-lsm-starter/src/table/iterator.rs index e69de29..a14654d 100644 --- a/mini-lsm-starter/src/table/iterator.rs +++ b/mini-lsm-starter/src/table/iterator.rs @@ -0,0 +1,53 @@ +#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod +#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod + +use anyhow::Result; +use std::sync::Arc; + +use super::SsTable; + +/// An iterator over the contents of an SSTable. +pub struct SsTableIterator {} + +impl SsTableIterator { + /// Create a new iterator and seek to the first key-value pair. + pub fn create_and_seek_to_first(table: Arc) -> Result { + unimplemented!() + } + + /// Seek to the first key-value pair. + pub fn seek_to_first(&mut self) -> Result<()> { + unimplemented!() + } + + /// Create a new iterator and seek to the first key-value pair which >= `key`. + pub fn create_and_seek_to_key(table: Arc, key: &[u8]) -> Result { + unimplemented!() + } + + /// Seek to the first key-value pair which >= `key`. + pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> { + unimplemented!() + } + + /// Get the current key. + pub fn key(&self) -> &[u8] { + unimplemented!() + } + + /// Get the current value. + pub fn value(&self) -> &[u8] { + unimplemented!() + } + + /// Check if the iterator is valid. + pub fn is_valid(&self) -> bool { + unimplemented!() + } + + /// Move to the next key-value pair. + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Result<()> { + unimplemented!() + } +} diff --git a/mini-lsm-starter/src/table/tests.rs b/mini-lsm-starter/src/table/tests.rs index e69de29..8b13789 100644 --- a/mini-lsm-starter/src/table/tests.rs +++ b/mini-lsm-starter/src/table/tests.rs @@ -0,0 +1 @@ + diff --git a/mini-lsm/src/block/builder.rs b/mini-lsm/src/block/builder.rs index 3f5b166..ff50ad7 100644 --- a/mini-lsm/src/block/builder.rs +++ b/mini-lsm/src/block/builder.rs @@ -2,15 +2,18 @@ use bytes::BufMut; use super::{Block, SIZEOF_U16}; -/// Builds a block +/// Builds a block. pub struct BlockBuilder { + /// Offsets of each key-value entries. offsets: Vec, + /// All key-value pairs in the block. data: Vec, + /// The expected block size. block_size: usize, } impl BlockBuilder { - /// Creates a new block builder + /// Creates a new block builder. pub fn new(block_size: usize) -> Self { Self { offsets: Vec::new(), @@ -23,7 +26,7 @@ impl BlockBuilder { self.offsets.len() * SIZEOF_U16 + self.data.len() + SIZEOF_U16 } - /// Adds a key-value pair to the block + /// Adds a key-value pair to the block. Returns false when the block is full. #[must_use] pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { assert!(!key.is_empty(), "key must not be empty"); @@ -41,11 +44,12 @@ impl BlockBuilder { true } + /// Check if there is no key-value pair in the block. pub fn is_empty(&self) -> bool { self.offsets.is_empty() } - /// Builds a block + /// Finalize the block. pub fn build(self) -> Block { if self.is_empty() { panic!("block should not be empty"); diff --git a/mini-lsm/src/block/iterator.rs b/mini-lsm/src/block/iterator.rs index 87d76dd..ae527a6 100644 --- a/mini-lsm/src/block/iterator.rs +++ b/mini-lsm/src/block/iterator.rs @@ -4,6 +4,7 @@ use bytes::Buf; use super::Block; +/// Iterates on a block. pub struct BlockIterator { block: Arc, key: Vec, @@ -21,45 +22,44 @@ impl BlockIterator { } } + /// Creates a block iterator and seek to the first entry. pub fn create_and_seek_to_first(block: Arc) -> Self { let mut iter = Self::new(block); iter.seek_to_first(); iter } + /// Creates a block iterator and seek to the first key that >= `key`. pub fn create_and_seek_to_key(block: Arc, key: &[u8]) -> Self { let mut iter = Self::new(block); iter.seek_to_key(key); iter } + /// Returns the key of the current entry. pub fn key(&self) -> &[u8] { debug_assert!(!self.key.is_empty(), "invalid iterator"); &self.key } + /// Returns the value of the current entry. pub fn value(&self) -> &[u8] { debug_assert!(!self.key.is_empty(), "invalid iterator"); &self.value } + /// Returns true if the iterator is valid. pub fn is_valid(&self) -> bool { !self.key.is_empty() } + /// Seeks to the first key in the block. pub fn seek_to_first(&mut self) { self.seek_to(0); } - pub fn len(&self) -> usize { - self.block.offsets.len() - } - - pub fn is_empty(&self) -> bool { - self.block.offsets.is_empty() - } - - pub fn seek_to(&mut self, idx: usize) { + /// Seeks to the idx-th key in the block. + fn seek_to(&mut self, idx: usize) { if idx >= self.block.offsets.len() { self.key.clear(); self.value.clear(); @@ -70,6 +70,7 @@ impl BlockIterator { self.idx = idx; } + /// Move to the next key in the block. pub fn next(&mut self) { self.idx += 1; self.seek_to(self.idx); @@ -89,6 +90,7 @@ impl BlockIterator { self.value.extend(value); } + /// Seek to the first key that >= `key`. pub fn seek_to_key(&mut self, key: &[u8]) { let mut low = 0; let mut high = self.block.offsets.len(); diff --git a/mini-lsm/src/lib.rs b/mini-lsm/src/lib.rs index cd9e46c..3ed909f 100644 --- a/mini-lsm/src/lib.rs +++ b/mini-lsm/src/lib.rs @@ -1,2 +1,3 @@ pub mod block; +pub mod storage; pub mod table; diff --git a/mini-lsm/src/storage.rs b/mini-lsm/src/storage.rs new file mode 100644 index 0000000..8a0cef5 --- /dev/null +++ b/mini-lsm/src/storage.rs @@ -0,0 +1 @@ +pub struct Storage {} diff --git a/mini-lsm/src/table.rs b/mini-lsm/src/table.rs index 0a73851..ebc4b4b 100644 --- a/mini-lsm/src/table.rs +++ b/mini-lsm/src/table.rs @@ -12,11 +12,14 @@ use anyhow::Result; #[derive(Clone, Debug, PartialEq, Eq)] pub struct BlockMeta { + /// Offset of this data block. pub offset: usize, + /// The first key of the data block. pub first_key: Bytes, } impl BlockMeta { + /// Encode block meta to a buffer. pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec) { let mut estimated_size = 0; for meta in block_meta { @@ -34,6 +37,7 @@ impl BlockMeta { assert_eq!(estimated_size, buf.len() - original_len); } + /// Decode block meta from a buffer. pub fn decode_block_meta(mut buf: impl Buf) -> Vec { let mut block_meta = Vec::new(); while buf.has_remaining() { @@ -46,6 +50,7 @@ impl BlockMeta { } } +/// A file object. pub struct FileObject(Bytes); impl FileObject { @@ -73,6 +78,7 @@ pub struct SsTable { } impl SsTable { + /// Open SSTable from a file. pub fn open(file: FileObject) -> Result { let len = file.size(); let raw_meta_offset = file.read(len - 4, 4)?; @@ -85,7 +91,8 @@ impl SsTable { }) } - fn read_block(&self, block_idx: usize) -> Result> { + /// Read a block from the disk. + pub fn read_block(&self, block_idx: usize) -> Result> { let offset = self.block_metas[block_idx].offset; let offset_end = self .block_metas @@ -98,13 +105,15 @@ impl SsTable { Ok(Arc::new(Block::decode(&block_data[..]))) } - fn find_block_idx(&self, key: &[u8]) -> usize { + /// Find the block that may contain `key`. + pub fn find_block_idx(&self, key: &[u8]) -> usize { self.block_metas .partition_point(|meta| meta.first_key <= key) .saturating_sub(1) } - fn num_of_blocks(&self) -> usize { + /// Get number of data blocks. + pub fn num_of_blocks(&self) -> usize { self.block_metas.len() } } diff --git a/mini-lsm/src/table/builder.rs b/mini-lsm/src/table/builder.rs index db71360..c128ed1 100644 --- a/mini-lsm/src/table/builder.rs +++ b/mini-lsm/src/table/builder.rs @@ -5,6 +5,7 @@ use std::path::Path; use super::{BlockMeta, FileObject, SsTable}; use crate::block::BlockBuilder; +/// Builds an SSTable from key-value pairs. pub struct SsTableBuilder { builder: BlockBuilder, first_key: Vec, @@ -15,6 +16,7 @@ pub struct SsTableBuilder { } impl SsTableBuilder { + /// Create a builder based on target SST size and target block size. pub fn new(target_size: usize, block_size: usize) -> Self { Self { data: Vec::new(), @@ -26,6 +28,7 @@ impl SsTableBuilder { } } + /// Adds a key-value pair to SSTable, return false when SST full. #[must_use] pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { if self.data.len() > self.target_size { @@ -59,6 +62,7 @@ impl SsTableBuilder { self.data.extend(encoded_block); } + /// Builds the SSTable and writes it to the given path. No need to actually write to disk until chapter 4 block cache. pub fn build(mut self, path: impl AsRef) -> Result { self.finish_block(); let mut buf = self.data; diff --git a/mini-lsm/src/table/iterator.rs b/mini-lsm/src/table/iterator.rs index d8b3cc6..4392335 100644 --- a/mini-lsm/src/table/iterator.rs +++ b/mini-lsm/src/table/iterator.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use super::SsTable; use crate::block::BlockIterator; +/// An iterator over the contents of an SSTable. pub struct SsTableIterator { table: Arc, blk_iter: BlockIterator, @@ -18,6 +19,7 @@ impl SsTableIterator { )) } + /// Create a new iterator and seek to the first key-value pair. pub fn create_and_seek_to_first(table: Arc) -> Result { let (blk_idx, blk_iter) = Self::seek_to_first_inner(&table)?; let iter = Self { @@ -28,6 +30,7 @@ impl SsTableIterator { Ok(iter) } + /// Seek to the first key-value pair. pub fn seek_to_first(&mut self) -> Result<()> { let (blk_idx, blk_iter) = Self::seek_to_first_inner(&self.table)?; self.blk_idx = blk_idx; @@ -47,6 +50,7 @@ impl SsTableIterator { Ok((blk_idx, blk_iter)) } + /// Create a new iterator and seek to the first key-value pair which >= `key`. pub fn create_and_seek_to_key(table: Arc, key: &[u8]) -> Result { let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?; let iter = Self { @@ -57,6 +61,7 @@ impl SsTableIterator { Ok(iter) } + /// Seek to the first key-value pair which >= `key`. pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> { let (blk_idx, blk_iter) = Self::seek_to_key_inner(&self.table, key)?; self.blk_iter = blk_iter; @@ -64,18 +69,22 @@ impl SsTableIterator { Ok(()) } + /// Get the current key. pub fn key(&self) -> &[u8] { self.blk_iter.key() } + /// Get the current value. pub fn value(&self) -> &[u8] { self.blk_iter.value() } + /// Check if the iterator is valid. pub fn is_valid(&self) -> bool { self.blk_iter.is_valid() } + /// Move to the next key-value pair. #[allow(clippy::should_implement_trait)] pub fn next(&mut self) -> Result<()> { self.blk_iter.next(); diff --git a/xtask/src/main.rs b/xtask/src/main.rs index ad53f8f..1416db2 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -17,6 +17,10 @@ enum Action { Show, /// Run CI jobs Ci, + /// Sync starter repo and reference solution. + Sync, + /// Check starter code + Scheck, } /// Simple program to greet a person @@ -36,6 +40,16 @@ fn switch_to_workspace_root() -> Result<()> { Ok(()) } +fn switch_to_starter_root() -> Result<()> { + std::env::set_current_dir( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .ok_or_else(|| anyhow!("failed to find the workspace root"))? + .join("mini-lsm-starter"), + )?; + Ok(()) +} + fn fmt() -> Result<()> { println!("{}", style("cargo fmt").bold()); cmd!("cargo", "fmt").run()?; @@ -78,6 +92,28 @@ fn serve_book() -> Result<()> { Ok(()) } +fn sync() -> Result<()> { + cmd!("mkdir", "-p", "sync-tmp").run()?; + cmd!("cp", "-a", "mini-lsm-starter/", "sync-tmp/mini-lsm-starter").run()?; + let cargo_toml = "sync-tmp/mini-lsm-starter/Cargo.toml"; + std::fs::write( + cargo_toml, + std::fs::read_to_string(cargo_toml)?.replace("mini-lsm-starter", "mini-lsm") + + "\n[workspace]\n", + )?; + cmd!( + "cargo", + "semver-checks", + "check-release", + "--manifest-path", + cargo_toml, + "--baseline-root", + "mini-lsm/Cargo.toml", + ) + .run()?; + Ok(()) +} + fn main() -> Result<()> { let args = Args::parse(); @@ -87,6 +123,8 @@ fn main() -> Result<()> { cmd!("cargo", "install", "cargo-nextest", "--locked").run()?; println!("{}", style("cargo install mdbook mdbook-toc").bold()); cmd!("cargo", "install", "mdbook", "mdbook-toc", "--locked").run()?; + println!("{}", style("cargo install cargo-semver-checks").bold()); + cmd!("cargo", "install", "cargo-semver-checks", "--locked").run()?; } Action::Check => { switch_to_workspace_root()?; @@ -95,6 +133,13 @@ fn main() -> Result<()> { test()?; clippy()?; } + Action::Scheck => { + switch_to_starter_root()?; + fmt()?; + check()?; + test()?; + clippy()?; + } Action::Book => { switch_to_workspace_root()?; serve_book()?; @@ -111,6 +156,10 @@ fn main() -> Result<()> { println!("CARGO_MANIFEST_DIR={}", env!("CARGO_MANIFEST_DIR")); println!("PWD={:?}", std::env::current_dir()?); } + Action::Sync => { + switch_to_workspace_root()?; + sync()?; + } } Ok(())