more comments, sync check

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi
2022-12-23 22:32:30 -05:00
parent 72cee6ac13
commit 3c50c81b69
20 changed files with 298 additions and 46 deletions

1
.gitignore vendored
View File

@@ -1,2 +1,3 @@
/target
.vscode/
sync-tmp/

1
Cargo.lock generated
View File

@@ -183,6 +183,7 @@ dependencies = [
name = "mini-lsm-starter"
version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
]

View File

@@ -1,8 +1,8 @@
[workspace]
members = [
"mini-lsm",
"xtask",
"mini-lsm-starter",
"xtask"
]
[workspace.package]

View File

@@ -1,12 +1,9 @@
[package]
name = "mini-lsm-starter"
version = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
repository = { workspace = true }
version = "0.1.0"
edition = "2021"
publish = false
[dependencies]
anyhow = "1"
bytes = "1"

View File

@@ -3,26 +3,27 @@
use super::Block;
/// Builds a block
/// Builds a block.
pub struct BlockBuilder {}
impl BlockBuilder {
/// Creates a new block builder
pub fn new(target_size: usize) -> Self {
/// Creates a new block builder.
pub fn new(block_size: usize) -> Self {
unimplemented!()
}
/// Adds a key-value pair to the block
/// Adds a key-value pair to the block. Returns false when the block is full.
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
unimplemented!()
}
/// Check if there is no key-value pair in the block.
pub fn is_empty(&self) -> bool {
unimplemented!()
}
/// Builds a block
/// Finalize the block.
pub fn build(self) -> Block {
unimplemented!()
}

View File

@@ -5,57 +5,60 @@ use std::sync::Arc;
use super::Block;
pub struct BlockIterator {}
/// Iterates on a block.
pub struct BlockIterator {
block: Arc<Block>,
key: Vec<u8>,
value: Vec<u8>,
idx: usize,
}
impl BlockIterator {
fn new(block: Arc<Block>) -> Self {
unimplemented!()
Self {
block,
key: Vec::new(),
value: Vec::new(),
idx: 0,
}
}
/// Creates a block iterator and seek to the first entry.
pub fn create_and_seek_to_first(block: Arc<Block>) -> Self {
unimplemented!()
}
/// Creates a block iterator and seek to the first key that >= `key`.
pub fn create_and_seek_to_key(block: Arc<Block>, key: &[u8]) -> Self {
unimplemented!()
}
/// Returns the key of the current entry.
pub fn key(&self) -> &[u8] {
unimplemented!()
}
/// Returns the value of the current entry.
pub fn value(&self) -> &[u8] {
unimplemented!()
}
/// Returns true if the iterator is valid.
pub fn is_valid(&self) -> bool {
unimplemented!()
}
/// Seeks to the first key in the block.
pub fn seek_to_first(&mut self) {
unimplemented!()
}
pub fn seek_to_last(&mut self) {
unimplemented!()
}
pub fn len(&self) -> usize {
unimplemented!()
}
pub fn is_empty(&self) -> bool {
unimplemented!()
}
pub fn seek_to(&mut self, idx: usize) {
unimplemented!()
}
/// Move to the next key in the block.
pub fn next(&mut self) {
unimplemented!()
}
/// Seek to the first key that >= `key`.
pub fn seek_to_key(&mut self, key: &[u8]) {
unimplemented!()
}

View File

@@ -1,2 +1,3 @@
pub mod block;
pub mod storage;
pub mod table;

View File

@@ -0,0 +1 @@
/// Placeholder for the storage type of the starter crate.
///
/// The struct has no fields or methods yet.
pub struct Storage {}

View File

@@ -1 +1,85 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
mod builder;
mod iterator;
use std::{path::Path, sync::Arc};
pub use builder::SsTableBuilder;
use bytes::{Buf, Bytes};
pub use iterator::SsTableIterator;
use crate::block::Block;
use anyhow::Result;
/// Metadata describing one data block.
///
/// A slice of these is the input of [`BlockMeta::encode_block_meta`] and a
/// `Vec` of these is the output of [`BlockMeta::decode_block_meta`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BlockMeta {
/// Offset of this data block.
///
/// NOTE(review): presumably a byte offset from the start of the SST file — confirm.
pub offset: usize,
/// The first key of the data block.
pub first_key: Bytes,
}
impl BlockMeta {
/// Encode block meta to a buffer.
///
/// `block_meta` is the list of entries to encode and `buf` is the buffer that
/// receives the encoding.
///
/// Starter stub: panics with `unimplemented!()` until it is filled in.
pub fn encode_block_meta(
block_meta: &[BlockMeta],
// NOTE(review): the lint is presumably silenced only so the stub can keep a
// `&mut Vec<u8>` parameter without clippy asking for a slice — confirm.
#[allow(clippy::ptr_arg)] buf: &mut Vec<u8>,
) {
unimplemented!()
}
/// Decode block meta from a buffer.
///
/// `buf` is any [`Buf`] implementation; the decoded entries are returned as a `Vec`.
///
/// Starter stub: panics with `unimplemented!()` until it is filled in.
pub fn decode_block_meta(buf: impl Buf) -> Vec<BlockMeta> {
unimplemented!()
}
}
/// A file object.
///
/// Holds the whole file content in memory as [`Bytes`]; `read` and `size` are
/// served from that buffer.
pub struct FileObject(Bytes);

impl FileObject {
    /// Returns a copy of the `len` bytes that start at byte `offset`.
    ///
    /// # Errors
    ///
    /// Returns an error (instead of panicking on an out-of-range slice) when
    /// `offset + len` overflows, does not fit in `usize`, or reaches past the
    /// end of the file.
    pub fn read(&self, offset: u64, len: u64) -> Result<Vec<u8>> {
        // Build the byte range with checked arithmetic and checked narrowing so
        // that hostile or corrupted offsets cannot wrap around.
        let range = offset.checked_add(len).and_then(|end| {
            let start = usize::try_from(offset).ok()?;
            let end = usize::try_from(end).ok()?;
            Some(start..end)
        });
        // `get` yields `None` for any range that is not fully inside the buffer.
        match range.and_then(|range| self.0.get(range)) {
            Some(bytes) => Ok(bytes.to_vec()),
            None => Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                format!(
                    "cannot read {} bytes at offset {}: file is {} bytes long",
                    len,
                    offset,
                    self.0.len()
                ),
            )
            .into()),
        }
    }

    /// Returns the size of the file content in bytes.
    pub fn size(&self) -> u64 {
        self.0.len() as u64
    }

    /// Creates a file object for `path` from `data`.
    ///
    /// Starter stub: panics with `unimplemented!()` until it is filled in.
    pub fn create(path: &Path, data: Vec<u8>) -> Result<Self> {
        unimplemented!()
    }

    /// Opens the file at `path`.
    ///
    /// Starter stub: panics with `unimplemented!()` until it is filled in.
    pub fn open(path: &Path) -> Result<Self> {
        unimplemented!()
    }
}
/// An SSTable.
///
/// Starter skeleton: the struct has no fields yet and every method below
/// panics with `unimplemented!()` until it is filled in.
pub struct SsTable {}
impl SsTable {
/// Open SSTable from a file.
///
/// Takes ownership of `file`.
pub fn open(file: FileObject) -> Result<Self> {
unimplemented!()
}
/// Read a block from the disk.
///
/// `block_idx` selects the block; the block is returned behind an [`Arc`].
pub fn read_block(&self, block_idx: usize) -> Result<Arc<Block>> {
unimplemented!()
}
/// Find the block that may contain `key`.
///
/// NOTE(review): the returned `usize` is presumably a block index usable with
/// `read_block` — confirm once implemented.
pub fn find_block_idx(&self, key: &[u8]) -> usize {
unimplemented!()
}
/// Get number of data blocks.
pub fn num_of_blocks(&self) -> usize {
unimplemented!()
}
}
#[cfg(test)]
mod tests;

View File

@@ -0,0 +1,30 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use anyhow::Result;
use std::path::Path;
use super::{BlockMeta, SsTable};
/// Builds an SSTable from key-value pairs.
///
/// Starter skeleton: every method below panics with `unimplemented!()` until
/// it is filled in.
pub struct SsTableBuilder {
/// Block metadata of the table being built; `pub(super)` exposes it to the
/// parent module.
///
/// NOTE(review): presumably one entry per data block — confirm once implemented.
pub(super) meta: Vec<BlockMeta>,
}
impl SsTableBuilder {
/// Create a builder based on target SST size and target block size.
pub fn new(target_size: usize, block_size: usize) -> Self {
unimplemented!()
}
/// Adds a key-value pair to SSTable, return false when SST full.
///
/// The result is `#[must_use]`, so callers are warned if they ignore the
/// "full" signal.
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
unimplemented!()
}
/// Builds the SSTable and writes it to the given path. No need to actually write to disk until chapter 4 block cache.
///
/// Consumes the builder.
pub fn build(self, path: impl AsRef<Path>) -> Result<SsTable> {
unimplemented!()
}
}

View File

@@ -0,0 +1,53 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use anyhow::Result;
use std::sync::Arc;
use super::SsTable;
/// An iterator over the contents of an SSTable.
///
/// Starter skeleton: the struct has no fields yet and every method below
/// panics with `unimplemented!()` until it is filled in.
pub struct SsTableIterator {}
impl SsTableIterator {
/// Create a new iterator and seek to the first key-value pair.
///
/// Takes a shared handle (`Arc`) to the table to iterate over.
pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> {
unimplemented!()
}
/// Seek to the first key-value pair.
pub fn seek_to_first(&mut self) -> Result<()> {
unimplemented!()
}
/// Create a new iterator and seek to the first key-value pair whose key is >= `key`.
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: &[u8]) -> Result<Self> {
unimplemented!()
}
/// Seek to the first key-value pair whose key is >= `key`.
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
unimplemented!()
}
/// Get the current key.
pub fn key(&self) -> &[u8] {
unimplemented!()
}
/// Get the current value.
pub fn value(&self) -> &[u8] {
unimplemented!()
}
/// Check if the iterator is valid.
pub fn is_valid(&self) -> bool {
unimplemented!()
}
/// Move to the next key-value pair.
// The method is named `next` but returns `Result<()>` rather than an item, so
// it is not an `Iterator::next` implementation; the clippy hint is silenced.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<()> {
unimplemented!()
}
}

View File

@@ -0,0 +1 @@

View File

@@ -2,15 +2,18 @@ use bytes::BufMut;
use super::{Block, SIZEOF_U16};
/// Builds a block
/// Builds a block.
pub struct BlockBuilder {
/// Offsets of each key-value entries.
offsets: Vec<u16>,
/// All key-value pairs in the block.
data: Vec<u8>,
/// The expected block size.
block_size: usize,
}
impl BlockBuilder {
/// Creates a new block builder
/// Creates a new block builder.
pub fn new(block_size: usize) -> Self {
Self {
offsets: Vec::new(),
@@ -23,7 +26,7 @@ impl BlockBuilder {
self.offsets.len() * SIZEOF_U16 + self.data.len() + SIZEOF_U16
}
/// Adds a key-value pair to the block
/// Adds a key-value pair to the block. Returns false when the block is full.
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
assert!(!key.is_empty(), "key must not be empty");
@@ -41,11 +44,12 @@ impl BlockBuilder {
true
}
/// Check if there is no key-value pair in the block.
pub fn is_empty(&self) -> bool {
self.offsets.is_empty()
}
/// Builds a block
/// Finalize the block.
pub fn build(self) -> Block {
if self.is_empty() {
panic!("block should not be empty");

View File

@@ -4,6 +4,7 @@ use bytes::Buf;
use super::Block;
/// Iterates on a block.
pub struct BlockIterator {
block: Arc<Block>,
key: Vec<u8>,
@@ -21,45 +22,44 @@ impl BlockIterator {
}
}
/// Creates a block iterator and seek to the first entry.
pub fn create_and_seek_to_first(block: Arc<Block>) -> Self {
let mut iter = Self::new(block);
iter.seek_to_first();
iter
}
/// Creates a block iterator and seek to the first key that >= `key`.
pub fn create_and_seek_to_key(block: Arc<Block>, key: &[u8]) -> Self {
let mut iter = Self::new(block);
iter.seek_to_key(key);
iter
}
/// Returns the key of the current entry.
pub fn key(&self) -> &[u8] {
debug_assert!(!self.key.is_empty(), "invalid iterator");
&self.key
}
/// Returns the value of the current entry.
pub fn value(&self) -> &[u8] {
debug_assert!(!self.key.is_empty(), "invalid iterator");
&self.value
}
/// Returns true if the iterator is valid.
pub fn is_valid(&self) -> bool {
!self.key.is_empty()
}
/// Seeks to the first key in the block.
pub fn seek_to_first(&mut self) {
self.seek_to(0);
}
pub fn len(&self) -> usize {
self.block.offsets.len()
}
pub fn is_empty(&self) -> bool {
self.block.offsets.is_empty()
}
pub fn seek_to(&mut self, idx: usize) {
/// Seeks to the idx-th key in the block.
fn seek_to(&mut self, idx: usize) {
if idx >= self.block.offsets.len() {
self.key.clear();
self.value.clear();
@@ -70,6 +70,7 @@ impl BlockIterator {
self.idx = idx;
}
/// Move to the next key in the block.
pub fn next(&mut self) {
self.idx += 1;
self.seek_to(self.idx);
@@ -89,6 +90,7 @@ impl BlockIterator {
self.value.extend(value);
}
/// Seek to the first key that >= `key`.
pub fn seek_to_key(&mut self, key: &[u8]) {
let mut low = 0;
let mut high = self.block.offsets.len();

View File

@@ -1,2 +1,3 @@
pub mod block;
pub mod storage;
pub mod table;

1
mini-lsm/src/storage.rs Normal file
View File

@@ -0,0 +1 @@
/// Placeholder for the storage type.
///
/// The struct has no fields or methods yet.
pub struct Storage {}

View File

@@ -12,11 +12,14 @@ use anyhow::Result;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BlockMeta {
/// Offset of this data block.
pub offset: usize,
/// The first key of the data block.
pub first_key: Bytes,
}
impl BlockMeta {
/// Encode block meta to a buffer.
pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec<u8>) {
let mut estimated_size = 0;
for meta in block_meta {
@@ -34,6 +37,7 @@ impl BlockMeta {
assert_eq!(estimated_size, buf.len() - original_len);
}
/// Decode block meta from a buffer.
pub fn decode_block_meta(mut buf: impl Buf) -> Vec<BlockMeta> {
let mut block_meta = Vec::new();
while buf.has_remaining() {
@@ -46,6 +50,7 @@ impl BlockMeta {
}
}
/// A file object.
pub struct FileObject(Bytes);
impl FileObject {
@@ -73,6 +78,7 @@ pub struct SsTable {
}
impl SsTable {
/// Open SSTable from a file.
pub fn open(file: FileObject) -> Result<Self> {
let len = file.size();
let raw_meta_offset = file.read(len - 4, 4)?;
@@ -85,7 +91,8 @@ impl SsTable {
})
}
fn read_block(&self, block_idx: usize) -> Result<Arc<Block>> {
/// Read a block from the disk.
pub fn read_block(&self, block_idx: usize) -> Result<Arc<Block>> {
let offset = self.block_metas[block_idx].offset;
let offset_end = self
.block_metas
@@ -98,13 +105,15 @@ impl SsTable {
Ok(Arc::new(Block::decode(&block_data[..])))
}
fn find_block_idx(&self, key: &[u8]) -> usize {
/// Find the block that may contain `key`.
pub fn find_block_idx(&self, key: &[u8]) -> usize {
self.block_metas
.partition_point(|meta| meta.first_key <= key)
.saturating_sub(1)
}
fn num_of_blocks(&self) -> usize {
/// Get number of data blocks.
pub fn num_of_blocks(&self) -> usize {
self.block_metas.len()
}
}

View File

@@ -5,6 +5,7 @@ use std::path::Path;
use super::{BlockMeta, FileObject, SsTable};
use crate::block::BlockBuilder;
/// Builds an SSTable from key-value pairs.
pub struct SsTableBuilder {
builder: BlockBuilder,
first_key: Vec<u8>,
@@ -15,6 +16,7 @@ pub struct SsTableBuilder {
}
impl SsTableBuilder {
/// Create a builder based on target SST size and target block size.
pub fn new(target_size: usize, block_size: usize) -> Self {
Self {
data: Vec::new(),
@@ -26,6 +28,7 @@ impl SsTableBuilder {
}
}
/// Adds a key-value pair to SSTable, return false when SST full.
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
if self.data.len() > self.target_size {
@@ -59,6 +62,7 @@ impl SsTableBuilder {
self.data.extend(encoded_block);
}
/// Builds the SSTable and writes it to the given path. No need to actually write to disk until chapter 4 block cache.
pub fn build(mut self, path: impl AsRef<Path>) -> Result<SsTable> {
self.finish_block();
let mut buf = self.data;

View File

@@ -4,6 +4,7 @@ use std::sync::Arc;
use super::SsTable;
use crate::block::BlockIterator;
/// An iterator over the contents of an SSTable.
pub struct SsTableIterator {
table: Arc<SsTable>,
blk_iter: BlockIterator,
@@ -18,6 +19,7 @@ impl SsTableIterator {
))
}
/// Create a new iterator and seek to the first key-value pair.
pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> {
let (blk_idx, blk_iter) = Self::seek_to_first_inner(&table)?;
let iter = Self {
@@ -28,6 +30,7 @@ impl SsTableIterator {
Ok(iter)
}
/// Seek to the first key-value pair.
pub fn seek_to_first(&mut self) -> Result<()> {
let (blk_idx, blk_iter) = Self::seek_to_first_inner(&self.table)?;
self.blk_idx = blk_idx;
@@ -47,6 +50,7 @@ impl SsTableIterator {
Ok((blk_idx, blk_iter))
}
/// Create a new iterator and seek to the first key-value pair which >= `key`.
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: &[u8]) -> Result<Self> {
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&table, key)?;
let iter = Self {
@@ -57,6 +61,7 @@ impl SsTableIterator {
Ok(iter)
}
/// Seek to the first key-value pair which >= `key`.
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
let (blk_idx, blk_iter) = Self::seek_to_key_inner(&self.table, key)?;
self.blk_iter = blk_iter;
@@ -64,18 +69,22 @@ impl SsTableIterator {
Ok(())
}
/// Get the current key.
pub fn key(&self) -> &[u8] {
self.blk_iter.key()
}
/// Get the current value.
pub fn value(&self) -> &[u8] {
self.blk_iter.value()
}
/// Check if the iterator is valid.
pub fn is_valid(&self) -> bool {
self.blk_iter.is_valid()
}
/// Move to the next key-value pair.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<()> {
self.blk_iter.next();

View File

@@ -17,6 +17,10 @@ enum Action {
Show,
/// Run CI jobs
Ci,
/// Sync starter repo and reference solution.
Sync,
/// Check starter code
Scheck,
}
/// Simple program to greet a person
@@ -36,6 +40,16 @@ fn switch_to_workspace_root() -> Result<()> {
Ok(())
}
/// Changes the process working directory to the `mini-lsm-starter` directory.
///
/// The directory is located as a sibling of this crate: the parent of
/// `CARGO_MANIFEST_DIR` joined with `mini-lsm-starter`.
///
/// # Errors
///
/// Fails when the manifest directory has no parent or when the working
/// directory cannot be changed.
fn switch_to_starter_root() -> Result<()> {
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let workspace_root = manifest_dir
        .parent()
        .ok_or_else(|| anyhow!("failed to find the workspace root"))?;
    let starter_root = workspace_root.join("mini-lsm-starter");
    std::env::set_current_dir(starter_root)?;
    Ok(())
}
fn fmt() -> Result<()> {
println!("{}", style("cargo fmt").bold());
cmd!("cargo", "fmt").run()?;
@@ -78,6 +92,28 @@ fn serve_book() -> Result<()> {
Ok(())
}
/// Checks the starter crate's public API against the reference solution.
///
/// Copies `mini-lsm-starter/` into `sync-tmp/`, rewrites the copy's
/// `Cargo.toml` so that every `mini-lsm-starter` occurrence becomes `mini-lsm`
/// and an empty `[workspace]` table is appended, then runs
/// `cargo semver-checks check-release` on the copy with `mini-lsm/Cargo.toml`
/// as the baseline.
///
/// All paths are relative, so the caller must already be in the workspace root.
///
/// # Errors
///
/// Fails when the staging directory cannot be prepared, the manifest cannot be
/// read or written, or any of the spawned commands exits unsuccessfully.
fn sync() -> Result<()> {
    let staging_dir = "sync-tmp/mini-lsm-starter";
    // Start from a clean staging copy. A leftover directory from a previous
    // run would otherwise be reused: GNU `cp` nests the new copy inside it,
    // the already patched manifest gets a second `[workspace]` table appended,
    // and files deleted from the starter would linger.
    match std::fs::remove_dir_all(staging_dir) {
        Ok(()) => {}
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
        Err(err) => return Err(err.into()),
    }
    cmd!("mkdir", "-p", "sync-tmp").run()?;
    cmd!("cp", "-a", "mini-lsm-starter/", staging_dir).run()?;
    let cargo_toml = "sync-tmp/mini-lsm-starter/Cargo.toml";
    // Rename the crate so it can be compared with the reference crate, and
    // append an empty `[workspace]` table to the copied manifest.
    std::fs::write(
        cargo_toml,
        std::fs::read_to_string(cargo_toml)?.replace("mini-lsm-starter", "mini-lsm")
            + "\n[workspace]\n",
    )?;
    cmd!(
        "cargo",
        "semver-checks",
        "check-release",
        "--manifest-path",
        cargo_toml,
        "--baseline-root",
        "mini-lsm/Cargo.toml",
    )
    .run()?;
    Ok(())
}
fn main() -> Result<()> {
let args = Args::parse();
@@ -87,6 +123,8 @@ fn main() -> Result<()> {
cmd!("cargo", "install", "cargo-nextest", "--locked").run()?;
println!("{}", style("cargo install mdbook mdbook-toc").bold());
cmd!("cargo", "install", "mdbook", "mdbook-toc", "--locked").run()?;
println!("{}", style("cargo install cargo-semver-checks").bold());
cmd!("cargo", "install", "cargo-semver-checks", "--locked").run()?;
}
Action::Check => {
switch_to_workspace_root()?;
@@ -95,6 +133,13 @@ fn main() -> Result<()> {
test()?;
clippy()?;
}
Action::Scheck => {
switch_to_starter_root()?;
fmt()?;
check()?;
test()?;
clippy()?;
}
Action::Book => {
switch_to_workspace_root()?;
serve_book()?;
@@ -111,6 +156,10 @@ fn main() -> Result<()> {
println!("CARGO_MANIFEST_DIR={}", env!("CARGO_MANIFEST_DIR"));
println!("PWD={:?}", std::env::current_dir()?);
}
Action::Sync => {
switch_to_workspace_root()?;
sync()?;
}
}
Ok(())