From 68303e99b8e4506bf96f091f0222858bd61b159c Mon Sep 17 00:00:00 2001 From: Alex Chi Date: Fri, 23 Dec 2022 14:29:32 -0500 Subject: [PATCH] initial commit Signed-off-by: Alex Chi --- .cargo/config.toml | 3 + .gitignore | 1 + .vscode/settings.json | 3 + Cargo.lock | 419 +++++++++++++++++++++++++++++++++ Cargo.toml | 13 + mini-lsm-book/.gitignore | 1 + mini-lsm-book/book.toml | 10 + mini-lsm-book/src/SUMMARY.md | 3 + mini-lsm-book/src/chapter_1.md | 1 + mini-lsm/Cargo.toml | 13 + mini-lsm/src/block.rs | 132 +++++++++++ mini-lsm/src/block/builder.rs | 57 +++++ mini-lsm/src/block/iterator.rs | 110 +++++++++ mini-lsm/src/lib.rs | 1 + rust-toolchain | 1 + xtask/Cargo.toml | 16 ++ xtask/src/main.rs | 65 +++++ 17 files changed, 849 insertions(+) create mode 100644 .cargo/config.toml create mode 100644 .gitignore create mode 100644 .vscode/settings.json create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 mini-lsm-book/.gitignore create mode 100644 mini-lsm-book/book.toml create mode 100644 mini-lsm-book/src/SUMMARY.md create mode 100644 mini-lsm-book/src/chapter_1.md create mode 100644 mini-lsm/Cargo.toml create mode 100644 mini-lsm/src/block.rs create mode 100644 mini-lsm/src/block/builder.rs create mode 100644 mini-lsm/src/block/iterator.rs create mode 100644 mini-lsm/src/lib.rs create mode 100644 rust-toolchain create mode 100644 xtask/Cargo.toml create mode 100644 xtask/src/main.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..776c9c9 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,3 @@ +[alias] +xtask = "run --package mini-lsm-xtask --" +x = "run --package mini-lsm-xtask --" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..23fd35f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "editor.formatOnSave": true +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..26bc7ff --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,419 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anyhow" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bytes" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" + +[[package]] +name = "cc" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" + +[[package]] +name = "clap" +version = "4.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" +dependencies = [ + "bitflags", + "clap_derive", + "clap_lex", + "is-terminal", + "once_cell", + "strsim", + "termcolor", +] + +[[package]] +name = "clap_derive" +version = "4.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "console" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c050367d967ced717c04b65d8c619d863ef9292ce0c5760028655a2fb298718c" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "terminal_size", + "unicode-width", + "winapi", +] + +[[package]] +name = "duct" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ae3fc31835f74c2a7ceda3aeede378b0ae2e74c8f1c36559fcc9ae2a4e7d3e" +dependencies = [ + "libc", + "once_cell", + "os_pipe", + "shared_child", +] + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "is-terminal" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" +dependencies = [ + "hermit-abi", + "io-lifetimes", + "rustix", + "windows-sys", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + +[[package]] +name = "mini-lsm" +version = "0.1.0" +dependencies = [ + "bytes", +] + +[[package]] +name = "mini-lsm-xtask" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "console", + "duct", +] + +[[package]] +name = "once_cell" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" + +[[package]] +name = "os_pipe" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6a252f1f8c11e84b3ab59d7a488e48e4478a93937e027076638c49536204639" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "os_str_bytes" +version = "6.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustix" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3807b5d10909833d3e9acd1eb5fb988f79376ff10fce42937de71a449c4c588" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "shared_child" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0d94659ad3c2137fef23ae75b03d5241d633f8acded53d672decfa0e6e0caef" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..15207d2 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[workspace] +members = [ + "mini-lsm", + "xtask" +] + +[workspace.package] +version = "0.1.0" +edition = "2021" +homepage = "https://github.com/skyzh/mini-lsm" +keywords = ["storage", "database", "tutorial"] +license = "Apache-2.0" +repository = "https://github.com/skyzh/mini-lsm" diff --git a/mini-lsm-book/.gitignore b/mini-lsm-book/.gitignore new file mode 100644 index 0000000..7585238 --- /dev/null +++ b/mini-lsm-book/.gitignore @@ -0,0 +1 @@ +book diff --git a/mini-lsm-book/book.toml b/mini-lsm-book/book.toml new file mode 100644 index 0000000..7c7ce5f --- /dev/null +++ b/mini-lsm-book/book.toml @@ -0,0 +1,10 @@ +[book] +authors = ["Alex Chi"] +language = "en" +multilingual = false +src = "src" +title = "LSM in a Week" + +[preprocessor.toc] +command = "mdbook-toc" +renderer = ["html"] diff --git a/mini-lsm-book/src/SUMMARY.md b/mini-lsm-book/src/SUMMARY.md new file mode 100644 index 0000000..7390c82 --- /dev/null +++ b/mini-lsm-book/src/SUMMARY.md @@ -0,0 +1,3 @@ +# Summary + +- [Chapter 1](./chapter_1.md) diff --git a/mini-lsm-book/src/chapter_1.md b/mini-lsm-book/src/chapter_1.md new file mode 100644 index 0000000..b743fda --- /dev/null +++ b/mini-lsm-book/src/chapter_1.md @@ -0,0 +1 @@ +# Chapter 1 diff --git a/mini-lsm/Cargo.toml b/mini-lsm/Cargo.toml new file mode 100644 index 0000000..77c4a9f --- /dev/null +++ b/mini-lsm/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "mini-lsm" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +repository = { workspace = true } + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bytes = "1" diff --git a/mini-lsm/src/block.rs b/mini-lsm/src/block.rs new file mode 100644 index 0000000..7d78a8b --- /dev/null +++ b/mini-lsm/src/block.rs @@ -0,0 +1,132 @@ +mod builder; +mod iterator; + +use bytes::{Buf, BufMut, Bytes}; + +pub use builder::BlockBuilder; +pub use iterator::BlockIterator; + +pub const SIZEOF_U16: usize = std::mem::size_of::(); + +/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted key-value pairs. +pub struct Block { + pub(self) data: Vec, + pub(self) offsets: Vec, +} + +impl Block { + pub fn encode(&self) -> Bytes { + let mut buf = self.data.clone(); + let offsets_len = self.offsets.len(); + for offset in &self.offsets { + buf.put_u16(*offset); + } + buf.put_u16(offsets_len as u16); + buf.into() + } + + pub fn decode(data: &[u8]) -> Self { + let entry_offsets_len = (&data[data.len() - SIZEOF_U16..]).get_u16() as usize; + let data_end = data.len() - SIZEOF_U16 - entry_offsets_len * SIZEOF_U16; + let offsets_raw = &data[data_end..data.len() - SIZEOF_U16]; + let offsets = offsets_raw + .chunks(SIZEOF_U16) + .map(|mut x| x.get_u16()) + .collect(); + let data = data[0..data_end].to_vec(); + Self { data, offsets } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::{builder::BlockBuilder, iterator::BlockIterator, *}; + + #[test] + fn test_block_build_single_key() { + let mut builder = BlockBuilder::new(16); + assert!(builder.add(b"233", b"233333")); + builder.build(); + } + + #[test] + fn test_block_build_full() { + let mut builder = BlockBuilder::new(16); + assert!(builder.add(b"11", b"11")); + assert!(!builder.add(b"22", b"22")); + builder.build(); + } + + fn key_of(idx: usize) -> Vec { + format!("key_{:03}", idx).into_bytes() + } + + fn value_of(idx: usize) -> Vec { + format!("value_{:010}", idx).into_bytes() + } + + fn num_of_keys() -> usize { + 100 + } + + fn generate_block() -> Block { + let mut builder = BlockBuilder::new(10000); + for idx in 0..num_of_keys() { + let key = key_of(idx); + let value = value_of(idx); + assert!(builder.add(&key[..], &value[..])); + } + builder.build() + } + + #[test] + fn test_block_build_all() { + generate_block(); + } + + #[test] + fn test_block_encode() { + let block = generate_block(); + block.encode(); + } + + #[test] + fn test_block_decode() { + let block = generate_block(); + let encoded = block.encode(); + let decoded_block = Block::decode(&encoded); + assert_eq!(block.offsets, decoded_block.offsets); + assert_eq!(block.data, decoded_block.data); + } + + fn as_bytes(x: &[u8]) -> Bytes { + Bytes::copy_from_slice(x) + } + + #[test] + fn test_block_iterator() { + let block = Arc::new(generate_block()); + let mut iter = BlockIterator::create_and_seek_to_first(block); + for i in 0..num_of_keys() { + let key = iter.key(); + let value = iter.value(); + assert_eq!( + key, + key_of(i), + "expected key: {:?}, actual key: {:?}", + as_bytes(&key_of(i)), + as_bytes(key) + ); + assert_eq!( + value, + value_of(i), + "expected value: {:?}, actual value: {:?}", + as_bytes(&value_of(i)), + as_bytes(value) + ); + iter.next(); + } + } +} diff --git a/mini-lsm/src/block/builder.rs b/mini-lsm/src/block/builder.rs new file mode 100644 index 0000000..2ab036b --- /dev/null +++ b/mini-lsm/src/block/builder.rs @@ -0,0 +1,57 @@ +use bytes::BufMut; + +use super::{Block, SIZEOF_U16}; + +/// Builds a block +pub struct BlockBuilder { + offsets: Vec, + data: Vec, + target_size: usize, +} + +impl BlockBuilder { + /// Creates a new block builder + pub fn new(target_size: usize) -> Self { + Self { + offsets: Vec::new(), + data: Vec::new(), + target_size, + } + } + + fn estimated_size(&self) -> usize { + self.offsets.len() * SIZEOF_U16 + self.data.len() + SIZEOF_U16 + } + + /// Adds a key-value pair to the block + #[must_use] + pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool { + assert!(!key.is_empty(), "key must not be empty"); + if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.target_size + && !self.is_empty() + { + return false; + } + self.offsets.push(self.data.len() as u16); + self.data.put_u16(key.len() as u16); + self.data.put(key); + self.data.put_u16(value.len() as u16); + self.data.put(value); + true + } + + pub fn is_empty(&self) -> bool { + self.offsets.is_empty() + } + + /// Builds a block + pub fn build(self) -> Block { + if self.is_empty() { + panic!("block should not be empty"); + } + Block { + data: self.data, + offsets: self.offsets, + } + } +} diff --git a/mini-lsm/src/block/iterator.rs b/mini-lsm/src/block/iterator.rs new file mode 100644 index 0000000..62645d2 --- /dev/null +++ b/mini-lsm/src/block/iterator.rs @@ -0,0 +1,110 @@ +use std::sync::Arc; + +use bytes::Buf; + +use super::Block; + +pub struct BlockIterator { + block: Arc, + key: Vec, + value: Vec, + idx: usize, +} + +impl BlockIterator { + fn new(block: Arc) -> Self { + Self { + block, + key: Vec::new(), + value: Vec::new(), + idx: 0, + } + } + + pub fn create_and_seek_to_first(block: Arc) -> Self { + let mut iter = Self::new(block); + iter.seek_to_first(); + iter + } + + pub fn create_and_seek_to_key(block: Arc, key: &[u8]) -> Self { + let mut iter = Self::new(block); + iter.seek_to_key(key); + iter + } + + pub fn key(&self) -> &[u8] { + debug_assert!(!self.key.is_empty(), "invalid iterator"); + &self.key + } + + pub fn value(&self) -> &[u8] { + debug_assert!(!self.key.is_empty(), "invalid iterator"); + &self.value + } + + pub fn is_valid(&self) -> bool { + !self.key.is_empty() + } + + pub fn seek_to_first(&mut self) { + self.seek_to(0); + } + + pub fn seek_to_last(&mut self) { + self.seek_to(self.block.offsets.len() - 1); + } + + pub fn len(&self) -> usize { + self.block.offsets.len() + } + + pub fn is_empty(&self) -> bool { + self.block.offsets.is_empty() + } + + pub fn seek_to(&mut self, idx: usize) { + if idx >= self.block.offsets.len() { + self.key.clear(); + self.value.clear(); + return; + } + let offset = self.block.offsets[idx] as usize; + self.seek_to_offset(offset); + self.idx = idx; + } + + pub fn next(&mut self) { + self.idx += 1; + self.seek_to(self.idx); + } + + fn seek_to_offset(&mut self, offset: usize) { + let mut entry = &self.block.data[offset..]; + let key_len = entry.get_u16() as usize; + let key = entry[..key_len].to_vec(); + entry.advance(key_len); + self.key.clear(); + self.key.extend(key); + let value_len = entry.get_u16() as usize; + let value = entry[..value_len].to_vec(); + entry.advance(value_len); + self.value.clear(); + self.value.extend(value); + } + + pub fn seek_to_key(&mut self, key: &[u8]) { + let mut low = 0; + let mut high = self.block.offsets.len(); + while low < high { + let mid = low + (high - low) / 2; + self.seek_to(mid); + assert!(self.is_valid()); + match self.key().cmp(key) { + std::cmp::Ordering::Less => low = mid + 1, + std::cmp::Ordering::Greater => high = mid, + std::cmp::Ordering::Equal => return, + } + } + } +} diff --git a/mini-lsm/src/lib.rs b/mini-lsm/src/lib.rs new file mode 100644 index 0000000..a863eaa --- /dev/null +++ b/mini-lsm/src/lib.rs @@ -0,0 +1 @@ +pub mod block; diff --git a/rust-toolchain b/rust-toolchain new file mode 100644 index 0000000..8483d9f --- /dev/null +++ b/rust-toolchain @@ -0,0 +1 @@ +nightly-2022-11-11 diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml new file mode 100644 index 0000000..5dc5483 --- /dev/null +++ b/xtask/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "mini-lsm-xtask" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +repository = { workspace = true } + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1" +console = "0.15" +clap = { version = "4", features = ["derive"] } +duct = "0.13" diff --git a/xtask/src/main.rs b/xtask/src/main.rs new file mode 100644 index 0000000..1e6808a --- /dev/null +++ b/xtask/src/main.rs @@ -0,0 +1,65 @@ +use std::path::PathBuf; + +use anyhow::{anyhow, Result}; +use clap::Parser; +use console::style; +use duct::cmd; + +#[derive(clap::Subcommand, Debug)] +enum Action { + /// Check. + Check, + /// Install necessary tools for development. + InstallTools, + /// Show environment variables. + Show, +} + +/// Simple program to greet a person +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + #[command(subcommand)] + action: Action, +} + +fn switch_to_workspace_root() -> Result<()> { + std::env::set_current_dir( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .ok_or_else(|| anyhow!("failed to find the workspace root"))?, + )?; + Ok(()) +} + +fn main() -> Result<()> { + let args = Args::parse(); + + match args.action { + Action::InstallTools => { + println!("{}", style("cargo install cargo-nextest").bold()); + cmd!("cargo", "install", "cargo-nextest", "--locked").run()?; + println!("{}", style("cargo install mdbook mdbook-toc").bold()); + cmd!("cargo", "install", "mdbook", "mdbook-toc", "--locked").run()?; + } + Action::Check => { + switch_to_workspace_root()?; + println!("{}", style("cargo fmt").bold()); + cmd!("cargo", "fmt").run()?; + println!("{}", style("cargo check").bold()); + cmd!("cargo", "check", "--all-targets").run()?; + println!("{}", style("cargo nextest run").bold()); + cmd!("cargo", "nextest", "run").run()?; + println!("{}", style("cargo clippy").bold()); + cmd!("cargo", "clippy", "--all-targets").run()?; + println!("{}", style("mdbook build").bold()); + cmd!("mdbook", "build").dir("mini-lsm-book").run()?; + } + Action::Show => { + println!("CARGO_MANIFEST_DIR={}", env!("CARGO_MANIFEST_DIR")); + println!("PWD={:?}", std::env::current_dir()?); + } + } + + Ok(()) +}