feat(docs): finish part 2

Signed-off-by: Alex Chi <iskyzh@gmail.com>
This commit is contained in:
Alex Chi
2022-12-24 15:34:34 -05:00
parent 3ed6204400
commit f7b6d9a847
18 changed files with 601 additions and 29 deletions

457
Cargo.lock generated
View File

@@ -38,12 +38,55 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bumpalo"
version = "3.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
[[package]]
name = "bytecount"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
[[package]]
name = "bytes"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
[[package]]
name = "camino"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ad0e1e3e88dd237a156ab9f571021b8a158caa0ae44b1968a241efb5144c1e"
dependencies = [
"serde",
]
[[package]]
name = "cargo-platform"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbdb825da8a5df079a43676dbe042702f1707b1109f713a01420fbb4cc71fa27"
dependencies = [
"serde",
]
[[package]]
name = "cargo_metadata"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa"
dependencies = [
"camino",
"cargo-platform",
"semver",
"serde",
"serde_json",
]
[[package]]
name = "cc"
version = "1.0.78"
@@ -107,6 +150,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.13"
@@ -180,6 +233,41 @@ dependencies = [
"libc",
]
[[package]]
name = "error-chain"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc"
dependencies = [
"version_check",
]
[[package]]
name = "fastrand"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
dependencies = [
"instant",
]
[[package]]
name = "getrandom"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
dependencies = [
"cfg-if",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "heck"
version = "0.4.0"
@@ -195,6 +283,15 @@ dependencies = [
"libc",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "io-lifetimes"
version = "1.0.3"
@@ -217,6 +314,21 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "itoa"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
[[package]]
name = "js-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@@ -245,6 +357,30 @@ dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "mach"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
dependencies = [
"libc",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
version = "0.7.1"
@@ -263,8 +399,10 @@ dependencies = [
"bytes",
"crossbeam-epoch",
"crossbeam-skiplist",
"moka",
"ouroboros",
"parking_lot",
"tempfile",
]
[[package]]
@@ -276,8 +414,10 @@ dependencies = [
"bytes",
"crossbeam-epoch",
"crossbeam-skiplist",
"moka",
"ouroboros",
"parking_lot",
"tempfile",
]
[[package]]
@@ -290,6 +430,39 @@ dependencies = [
"duct",
]
[[package]]
name = "moka"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b49a05f67020456541f4f29cbaa812016a266a86ec76f96d3873d459c68fe5e"
dependencies = [
"crossbeam-channel",
"crossbeam-epoch",
"crossbeam-utils",
"num_cpus",
"once_cell",
"parking_lot",
"quanta",
"rustc_version",
"scheduled-thread-pool",
"skeptic",
"smallvec",
"tagptr",
"thiserror",
"triomphe",
"uuid",
]
[[package]]
name = "num_cpus"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "once_cell"
version = "1.16.0"
@@ -391,6 +564,33 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "pulldown-cmark"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d9cc634bc78768157b5cbfe988ffcd1dcba95cd2b2f03a88316c08c6d00ed63"
dependencies = [
"bitflags",
"memchr",
"unicase",
]
[[package]]
name = "quanta"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e31331286705f455e56cca62e0e717158474ff02b7936c1fa596d983f4ae27"
dependencies = [
"crossbeam-utils",
"libc",
"mach",
"once_cell",
"raw-cpuid",
"wasi 0.10.2+wasi-snapshot-preview1",
"web-sys",
"winapi",
]
[[package]]
name = "quote"
version = "1.0.23"
@@ -400,6 +600,15 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "raw-cpuid"
version = "10.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6823ea29436221176fe662da99998ad3b4db2c7f31e7b6f5fe43adccd6320bb"
dependencies = [
"bitflags",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
@@ -409,6 +618,24 @@ dependencies = [
"bitflags",
]
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.36.5"
@@ -423,12 +650,76 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "ryu"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scheduled-thread-pool"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "977a7519bff143a44f842fd07e80ad1329295bd71686457f18e496736f4bf9bf"
dependencies = [
"parking_lot",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fed41fc1a24994d044e6db6935e69511a1153b52c15eb42493b26fa87feba0"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "255abe9a125a985c05190d687b320c12f9b1f0b99445e608c21ba0782c719ad8"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "shared_child"
version = "1.0.0"
@@ -439,6 +730,21 @@ dependencies = [
"winapi",
]
[[package]]
name = "skeptic"
version = "0.13.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8"
dependencies = [
"bytecount",
"cargo_metadata",
"error-chain",
"glob",
"pulldown-cmark",
"tempfile",
"walkdir",
]
[[package]]
name = "smallvec"
version = "1.10.0"
@@ -462,6 +768,26 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tagptr"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
[[package]]
name = "tempfile"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
dependencies = [
"cfg-if",
"fastrand",
"libc",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "termcolor"
version = "1.1.3"
@@ -481,6 +807,41 @@ dependencies = [
"winapi",
]
[[package]]
name = "thiserror"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "triomphe"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1ee9bd9239c339d714d657fac840c6d2a4f9c45f4f9ec7b0975113458be78db"
[[package]]
name = "unicase"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
dependencies = [
"version_check",
]
[[package]]
name = "unicode-ident"
version = "1.0.6"
@@ -493,12 +854,108 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "uuid"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c"
dependencies = [
"getrandom",
]
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
[[package]]
name = "web-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"

View File

@@ -26,12 +26,12 @@ The tutorial has 8 parts (which can be finished in 7 days):
* Day 1: Block encoding. SSTs are composed of multiple data blocks. We will implement the block encoding.
* Day 2: SST encoding.
* Day 3: Engine. In this day we will get a functional (but not persistent) key-value engine with `get`, `put`, `delete`
API.
* Day 4: Block cache. To reduce disk I/O and maximize performance, we will use moka-rs to build a block cache for the
LSM tree.
* Day 3: MemTable and Merge Iterators.
* Day 4: Block cache and Engine. To reduce disk I/O and maximize performance, we will use moka-rs to build a block cache
for the LSM tree. In this day we will get a functional (but not persistent) key-value engine with `get`, `put`, `scan`,
`delete` API.
* Day 5: Compaction. Now it's time to maintain a leveled structure for SSTs.
* Day 6: Recovery. We will implement WAL and manifest so that the engine can recover after restart.
* Day 7: Bloom filter and key compression. They are widely-used optimizations in LSM tree structures.
We have reference solution up to day 3 and tutorial up to day 1 for now.
We have reference solution up to day 3 and tutorial up to day 2 for now.

View File

@@ -84,10 +84,10 @@ In this tutorial, we will build the LSM tree structure in 7 days:
* Day 1: Block encoding. SSTs are composed of multiple data blocks. We will implement the block encoding.
* Day 2: SST encoding.
* Day 3: Engine. In this day we will get a functional (but not persistent) key-value engine with `get`, `put`, `delete`
API.
* Day 4: Block cache. To reduce disk I/O and maximize performance, we will use moka-rs to build a block cache for the
LSM tree.
* Day 3: MemTable and Merge Iterators.
* Day 4: Block cache and Engine. To reduce disk I/O and maximize performance, we will use moka-rs to build a block cache
for the LSM tree. In this day we will get a functional (but not persistent) key-value engine with `get`, `put`, `scan`,
`delete` API.
* Day 5: Compaction. Now it's time to maintain a leveled structure for SSTs.
* Day 6: Recovery. We will implement WAL and manifest so that the engine can recover after restart.
* Day 7: Bloom filter and key compression. They are widely-used optimizations in LSM tree structures.

View File

@@ -87,6 +87,8 @@ After implementing this part, you should be able to pass all tests in `block/tes
## Extra Tasks
Here is a list of extra tasks you can do to make the block encoding more robust and efficient.
*Note: Some test cases might not pass after implementing this part. You might need to write your own test cases.*
* Implement block checksum. Verify checksum when decoding the block.

View File

@@ -1 +1,74 @@
# SST Builder and SST Iterator
<!-- toc -->
In this part, you will need to modify:
* `src/table/builder.rs`
* `src/table/iterator.rs`
* `src/table.rs`
You can use `cargo x copy-test day2` to copy our provided test cases to the starter code directory. After you have
finished this part, use `cargo x scheck` to check the style and run all test cases. If you want to write your own
test cases, write a new module `#[cfg(test)] mod user_tests { /* your test cases */ }` in `table.rs`. Remember to remove
`#![allow(...)]` at the top of the modules you modified so that cargo clippy can actually check the styles.
## Task 1 - SST Builder
An SST is composed of data blocks and index blocks stored on the disk. Usually, data blocks are lazily loaded -- they will
not be loaded into memory until a user requests them. Index blocks can also be loaded on-demand, but in this tutorial,
we make the simplifying assumption that all SST index blocks (meta blocks) can fit in memory. Generally, an SST file is
256MB in size.
The SST builder is similar to block builder -- users will call `add` on the builder. You should maintain a `BlockBuilder`
inside SST builder and split block when necessary. Also, you will need to maintain block metadata `BlockMeta`, which
includes the first key in each block and the offset of each block. The `build` function will encode the SST, write
everything to disk using `FileObject::create`, and return an `SsTable` object. Note that in part 2, you don't need to
actually write the data to the disk. Just store everything in memory as a vector until we implement a block cache.
The encoding of SST is like:
```
| data block | data block | data block | data block | meta block | meta block offset (u32) |
```
You also need to implement the `estimated_size` function of `SsTableBuilder`, so that the caller can know when it can start
a new SST to write data. The function doesn't need to be very accurate. Given the assumption that data blocks contain much
more data than the meta block, we can simply return the size of data blocks for `estimated_size`.
You can also align blocks to 4KB boundary so as to make it possible to do direct I/O in the future. This is an optional
optimization.
## Task 2 - SST Iterator
Like `BlockIterator`, you will need to implement an iterator over an SST. Note that you should load data on demand. For
example, if your iterator is at block 1, it should not hold any other block content in memory until it reaches the next
block.
`SsTableIterator` should implement the `StorageIterator` trait, so that it can be composed with other iterators in the
future.
One thing to note is the `seek_to_key` function. Basically, you will need to do binary search on block metadata to find
which block might possibly contain the key. It is possible that the key doesn't exist in the LSM tree, so the
block iterator may become invalid immediately after a seek. For example,
```
| block 1 | block 2 | block meta |
| a, b, c | e, f, g | 1: a, 2: e |
```
If we do `seek(b)` in this SST, it is quite simple -- using binary search, we can know block 1 contains keys `a <= keys
< e`. Therefore, we load block 1 and seek the block iterator to the corresponding position.
But if we do `seek(d)`, we will position to block 1, but seeking `d` in block 1 will reach the end of the block.
Therefore, we should check if the iterator is invalid after seek, and switch to the next block if necessary.
## Extra Tasks
Here is a list of extra tasks you can do to make the SST encoding more robust and efficient.
*Note: Some test cases might not pass after implementing this part. You might need to write your own test cases.*
* Implement index checksum. Verify checksum when decoding.
* Explore different SST encoding and layout. For example, in the [Lethe](https://disc-projects.bu.edu/lethe/) paper,
the author adds secondary key support to SST.

View File

@@ -1 +0,0 @@
# Mem Table and Storage Engine

View File

@@ -0,0 +1,28 @@
# Mem Table and Merge Iterators
<!-- toc -->
In this part, you will need to modify:
* `src/iterators/merge_iterator.rs`
* `src/iterators/two_merge_iterator.rs`
* `src/mem_table.rs`
You can use `cargo x copy-test day3` to copy our provided test cases to the starter code directory. After you have
finished this part, use `cargo x scheck` to check the style and run all test cases. If you want to write your own
test cases, write a new module `#[cfg(test)] mod user_tests { /* your test cases */ }` in `table.rs`. Remember to remove
`#![allow(...)]` at the top of the modules you modified so that cargo clippy can actually check the styles.
This is the last part for the basic building blocks of an LSM tree. After implementing the merge iterators, we can
easily merge data from different parts of the data structure (mem table + SST) and get an iterator over all data. And
in part 4, we will compose all these things together to make a real storage engine.
## Task 1 - Mem Table
## Task 2 - Mem Table Iterator
## Task 3 - Two-Merge Iterator
## Task 4 - Merge Iterator
## Extra Tasks

View File

@@ -1 +0,0 @@
# Block Cache

View File

@@ -0,0 +1 @@
# Storage Engine and Block Cache

View File

@@ -6,8 +6,8 @@
- [Store key-value pairs in little blocks](./01-block.md)
- [And make them into an SST](./02-sst.md)
- [Now it's time for a storage engine](./03-engine.md)
- [Block cache, the good way](./04-block-cache.md)
- [Now it's time for merging everything](./03-memtable.md)
- [Block cache, the good way](./04-engine.md)
- [Let's do something in the background](./05-compaction.md)
- [Be careful when the system crashes](./06-recovery.md)
- [A good bloom filter makes life easier](./07-bloom-filter.md)

View File

@@ -12,3 +12,7 @@ crossbeam-epoch = "0.9"
crossbeam-skiplist = "0.1"
parking_lot = "0.12"
ouroboros = "0.15"
moka = "0.9"
[dev-dependencies]
tempfile = "3"

View File

@@ -1,9 +1,10 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
use crate::iterators::impls::StorageIterator;
use anyhow::Result;
use crate::iterators::impls::StorageIterator;
pub struct LsmIterator {}
impl StorageIterator for LsmIterator {
@@ -24,7 +25,8 @@ impl StorageIterator for LsmIterator {
}
}
/// A wrapper around existing iterator, will prevent users from calling `next` when the iterator is invalid.
/// A wrapper around existing iterator, will prevent users from calling `next` when the iterator is
/// invalid.
pub struct FusedIterator<I: StorageIterator> {
iter: I,
}

View File

@@ -14,7 +14,7 @@ pub struct SsTableBuilder {
}
impl SsTableBuilder {
/// Create a builder based on target SST size and target block size.
/// Create a builder based on target block size.
pub fn new(block_size: usize) -> Self {
unimplemented!()
}

View File

@@ -6,6 +6,7 @@ use std::sync::Arc;
use anyhow::Result;
use super::SsTable;
use crate::iterators::impls::StorageIterator;
/// An iterator over the contents of an SSTable.
pub struct SsTableIterator {}
@@ -30,25 +31,22 @@ impl SsTableIterator {
pub fn seek_to_key(&mut self, key: &[u8]) -> Result<()> {
unimplemented!()
}
}
/// Get the current key.
pub fn key(&self) -> &[u8] {
impl StorageIterator for SsTableIterator {
fn value(&self) -> &[u8] {
unimplemented!()
}
/// Get the current value.
pub fn value(&self) -> &[u8] {
fn key(&self) -> &[u8] {
unimplemented!()
}
/// Check if the iterator is valid.
pub fn is_valid(&self) -> bool {
fn is_valid(&self) -> bool {
unimplemented!()
}
/// Move to the next key-value pair.
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<()> {
fn next(&mut self) -> Result<()> {
unimplemented!()
}
}

View File

@@ -8,8 +8,6 @@ license = { workspace = true }
repository = { workspace = true }
description = "A tutorial for building an LSM tree storage engine in a week."
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1"
arc-swap = "1"
@@ -18,3 +16,7 @@ crossbeam-epoch = "0.9"
crossbeam-skiplist = "0.1"
parking_lot = "0.12"
ouroboros = "0.15"
moka = "0.9"
[dev-dependencies]
tempfile = "3"

View File

@@ -71,7 +71,8 @@ impl StorageIterator for LsmIterator {
}
}
/// A wrapper around existing iterator, will prevent users from calling `next` when the iterator is invalid.
/// A wrapper around existing iterator, will prevent users from calling `next` when the iterator is
/// invalid.
pub struct FusedIterator<I: StorageIterator> {
iter: I,
}

View File

@@ -16,7 +16,7 @@ pub struct SsTableBuilder {
}
impl SsTableBuilder {
/// Create a builder based on target SST size and target block size.
/// Create a builder based on target block size.
pub fn new(block_size: usize) -> Self {
Self {
data: Vec::new(),

View File

@@ -161,6 +161,12 @@ fn copy_test_case(test: CopyTestAction) -> Result<()> {
"mini-lsm-starter/src/iterators/tests/two_merge_iterator_test.rs"
)
.run()?;
cmd!(
"cp",
"mini-lsm/src/iterators/tests.rs",
"mini-lsm-starter/src/iterators/tests.rs"
)
.run()?;
}
}
Ok(())