From 71342d4384b6eb49572e12f2e854e751d0f91d62 Mon Sep 17 00:00:00 2001 From: Yu Lei Date: Tue, 30 Jan 2024 17:04:25 +0800 Subject: [PATCH] feat: refine the CLI tool (#33) * implement a repl * remove debug log --- Cargo.lock | 166 +++++++++- mini-lsm-mvcc/Cargo.toml | 2 + mini-lsm-starter/Cargo.toml | 2 + mini-lsm-starter/src/bin/mini-lsm-cli.rs | 372 +++++++++++++++++------ mini-lsm/Cargo.toml | 2 + 5 files changed, 449 insertions(+), 95 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 66dd13e..678ceac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,6 +80,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + [[package]] name = "bumpalo" version = "3.11.1" @@ -175,6 +181,15 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +[[package]] +name = "clipboard-win" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c57002a5d9be777c1ef967e33674dac9ebd310d8893e4e3437b14d5f0f6372cc" +dependencies = [ + "error-code", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -268,6 +283,22 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "error-chain" version = "0.12.4" @@ -277,6 +308,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "error-code" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "281e452d3bad4005426416cdba5ccfd4f5c1280e10099e21db27f7c1c28347fc" + [[package]] name = "farmhash" version = "1.1.5" @@ -292,6 +329,17 @@ dependencies = [ "instant", ] +[[package]] +name = "fd-lock" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "getrandom" version = "0.2.8" @@ -324,6 +372,15 @@ dependencies = [ "libc", ] +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "instant" version = "0.1.12" @@ -365,9 +422,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.139" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -426,9 +489,11 @@ dependencies = [ "crossbeam-skiplist", "farmhash", "moka", + "nom", "ouroboros", "parking_lot", "rand", + "rustyline", "serde", "serde_json", "tempfile", @@ -448,9 +513,11 @@ dependencies = [ "crossbeam-skiplist", "farmhash", "moka", + "nom", "ouroboros", "parking_lot", "rand", + "rustyline", "serde", "serde_json", "tempfile", @@ -469,9 +536,11 @@ dependencies = [ "crossbeam-skiplist", "farmhash", "moka", + "nom", "ouroboros", "parking_lot", "rand", + "rustyline", "serde", "serde_json", "tempfile", @@ -487,6 +556,12 @@ dependencies = [ "duct", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "moka" version = "0.9.6" @@ -510,6 +585,36 @@ dependencies = [ "uuid", ] +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.4.2", + "cfg-if", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num_cpus" version = "1.15.0" @@ -618,7 +723,7 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d9cc634bc78768157b5cbfe988ffcd1dcba95cd2b2f03a88316c08c6d00ed63" dependencies = [ - "bitflags", + "bitflags 1.3.2", "memchr", "unicase", ] @@ -648,6 +753,16 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -684,7 +799,7 @@ version = "10.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6823ea29436221176fe662da99998ad3b4db2c7f31e7b6f5fe43adccd6320bb" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -693,7 +808,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -714,6 +829,41 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" +dependencies = [ + "bitflags 2.4.2", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustyline" +version = "13.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02a2d683a4ac90aeef5b1013933f6d977bd37d51ff3f4dad829d4931a7e6be86" +dependencies = [ + "bitflags 2.4.2", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "winapi", +] + [[package]] name = "ryu" version = "1.0.12" @@ -920,6 +1070,12 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "unicode-width" version = "0.1.10" diff --git a/mini-lsm-mvcc/Cargo.toml b/mini-lsm-mvcc/Cargo.toml index 15213c3..00b2e5b 100644 --- a/mini-lsm-mvcc/Cargo.toml +++ b/mini-lsm-mvcc/Cargo.toml @@ -25,6 +25,8 @@ serde_json = { version = "1.0" } serde = { version = "1.0", features = ["derive"] } farmhash = "1" crc32fast = "1.3.2" +nom = "7.1.3" +rustyline = "13.0.0" [dev-dependencies] tempfile = "3" diff --git a/mini-lsm-starter/Cargo.toml b/mini-lsm-starter/Cargo.toml index 25943e5..e84ac93 100644 --- a/mini-lsm-starter/Cargo.toml +++ b/mini-lsm-starter/Cargo.toml @@ -19,6 +19,8 @@ crossbeam-channel = "0.5.11" serde_json = { version = "1.0" } serde = { version = "1.0", features = ["derive"] } farmhash = "1" +nom = "7.1.3" +rustyline = "13.0.0" [dev-dependencies] tempfile = "3" diff --git a/mini-lsm-starter/src/bin/mini-lsm-cli.rs b/mini-lsm-starter/src/bin/mini-lsm-cli.rs index f44ef91..e2529c7 100644 --- a/mini-lsm-starter/src/bin/mini-lsm-cli.rs +++ b/mini-lsm-starter/src/bin/mini-lsm-cli.rs @@ -1,4 +1,6 @@ mod wrapper; + +use rustyline::DefaultEditor; use wrapper::mini_lsm_wrapper; use anyhow::Result; @@ -11,6 +13,7 @@ use mini_lsm_wrapper::compact::{ use mini_lsm_wrapper::iterators::StorageIterator; use mini_lsm_wrapper::lsm_storage::{LsmStorageOptions, MiniLsm}; use std::path::PathBuf; +use std::sync::Arc; #[derive(Debug, Clone, ValueEnum)] enum CompactionStrategy { @@ -33,6 +36,279 @@ struct Args { serializable: bool, } +struct ReplHandler { + epoch: u64, + lsm: Arc, +} + +impl ReplHandler { + fn handle(&mut self, command: &Command) -> Result<()> { + match command { + Command::Fill { begin, end } => { + for i in *begin..=*end { + self.lsm.put( + format!("{}", i).as_bytes(), + format!("value{}@{}", i, self.epoch).as_bytes(), + )?; + } + + println!( + "{} values filled with epoch {}", + end - begin + 1, + self.epoch + ); + } + Command::Del { key } => { + self.lsm.delete(key.as_bytes())?; + println!("{} deleted", key); + } + Command::Get { key } => { + if let Some(value) = self.lsm.get(key.as_bytes())? { + println!("{}={:?}", key, value); + } else { + println!("{} not exist", key); + } + } + Command::Scan { begin, end } => match (begin, end) { + (None, None) => { + let mut iter = self + .lsm + .scan(std::ops::Bound::Unbounded, std::ops::Bound::Unbounded)?; + let mut cnt = 0; + while iter.is_valid() { + println!( + "{:?}={:?}", + Bytes::copy_from_slice(iter.key()), + Bytes::copy_from_slice(iter.value()), + ); + iter.next()?; + cnt += 1; + } + println!(); + println!("{} keys scanned", cnt); + } + (Some(begin), Some(end)) => { + let mut iter = self.lsm.scan( + std::ops::Bound::Included(begin.as_bytes()), + std::ops::Bound::Included(end.as_bytes()), + )?; + let mut cnt = 0; + while iter.is_valid() { + println!( + "{:?}={:?}", + Bytes::copy_from_slice(iter.key()), + Bytes::copy_from_slice(iter.value()), + ); + iter.next()?; + cnt += 1; + } + println!(); + println!("{} keys scanned", cnt); + } + _ => { + println!("invalid command"); + } + }, + Command::Dump => { + self.lsm.dump_structure(); + println!("dump success"); + } + Command::Flush => { + self.lsm.force_flush()?; + println!("flush success"); + } + Command::FullCompaction => { + self.lsm.force_full_compaction()?; + println!("full compaction success"); + } + Command::Quit | Command::Close => std::process::exit(0), + }; + + self.epoch += 1; + + Ok(()) + } +} + +#[derive(Debug)] +enum Command { + Fill { + begin: u64, + end: u64, + }, + Del { + key: String, + }, + Get { + key: String, + }, + Scan { + begin: Option, + end: Option, + }, + + Dump, + Flush, + FullCompaction, + Quit, + Close, +} + +impl Command { + pub fn parse(input: &str) -> Result { + use nom::bytes::complete::*; + use nom::character::complete::*; + + use nom::branch::*; + use nom::combinator::*; + use nom::sequence::*; + + let uint = |i| { + map_res(digit1::<&str, nom::error::Error<_>>, |s: &str| { + s.parse() + .map_err(|_| nom::error::Error::new(s, nom::error::ErrorKind::Digit)) + })(i) + }; + + let string = |i| { + map(take_till1(|c: char| c.is_whitespace()), |s: &str| { + s.to_string() + })(i) + }; + + let fill = |i| { + map( + tuple((tag_no_case("fill"), space1, uint, space1, uint)), + |(_, _, key, _, value)| Command::Fill { + begin: key, + end: value, + }, + )(i) + }; + + let del = |i| { + map( + tuple((tag_no_case("del"), space1, string)), + |(_, _, key)| Command::Del { key }, + )(i) + }; + + let get = |i| { + map( + tuple((tag_no_case("get"), space1, string)), + |(_, _, key)| Command::Get { key }, + )(i) + }; + + let scan = |i| { + map( + tuple(( + tag_no_case("scan"), + opt(tuple((space1, string, space1, string))), + )), + |(_, opt_args)| { + let (begin, end) = opt_args + .map_or((None, None), |(_, begin, _, end)| (Some(begin), Some(end))); + Command::Scan { begin, end } + }, + )(i) + }; + + let command = |i| { + alt(( + fill, + del, + get, + scan, + map(tag_no_case("dump"), |_| Command::Dump), + map(tag_no_case("flush"), |_| Command::Flush), + map(tag_no_case("full_compaction"), |_| Command::FullCompaction), + map(tag_no_case("quit"), |_| Command::Quit), + map(tag_no_case("close"), |_| Command::Close), + ))(i) + }; + + command(input) + .map(|(_, c)| c) + .map_err(|e| anyhow::anyhow!("{}", e)) + } +} + +struct Repl { + app_name: String, + description: String, + prompt: String, + + handler: ReplHandler, + + editor: DefaultEditor, +} + +impl Repl { + pub fn run(mut self) -> Result<()> { + self.bootstrap()?; + + loop { + let readline = self.editor.readline(&self.prompt)?; + if readline.trim().is_empty() { + // Skip noop + continue; + } + let command = Command::parse(&readline)?; + self.handler.handle(&command)?; + self.editor.add_history_entry(readline)?; + } + } + + fn bootstrap(&mut self) -> Result<()> { + println!("Welcome to {}!", self.app_name); + println!("{}", self.description); + println!(); + Ok(()) + } +} + +struct ReplBuilder { + app_name: String, + description: String, + prompt: String, +} + +impl ReplBuilder { + pub fn new() -> Self { + Self { + app_name: "mini-lsm-cli".to_string(), + description: "A CLI for mini-lsm".to_string(), + prompt: "mini-lsm-cli> ".to_string(), + } + } + + pub fn app_name(mut self, app_name: &str) -> Self { + self.app_name = app_name.to_string(); + self + } + + pub fn description(mut self, description: &str) -> Self { + self.description = description.to_string(); + self + } + + pub fn prompt(mut self, prompt: &str) -> Self { + self.prompt = prompt.to_string(); + self + } + + pub fn build(self, handler: ReplHandler) -> Result { + Ok(Repl { + app_name: self.app_name, + description: self.description, + prompt: self.prompt, + editor: DefaultEditor::new()?, + handler, + }) + } +} + fn main() -> Result<()> { let args = Args::parse(); let lsm = MiniLsm::open( @@ -69,97 +345,13 @@ fn main() -> Result<()> { serializable: args.serializable, }, )?; - let mut epoch = 0; - loop { - let mut line = String::new(); - std::io::stdin().read_line(&mut line)?; - let line = line.trim().to_string(); - if line.starts_with("fill ") { - let Some((_, options)) = line.split_once(' ') else { - println!("invalid command"); - continue; - }; - let Some((begin, end)) = options.split_once(' ') else { - println!("invalid command"); - continue; - }; - let begin = begin.parse::()?; - let end = end.parse::()?; - for i in begin..=end { - lsm.put( - format!("{}", i).as_bytes(), - format!("value{}@{}", i, epoch).as_bytes(), - )?; - } + let repl = ReplBuilder::new() + .app_name("mini-lsm-cli") + .description("A CLI for mini-lsm") + .prompt("mini-lsm-cli> ") + .build(ReplHandler { epoch: 0, lsm })?; - println!("{} values filled with epoch {}", end - begin + 1, epoch); - } else if line.starts_with("del ") { - let Some((_, key)) = line.split_once(' ') else { - println!("invalid command"); - continue; - }; - lsm.delete(key.as_bytes())?; - } else if line.starts_with("get ") { - let Some((_, key)) = line.split_once(' ') else { - println!("invalid command"); - continue; - }; - if let Some(value) = lsm.get(key.as_bytes())? { - println!("{}={:?}", key, value); - } else { - println!("{} not exist", key); - } - } else if line == "scan" { - let mut iter = lsm.scan(std::ops::Bound::Unbounded, std::ops::Bound::Unbounded)?; - let mut cnt = 0; - while iter.is_valid() { - println!( - "{:?}={:?}", - Bytes::copy_from_slice(iter.key()), - Bytes::copy_from_slice(iter.value()), - ); - iter.next()?; - cnt += 1; - } - println!("{} keys scanned", cnt); - } else if line.starts_with("scan ") { - let Some((_, rest)) = line.split_once(' ') else { - println!("invalid command"); - continue; - }; - let Some((begin_key, end_key)) = rest.split_once(' ') else { - println!("invalid command"); - continue; - }; - let mut iter = lsm.scan( - std::ops::Bound::Included(begin_key.as_bytes()), - std::ops::Bound::Included(end_key.as_bytes()), - )?; - let mut cnt = 0; - while iter.is_valid() { - println!( - "{:?}={:?}", - Bytes::copy_from_slice(iter.key()), - Bytes::copy_from_slice(iter.value()), - ); - iter.next()?; - cnt += 1; - } - println!("{} keys scanned", cnt); - } else if line == "dump" { - lsm.dump_structure(); - } else if line == "flush" { - lsm.force_flush()?; - } else if line == "full_compaction" { - lsm.force_full_compaction()?; - } else if line == "quit" || line == "close" { - lsm.close()?; - break; - } else { - println!("invalid command: {}", line); - } - epoch += 1; - } + repl.run()?; Ok(()) } diff --git a/mini-lsm/Cargo.toml b/mini-lsm/Cargo.toml index 0936160..000e05a 100644 --- a/mini-lsm/Cargo.toml +++ b/mini-lsm/Cargo.toml @@ -25,6 +25,8 @@ serde_json = { version = "1.0" } serde = { version = "1.0", features = ["derive"] } farmhash = "1" crc32fast = "1.3.2" +nom = "7.1.3" +rustyline = "13.0.0" [dev-dependencies] tempfile = "3"