| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | #![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
 | 
					
						
							|  |  |  | #![allow(dead_code)] // TODO(you): remove this lint after implementing this mod
 | 
					
						
							| 
									
										
										
										
											2022-12-23 21:14:11 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | mod builder;
 | 
					
						
							|  |  |  | mod iterator;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:11:09 +08:00
										 |  |  | use std::fs::File;
 | 
					
						
							| 
									
										
										
										
											2022-12-24 10:11:06 -05:00
										 |  |  | use std::path::Path;
 | 
					
						
							|  |  |  | use std::sync::Arc;
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-24 10:11:06 -05:00
										 |  |  | use anyhow::Result;
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | pub use builder::SsTableBuilder;
 | 
					
						
							|  |  |  | use bytes::{Buf, Bytes};
 | 
					
						
							|  |  |  | pub use iterator::SsTableIterator;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use crate::block::Block;
 | 
					
						
							| 
									
										
										
										
											2022-12-24 18:07:18 -05:00
										 |  |  | use crate::lsm_storage::BlockCache;
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | #[derive(Clone, Debug, PartialEq, Eq)]
 | 
					
						
							|  |  |  | pub struct BlockMeta {
 | 
					
						
							|  |  |  |     /// Offset of this data block.
 | 
					
						
							|  |  |  |     pub offset: usize,
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:16:55 +08:00
										 |  |  |     /// The first key of the data block.
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     pub first_key: Bytes,
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:16:55 +08:00
										 |  |  |     /// The last key of the data block.
 | 
					
						
							|  |  |  |     pub last_key: Bytes,
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | impl BlockMeta {
 | 
					
						
							|  |  |  |     /// Encode block meta to a buffer.
 | 
					
						
							| 
									
										
										
										
											2023-07-11 12:05:40 +08:00
										 |  |  |     /// You may add extra fields to the buffer,
 | 
					
						
							|  |  |  |     /// in order to help keep track of `first_key` when decoding from the same buffer in the future.
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     pub fn encode_block_meta(
 | 
					
						
							|  |  |  |         block_meta: &[BlockMeta],
 | 
					
						
							| 
									
										
										
										
											2022-12-24 10:11:06 -05:00
										 |  |  |         #[allow(clippy::ptr_arg)] // remove this allow after you finish
 | 
					
						
							|  |  |  |         buf: &mut Vec<u8>,
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     ) {
 | 
					
						
							|  |  |  |         unimplemented!()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Decode block meta from a buffer.
 | 
					
						
							|  |  |  |     pub fn decode_block_meta(buf: impl Buf) -> Vec<BlockMeta> {
 | 
					
						
							|  |  |  |         unimplemented!()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							/// A file object.
							///
							/// Positional fields:
							/// * `0` — the opened file handle, or `None` when no file backs this object
							///   (as built by `SsTable::create_meta_only`).
							/// * `1` — the size of the file in bytes.
							pub struct FileObject(Option<File>, u64);
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | impl FileObject {
 | 
					
						
							|  |  |  |     pub fn read(&self, offset: u64, len: u64) -> Result<Vec<u8>> {
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:11:09 +08:00
										 |  |  |         use std::os::unix::fs::FileExt;
 | 
					
						
							|  |  |  |         let mut data = vec![0; len as usize];
 | 
					
						
							|  |  |  |         self.0
 | 
					
						
							|  |  |  |             .as_ref()
 | 
					
						
							|  |  |  |             .unwrap()
 | 
					
						
							|  |  |  |             .read_exact_at(&mut data[..], offset)?;
 | 
					
						
							|  |  |  |         Ok(data)
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn size(&self) -> u64 {
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:11:09 +08:00
										 |  |  |         self.1
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-24 18:07:18 -05:00
										 |  |  |     /// Create a new file object (day 2) and write the file to the disk (day 4).
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     pub fn create(path: &Path, data: Vec<u8>) -> Result<Self> {
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:11:09 +08:00
										 |  |  |         std::fs::write(path, &data)?;
 | 
					
						
							|  |  |  |         File::open(path)?.sync_all()?;
 | 
					
						
							|  |  |  |         Ok(FileObject(
 | 
					
						
							|  |  |  |             Some(File::options().read(true).write(false).open(path)?),
 | 
					
						
							|  |  |  |             data.len() as u64,
 | 
					
						
							|  |  |  |         ))
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn open(path: &Path) -> Result<Self> {
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:11:09 +08:00
										 |  |  |         let file = File::options().read(true).write(false).open(path)?;
 | 
					
						
							|  |  |  |         let size = file.metadata()?.len();
 | 
					
						
							|  |  |  |         Ok(FileObject(Some(file), size))
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-11 12:05:40 +08:00
										 |  |  | /// -------------------------------------------------------------------------------------------------------
 | 
					
						
							|  |  |  | /// |              Data Block             |             Meta Block              |          Extra          |
 | 
					
						
							|  |  |  | /// -------------------------------------------------------------------------------------------------------
 | 
					
						
							|  |  |  | /// | Data Block #1 | ... | Data Block #N | Meta Block #1 | ... | Meta Block #N | Meta Block Offset (u32) |
 | 
					
						
							|  |  |  | /// -------------------------------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2022-12-24 00:26:11 -05:00
										 |  |  | pub struct SsTable {
 | 
					
						
							| 
									
										
										
										
											2023-07-11 12:05:40 +08:00
										 |  |  |     /// The actual storage unit of SsTable, the format is as above.
 | 
					
						
							| 
									
										
										
										
											2022-12-24 00:26:11 -05:00
										 |  |  |     file: FileObject,
 | 
					
						
							| 
									
										
										
										
											2023-07-11 12:05:40 +08:00
										 |  |  |     /// The meta blocks that hold info for data blocks.
 | 
					
						
							| 
									
										
										
										
											2022-12-24 00:26:11 -05:00
										 |  |  |     block_metas: Vec<BlockMeta>,
 | 
					
						
							| 
									
										
										
										
											2023-07-11 12:05:40 +08:00
										 |  |  |     /// The offset that indicates the start point of meta blocks in `file`.
 | 
					
						
							| 
									
										
										
										
											2022-12-24 00:26:11 -05:00
										 |  |  |     block_meta_offset: usize,
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:11:09 +08:00
										 |  |  |     id: usize,
 | 
					
						
							|  |  |  |     block_cache: Option<Arc<BlockCache>>,
 | 
					
						
							|  |  |  |     first_key: Bytes,
 | 
					
						
							|  |  |  |     last_key: Bytes,
 | 
					
						
							| 
									
										
										
										
											2022-12-24 00:26:11 -05:00
										 |  |  | }
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | impl SsTable {
 | 
					
						
							| 
									
										
										
										
											2022-12-24 18:07:18 -05:00
										 |  |  |     #[cfg(test)]
 | 
					
						
							|  |  |  |     pub(crate) fn open_for_test(file: FileObject) -> Result<Self> {
 | 
					
						
							|  |  |  |         Self::open(0, None, file)
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     /// Open SSTable from a file.
 | 
					
						
							| 
									
										
										
										
											2022-12-24 18:07:18 -05:00
										 |  |  |     pub fn open(id: usize, block_cache: Option<Arc<BlockCache>>, file: FileObject) -> Result<Self> {
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |         unimplemented!()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:11:09 +08:00
										 |  |  |     /// Create a mock SST with only first key + last key metadata
 | 
					
						
							|  |  |  |     pub fn create_meta_only(id: usize, file_size: u64, first_key: Bytes, last_key: Bytes) -> Self {
 | 
					
						
							|  |  |  |         Self {
 | 
					
						
							|  |  |  |             file: FileObject(None, file_size),
 | 
					
						
							|  |  |  |             block_metas: vec![],
 | 
					
						
							|  |  |  |             block_meta_offset: 0,
 | 
					
						
							|  |  |  |             id,
 | 
					
						
							|  |  |  |             block_cache: None,
 | 
					
						
							|  |  |  |             first_key,
 | 
					
						
							|  |  |  |             last_key,
 | 
					
						
							|  |  |  |         }
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     /// Read a block from the disk.
 | 
					
						
							|  |  |  |     pub fn read_block(&self, block_idx: usize) -> Result<Arc<Block>> {
 | 
					
						
							|  |  |  |         unimplemented!()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-24 18:07:18 -05:00
										 |  |  |     /// Read a block from disk, with block cache. (Day 4)
 | 
					
						
							|  |  |  |     pub fn read_block_cached(&self, block_idx: usize) -> Result<Arc<Block>> {
 | 
					
						
							|  |  |  |         unimplemented!()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     /// Find the block that may contain `key`.
 | 
					
						
							| 
									
										
										
										
											2023-07-11 12:05:40 +08:00
										 |  |  |     /// Note: You may want to make use of the `first_key` stored in `BlockMeta`.
 | 
					
						
							|  |  |  |     /// You may also assume the key-value pairs stored in each consecutive block are sorted.
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     pub fn find_block_idx(&self, key: &[u8]) -> usize {
 | 
					
						
							|  |  |  |         unimplemented!()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Get number of data blocks.
 | 
					
						
							|  |  |  |     pub fn num_of_blocks(&self) -> usize {
 | 
					
						
							| 
									
										
										
										
											2024-01-20 11:16:55 +08:00
										 |  |  |         self.block_metas.len()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn first_key(&self) -> &Bytes {
 | 
					
						
							|  |  |  |         &self.first_key
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn last_key(&self) -> &Bytes {
 | 
					
						
							|  |  |  |         &self.last_key
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn table_size(&self) -> u64 {
 | 
					
						
							|  |  |  |         self.file.1
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn sst_id(&self) -> usize {
 | 
					
						
							|  |  |  |         self.id
 | 
					
						
							| 
									
										
										
										
											2022-12-23 22:32:30 -05:00
										 |  |  |     }
 | 
					
						
							|  |  |  | }
 |