215 lines
6.4 KiB
Rust
215 lines
6.4 KiB
Rust
mod builder;
|
|
mod iterator;
|
|
|
|
use std::fs::File;
|
|
use std::path::Path;
|
|
use std::sync::Arc;
|
|
|
|
use anyhow::{anyhow, Result};
|
|
pub use builder::SsTableBuilder;
|
|
use bytes::{Buf, BufMut, Bytes};
|
|
pub use iterator::SsTableIterator;
|
|
|
|
use crate::block::Block;
|
|
use crate::lsm_storage::BlockCache;
|
|
|
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
|
pub struct BlockMeta {
|
|
/// Offset of this data block.
|
|
pub offset: usize,
|
|
/// The first key of the data block.
|
|
pub first_key: Bytes,
|
|
/// The last key of the data block.
|
|
pub last_key: Bytes,
|
|
}
|
|
|
|
impl BlockMeta {
|
|
/// Encode block meta to a buffer.
|
|
pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec<u8>) {
|
|
let mut estimated_size = 0;
|
|
for meta in block_meta {
|
|
// The size of offset
|
|
estimated_size += std::mem::size_of::<u32>();
|
|
// The size of key length
|
|
estimated_size += std::mem::size_of::<u16>();
|
|
// The size of actual key
|
|
estimated_size += meta.first_key.len();
|
|
// The size of key length
|
|
estimated_size += std::mem::size_of::<u16>();
|
|
// The size of actual key
|
|
estimated_size += meta.last_key.len();
|
|
}
|
|
// Reserve the space to improve performance, especially when the size of incoming data is
|
|
// large
|
|
buf.reserve(estimated_size);
|
|
let original_len = buf.len();
|
|
for meta in block_meta {
|
|
buf.put_u32(meta.offset as u32);
|
|
buf.put_u16(meta.first_key.len() as u16);
|
|
buf.put_slice(&meta.first_key);
|
|
buf.put_u16(meta.last_key.len() as u16);
|
|
buf.put_slice(&meta.last_key);
|
|
}
|
|
assert_eq!(estimated_size, buf.len() - original_len);
|
|
}
|
|
|
|
/// Decode block meta from a buffer.
|
|
pub fn decode_block_meta(mut buf: impl Buf) -> Vec<BlockMeta> {
|
|
let mut block_meta = Vec::new();
|
|
while buf.has_remaining() {
|
|
let offset = buf.get_u32() as usize;
|
|
let first_key_len = buf.get_u16() as usize;
|
|
let first_key = buf.copy_to_bytes(first_key_len);
|
|
let last_key_len = buf.get_u16() as usize;
|
|
let last_key = buf.copy_to_bytes(last_key_len);
|
|
block_meta.push(BlockMeta {
|
|
offset,
|
|
first_key,
|
|
last_key,
|
|
});
|
|
}
|
|
block_meta
|
|
}
|
|
}
|
|
|
|
/// A file object backing an SSTable.
///
/// Field `0` is the open file handle, or `None` for an object that only carries a size and
/// has nothing on disk to read from. Field `1` is the file size in bytes.
pub struct FileObject(Option<File>, u64);
|
|
|
|
impl FileObject {
|
|
pub fn read(&self, offset: u64, len: u64) -> Result<Vec<u8>> {
|
|
use std::os::unix::fs::FileExt;
|
|
let mut data = vec![0; len as usize];
|
|
self.0
|
|
.as_ref()
|
|
.unwrap()
|
|
.read_exact_at(&mut data[..], offset)?;
|
|
Ok(data)
|
|
}
|
|
|
|
pub fn size(&self) -> u64 {
|
|
self.1
|
|
}
|
|
|
|
/// Create a new file object (day 2) and write the file to the disk (day 4).
|
|
pub fn create(path: &Path, data: Vec<u8>) -> Result<Self> {
|
|
std::fs::write(path, &data)?;
|
|
File::open(path)?.sync_all()?;
|
|
Ok(FileObject(
|
|
Some(File::options().read(true).write(false).open(path)?),
|
|
data.len() as u64,
|
|
))
|
|
}
|
|
|
|
pub fn open(path: &Path) -> Result<Self> {
|
|
let file = File::options().read(true).write(false).open(path)?;
|
|
let size = file.metadata()?.len();
|
|
Ok(FileObject(Some(file), size))
|
|
}
|
|
}
|
|
|
|
pub struct SsTable {
|
|
file: FileObject,
|
|
block_metas: Vec<BlockMeta>,
|
|
block_meta_offset: usize,
|
|
id: usize,
|
|
block_cache: Option<Arc<BlockCache>>,
|
|
first_key: Bytes,
|
|
last_key: Bytes,
|
|
}
|
|
|
|
impl SsTable {
|
|
#[cfg(test)]
|
|
pub(crate) fn open_for_test(file: FileObject) -> Result<Self> {
|
|
Self::open(0, None, file)
|
|
}
|
|
|
|
/// Open SSTable from a file.
|
|
pub fn open(id: usize, block_cache: Option<Arc<BlockCache>>, file: FileObject) -> Result<Self> {
|
|
let len = file.size();
|
|
let raw_meta_offset = file.read(len - 4, 4)?;
|
|
let block_meta_offset = (&raw_meta_offset[..]).get_u32() as u64;
|
|
let raw_meta = file.read(block_meta_offset, len - 4 - block_meta_offset)?;
|
|
let block_metas = BlockMeta::decode_block_meta(&raw_meta[..]);
|
|
Ok(Self {
|
|
file,
|
|
first_key: block_metas.first().unwrap().first_key.clone(),
|
|
last_key: block_metas.last().unwrap().last_key.clone(),
|
|
block_metas,
|
|
block_meta_offset: block_meta_offset as usize,
|
|
id,
|
|
block_cache,
|
|
})
|
|
}
|
|
|
|
/// Create a mock SST with only first key + last key metadata
|
|
pub fn create_meta_only(id: usize, file_size: u64, first_key: Bytes, last_key: Bytes) -> Self {
|
|
Self {
|
|
file: FileObject(None, file_size),
|
|
block_metas: vec![],
|
|
block_meta_offset: 0,
|
|
id,
|
|
block_cache: None,
|
|
first_key,
|
|
last_key,
|
|
}
|
|
}
|
|
|
|
/// Read a block from the disk.
|
|
pub fn read_block(&self, block_idx: usize) -> Result<Arc<Block>> {
|
|
let offset = self.block_metas[block_idx].offset;
|
|
let offset_end = self
|
|
.block_metas
|
|
.get(block_idx + 1)
|
|
.map_or(self.block_meta_offset, |x| x.offset);
|
|
let block_data = self
|
|
.file
|
|
.read(offset as u64, (offset_end - offset) as u64)?;
|
|
Ok(Arc::new(Block::decode(&block_data[..])))
|
|
}
|
|
|
|
/// Read a block from disk, with block cache.
|
|
pub fn read_block_cached(&self, block_idx: usize) -> Result<Arc<Block>> {
|
|
if let Some(ref block_cache) = self.block_cache {
|
|
let blk = block_cache
|
|
.try_get_with((self.id, block_idx), || self.read_block(block_idx))
|
|
.map_err(|e| anyhow!("{}", e))?;
|
|
Ok(blk)
|
|
} else {
|
|
self.read_block(block_idx)
|
|
}
|
|
}
|
|
|
|
/// Find the block that may contain `key`.
|
|
pub fn find_block_idx(&self, key: &[u8]) -> usize {
|
|
self.block_metas
|
|
.partition_point(|meta| meta.first_key <= key)
|
|
.saturating_sub(1)
|
|
}
|
|
|
|
/// Get number of data blocks.
|
|
pub fn num_of_blocks(&self) -> usize {
|
|
self.block_metas.len()
|
|
}
|
|
|
|
pub fn first_key(&self) -> &Bytes {
|
|
&self.first_key
|
|
}
|
|
|
|
pub fn last_key(&self) -> &Bytes {
|
|
&self.last_key
|
|
}
|
|
|
|
pub fn table_size(&self) -> u64 {
|
|
self.file.1
|
|
}
|
|
|
|
pub fn sst_id(&self) -> usize {
|
|
self.id
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests;
|