Files
mini_lsm/mini-lsm/src/lsm_iterator.rs
Alex Chi Z a3a92359e1 add key abstraction and prepare for MVCC (#28)
* add key abstraction and prepare for MVCC

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* a little bit type exercise

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* refactor tests

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* fix clippy warnings

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* refactor starter code

Signed-off-by: Alex Chi <iskyzh@gmail.com>

* final touch docs

Signed-off-by: Alex Chi <iskyzh@gmail.com>

---------

Signed-off-by: Alex Chi <iskyzh@gmail.com>
2024-01-25 10:59:08 +08:00

140 lines
3.7 KiB
Rust

use std::ops::Bound;
use anyhow::{bail, Result};
use bytes::Bytes;
use crate::iterators::concat_iterator::SstConcatIterator;
use crate::iterators::merge_iterator::MergeIterator;
use crate::iterators::two_merge_iterator::TwoMergeIterator;
use crate::iterators::StorageIterator;
use crate::mem_table::MemTableIterator;
use crate::table::SsTableIterator;
/// The concrete iterator stack wrapped by `LsmIterator`.
///
/// It is a two-way merge whose first input is itself a two-way merge of merged
/// `MemTableIterator`s with merged `SsTableIterator`s, and whose second input is a merge of
/// `SstConcatIterator`s. This alias is expected to change several times over the course of the
/// tutorial.
type LsmIteratorInner = TwoMergeIterator<
TwoMergeIterator<MergeIterator<MemTableIterator>, MergeIterator<SsTableIterator>>,
MergeIterator<SstConcatIterator>,
>;
/// Iterator over an `LsmIteratorInner` that stops at an upper key bound and skips entries whose
/// value is empty.
pub struct LsmIterator {
/// The wrapped iterator stack that supplies keys and values.
inner: LsmIteratorInner,
/// Upper bound on keys; compared against the inner iterator's key after each advance.
end_bound: Bound<Bytes>,
/// Cached validity reported by `is_valid`; cleared once the inner iterator is exhausted or its
/// key falls outside `end_bound`.
is_valid: bool,
}
impl LsmIterator {
    /// Wraps `iter`, restricting iteration to keys permitted by `end_bound`.
    ///
    /// After construction the iterator is positioned on the first entry of `iter` that lies
    /// within `end_bound` and has a non-empty value, or is invalid if no such entry exists.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the inner iterator while skipping entries.
    pub(crate) fn new(iter: LsmIteratorInner, end_bound: Bound<Bytes>) -> Result<Self> {
        let mut iter = Self {
            is_valid: iter.is_valid(),
            inner: iter,
            end_bound,
        };
        // The inner iterator may already be positioned past the upper bound (for example when
        // its first key is greater than `end_bound`), so the bound has to be checked before
        // the first entry is exposed, not only after advancing.
        iter.apply_end_bound();
        iter.move_to_non_delete()?;
        Ok(iter)
    }

    /// Refreshes `is_valid` from the inner iterator's current position.
    ///
    /// The iterator becomes invalid when the inner iterator is exhausted. Otherwise, for a
    /// bounded range, validity is set to whether the current key satisfies `end_bound`; for an
    /// unbounded range the flag is left untouched.
    fn apply_end_bound(&mut self) {
        if !self.inner.is_valid() {
            self.is_valid = false;
            return;
        }
        match self.end_bound.as_ref() {
            Bound::Unbounded => {}
            Bound::Included(key) => self.is_valid = self.inner.key().raw_ref() <= key.as_ref(),
            Bound::Excluded(key) => self.is_valid = self.inner.key().raw_ref() < key.as_ref(),
        }
    }

    /// Advances the inner iterator by one entry and re-evaluates validity against `end_bound`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by the inner iterator's `next`, if any.
    fn next_inner(&mut self) -> Result<()> {
        self.inner.next()?;
        self.apply_end_bound();
        Ok(())
    }

    /// Skips forward past entries whose value is empty (deletion markers) until a non-empty
    /// value is found or the iterator becomes invalid.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while advancing.
    fn move_to_non_delete(&mut self) -> Result<()> {
        while self.is_valid() && self.inner.value().is_empty() {
            self.next_inner()?;
        }
        Ok(())
    }
}
impl StorageIterator for LsmIterator {
    type KeyType<'a> = &'a [u8];

    /// Returns the cached validity flag maintained while advancing.
    fn is_valid(&self) -> bool {
        self.is_valid
    }

    /// Returns the raw bytes of the inner iterator's current key.
    fn key(&self) -> &[u8] {
        let current = self.inner.key();
        current.raw_ref()
    }

    /// Returns the inner iterator's current value.
    fn value(&self) -> &[u8] {
        self.inner.value()
    }

    /// Advances by one entry, then skips any following entries whose value is empty.
    fn next(&mut self) -> Result<()> {
        self.next_inner()?;
        self.move_to_non_delete()
    }

    /// Delegates to the inner iterator's count of active iterators.
    fn num_active_iterators(&self) -> usize {
        self.inner.num_active_iterators()
    }
}
/// A wrapper around an existing iterator that guards against misuse once the iterator is
/// invalid or has failed.
///
/// If the wrapped iterator is already invalid, `next` does nothing. Once `next` has returned an
/// error, `is_valid` returns false and every later call to `next` returns an error.
pub struct FusedIterator<I: StorageIterator> {
/// The wrapped iterator.
iter: I,
/// Set once the wrapped iterator's `next` has returned an error.
has_errored: bool,
}
impl<I: StorageIterator> FusedIterator<I> {
    /// Wraps `iter` in a fused iterator that has not yet observed an error.
    pub fn new(iter: I) -> Self {
        let has_errored = false;
        Self { iter, has_errored }
    }
}
impl<I: StorageIterator> StorageIterator for FusedIterator<I> {
    type KeyType<'a> = I::KeyType<'a> where Self: 'a;

    /// Valid only while no error has been recorded and the wrapped iterator is valid.
    fn is_valid(&self) -> bool {
        !self.has_errored && self.iter.is_valid()
    }

    /// Returns the wrapped iterator's current key.
    ///
    /// # Panics
    ///
    /// Panics if an error has been recorded or the wrapped iterator is invalid.
    fn key(&self) -> Self::KeyType<'_> {
        assert!(
            !self.has_errored && self.iter.is_valid(),
            "invalid access to the underlying iterator"
        );
        self.iter.key()
    }

    /// Returns the wrapped iterator's current value.
    ///
    /// # Panics
    ///
    /// Panics if an error has been recorded or the wrapped iterator is invalid.
    fn value(&self) -> &[u8] {
        assert!(
            !self.has_errored && self.iter.is_valid(),
            "invalid access to the underlying iterator"
        );
        self.iter.value()
    }

    /// Advances the wrapped iterator if it is still valid.
    ///
    /// # Errors
    ///
    /// Returns an error if a previous call already failed, or forwards the error from the
    /// wrapped iterator's `next` and records it so that later calls fail too.
    fn next(&mut self) -> Result<()> {
        if self.has_errored {
            bail!("the iterator is tainted");
        }
        // An exhausted iterator is left untouched rather than advanced again.
        if !self.iter.is_valid() {
            return Ok(());
        }
        let outcome = self.iter.next();
        if outcome.is_err() {
            self.has_errored = true;
        }
        outcome
    }

    /// Delegates to the wrapped iterator's count of active iterators.
    fn num_active_iterators(&self) -> usize {
        self.iter.num_active_iterators()
    }
}