Files
mini_lsm/mini-lsm/src/lsm_iterator.rs
Alex Chi Z. 7f4b204064 relicense mini-lsm-book to CC BY-NC-SA 4.0 (#118)
* relicense mini-lsm-book to CC BY-NC-SA 4.0

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

* clearify license

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

* fix fmt

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

* fix fmt

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>

---------

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
2025-01-19 19:24:12 -05:00

157 lines
4.3 KiB
Rust

// Copyright (c) 2022-2025 Alex Chi Z
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::Bound;
use anyhow::{bail, Result};
use bytes::Bytes;
use crate::iterators::concat_iterator::SstConcatIterator;
use crate::iterators::merge_iterator::MergeIterator;
use crate::iterators::two_merge_iterator::TwoMergeIterator;
use crate::iterators::StorageIterator;
use crate::mem_table::MemTableIterator;
use crate::table::SsTableIterator;
/// The concrete iterator stack that [`LsmIterator`] wraps. This alias is expected to change
/// several times over the course of the tutorial.
///
/// Structurally it is a two-way merge of:
/// 1. a two-way merge of the merged memtable iterators and the merged `SsTableIterator`s, and
/// 2. a merge of `SstConcatIterator`s.
///
/// NOTE(review): presumably (1) covers memtables plus individually-iterated SSTs and (2) covers
/// sorted runs of SSTs — confirm against the code that constructs this type.
type LsmIteratorInner = TwoMergeIterator<
TwoMergeIterator<MergeIterator<MemTableIterator>, MergeIterator<SsTableIterator>>,
MergeIterator<SstConcatIterator>,
>;
/// Iterator over the merged LSM contents that stops at an upper key bound and skips entries
/// whose value is empty.
pub struct LsmIterator {
/// The underlying merged iterator that actually walks the data.
inner: LsmIteratorInner,
/// Upper bound on keys; once the inner key falls outside it, the iterator becomes invalid.
end_bound: Bound<Bytes>,
/// Cached validity flag, reported by `is_valid()`; updated whenever the inner iterator advances.
is_valid: bool,
}
impl LsmIterator {
    /// Wraps `iter`, limiting it to keys that satisfy `end_bound` and skipping entries whose
    /// value is empty.
    ///
    /// The starting position of `iter` is checked against `end_bound` too, so an inner
    /// iterator that already sits past the bound produces an iterator that is invalid from
    /// the start instead of exposing one out-of-range entry.
    ///
    /// # Errors
    ///
    /// Propagates any error raised by the inner iterator while skipping leading entries with
    /// an empty value.
    pub(crate) fn new(iter: LsmIteratorInner, end_bound: Bound<Bytes>) -> Result<Self> {
        let mut iter = Self {
            is_valid: false,
            inner: iter,
            end_bound,
        };
        // `next_inner` only re-checks the bound after advancing, so the initial entry has to
        // be validated explicitly here.
        iter.is_valid = iter.current_in_range();
        iter.move_to_non_delete()?;
        Ok(iter)
    }

    /// Returns `true` when the inner iterator is valid and its current key satisfies
    /// `end_bound`.
    fn current_in_range(&self) -> bool {
        if !self.inner.is_valid() {
            return false;
        }
        match self.end_bound.as_ref() {
            Bound::Unbounded => true,
            Bound::Included(key) => self.inner.key().raw_ref() <= key.as_ref(),
            Bound::Excluded(key) => self.inner.key().raw_ref() < key.as_ref(),
        }
    }

    /// Advances the inner iterator by one entry and refreshes the cached validity flag.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the inner iterator's `next`.
    fn next_inner(&mut self) -> Result<()> {
        self.inner.next()?;
        self.is_valid = self.current_in_range();
        Ok(())
    }

    /// Advances past consecutive entries whose value is empty (treated as deletions, per the
    /// method name), stopping at the first non-empty value or when the iterator becomes
    /// invalid.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while advancing the inner iterator.
    fn move_to_non_delete(&mut self) -> Result<()> {
        while self.is_valid() && self.inner.value().is_empty() {
            self.next_inner()?;
        }
        Ok(())
    }
}
impl StorageIterator for LsmIterator {
    type KeyType<'a> = &'a [u8];

    /// Reports the cached validity flag maintained by this wrapper.
    fn is_valid(&self) -> bool {
        self.is_valid
    }

    /// Returns the raw bytes of the inner iterator's current key.
    fn key(&self) -> &[u8] {
        self.inner.key().raw_ref()
    }

    /// Returns the inner iterator's current value.
    fn value(&self) -> &[u8] {
        self.inner.value()
    }

    /// Steps to the next entry, then keeps stepping past entries with an empty value.
    ///
    /// # Errors
    ///
    /// Propagates any error from advancing the inner iterator.
    fn next(&mut self) -> Result<()> {
        self.next_inner()?;
        self.move_to_non_delete()
    }

    /// Forwards the active-iterator count reported by the inner iterator.
    fn num_active_iterators(&self) -> usize {
        self.inner.num_active_iterators()
    }
}
/// A wrapper around an existing iterator that guards against misuse of an invalid iterator.
///
/// - If the wrapped iterator is already invalid, `next` does nothing and returns `Ok(())`.
/// - If the wrapped iterator's `next` returns an error, the wrapper is marked as errored:
///   `is_valid` returns `false` from then on and every later `next` call returns an error.
pub struct FusedIterator<I: StorageIterator> {
/// The wrapped iterator.
iter: I,
/// Set once the wrapped iterator's `next` has returned an error; never cleared afterwards.
has_errored: bool,
}
impl<I: StorageIterator> FusedIterator<I> {
    /// Wraps `iter` in a fused iterator that starts out in the non-errored state.
    pub fn new(iter: I) -> Self {
        let has_errored = false;
        Self { iter, has_errored }
    }
}
impl<I: StorageIterator> StorageIterator for FusedIterator<I> {
    type KeyType<'a>
        = I::KeyType<'a>
    where
        Self: 'a;

    /// Valid only while no error has been recorded and the wrapped iterator is itself valid.
    fn is_valid(&self) -> bool {
        if self.has_errored {
            return false;
        }
        self.iter.is_valid()
    }

    /// Returns the wrapped iterator's current key.
    ///
    /// # Panics
    ///
    /// Panics if this iterator is not valid.
    fn key(&self) -> Self::KeyType<'_> {
        assert!(self.is_valid(), "invalid access to the underlying iterator");
        self.iter.key()
    }

    /// Returns the wrapped iterator's current value.
    ///
    /// # Panics
    ///
    /// Panics if this iterator is not valid.
    fn value(&self) -> &[u8] {
        assert!(self.is_valid(), "invalid access to the underlying iterator");
        self.iter.value()
    }

    /// Advances the wrapped iterator if it is still valid; otherwise does nothing.
    ///
    /// # Errors
    ///
    /// Returns an error if a previous call already failed, or if the wrapped iterator's
    /// `next` fails now (in which case this iterator is marked as errored).
    fn next(&mut self) -> Result<()> {
        if self.has_errored {
            bail!("the iterator is tainted");
        }
        // An exhausted iterator is left untouched rather than advanced further.
        if !self.iter.is_valid() {
            return Ok(());
        }
        let outcome = self.iter.next();
        if outcome.is_err() {
            // Remember the failure so later calls are rejected up front.
            self.has_errored = true;
        }
        outcome
    }

    /// Forwards the active-iterator count reported by the wrapped iterator.
    fn num_active_iterators(&self) -> usize {
        self.iter.num_active_iterators()
    }
}