| 
									
										
										
										
											2025-01-19 19:24:12 -05:00
										 |  |  | // Copyright (c) 2022-2025 Alex Chi Z
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Licensed under the Apache License, Version 2.0 (the "License");
 | 
					
						
							|  |  |  | // you may not use this file except in compliance with the License.
 | 
					
						
							|  |  |  | // You may obtain a copy of the License at
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //     http://www.apache.org/licenses/LICENSE-2.0
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Unless required by applicable law or agreed to in writing, software
 | 
					
						
							|  |  |  | // distributed under the License is distributed on an "AS IS" BASIS,
 | 
					
						
							|  |  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					
						
							|  |  |  | // See the License for the specific language governing permissions and
 | 
					
						
							|  |  |  | // limitations under the License.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  | use std::ops::Bound;
 | 
					
						
							|  |  |  | use std::path::Path;
 | 
					
						
							|  |  |  | use std::sync::Arc;
 | 
					
						
							| 
									
										
										
										
											2025-03-09 16:11:52 -04:00
										 |  |  | use std::sync::atomic::AtomicUsize;
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | use anyhow::Result;
 | 
					
						
							|  |  |  | use bytes::Bytes;
 | 
					
						
							|  |  |  | use crossbeam_skiplist::SkipMap;
 | 
					
						
							| 
									
										
										
										
											2025-03-09 16:11:52 -04:00
										 |  |  | use crossbeam_skiplist::map::Entry;
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  | use ouroboros::self_referencing;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use crate::iterators::StorageIterator;
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  | use crate::key::{KeyBytes, KeySlice, TS_DEFAULT};
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  | use crate::table::SsTableBuilder;
 | 
					
						
							|  |  |  | use crate::wal::Wal;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// A basic mem-table based on crossbeam-skiplist.
 | 
					
						
							|  |  |  | ///
 | 
					
						
							|  |  |  | /// An initial implementation of memtable is part of week 1, day 1. It will be incrementally implemented in other
 | 
					
						
							|  |  |  | /// chapters of week 1 and week 2.
 | 
					
						
							|  |  |  | pub struct MemTable {
 | 
					
						
							| 
									
										
										
										
											2024-01-29 20:46:12 +08:00
										 |  |  |     pub(crate) map: Arc<SkipMap<KeyBytes, Bytes>>,
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |     wal: Option<Wal>,
 | 
					
						
							|  |  |  |     id: usize,
 | 
					
						
							|  |  |  |     approximate_size: Arc<AtomicUsize>,
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// Create a bound of `Bytes` from a bound of `&[u8]`.
 | 
					
						
							|  |  |  | pub(crate) fn map_bound(bound: Bound<&[u8]>) -> Bound<Bytes> {
 | 
					
						
							|  |  |  |     match bound {
 | 
					
						
							|  |  |  |         Bound::Included(x) => Bound::Included(Bytes::copy_from_slice(x)),
 | 
					
						
							|  |  |  |         Bound::Excluded(x) => Bound::Excluded(Bytes::copy_from_slice(x)),
 | 
					
						
							|  |  |  |         Bound::Unbounded => Bound::Unbounded,
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  | /// Create a bound of `Bytes` from a bound of `KeySlice`.
 | 
					
						
							|  |  |  | pub(crate) fn map_key_bound(bound: Bound<KeySlice>) -> Bound<KeyBytes> {
 | 
					
						
							|  |  |  |     match bound {
 | 
					
						
							|  |  |  |         Bound::Included(x) => Bound::Included(KeyBytes::from_bytes_with_ts(
 | 
					
						
							|  |  |  |             Bytes::copy_from_slice(x.key_ref()),
 | 
					
						
							|  |  |  |             x.ts(),
 | 
					
						
							|  |  |  |         )),
 | 
					
						
							|  |  |  |         Bound::Excluded(x) => Bound::Excluded(KeyBytes::from_bytes_with_ts(
 | 
					
						
							|  |  |  |             Bytes::copy_from_slice(x.key_ref()),
 | 
					
						
							|  |  |  |             x.ts(),
 | 
					
						
							|  |  |  |         )),
 | 
					
						
							|  |  |  |         Bound::Unbounded => Bound::Unbounded,
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// Create a bound of `Bytes` from a bound of `KeySlice`.
 | 
					
						
							|  |  |  | pub(crate) fn map_key_bound_plus_ts(bound: Bound<&[u8]>, ts: u64) -> Bound<KeySlice> {
 | 
					
						
							|  |  |  |     match bound {
 | 
					
						
							|  |  |  |         Bound::Included(x) => Bound::Included(KeySlice::from_slice(x, ts)),
 | 
					
						
							|  |  |  |         Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice(x, ts)),
 | 
					
						
							|  |  |  |         Bound::Unbounded => Bound::Unbounded,
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  | impl MemTable {
 | 
					
						
							|  |  |  |     /// Create a new mem-table.
 | 
					
						
							|  |  |  |     pub fn create(id: usize) -> Self {
 | 
					
						
							|  |  |  |         Self {
 | 
					
						
							|  |  |  |             id,
 | 
					
						
							|  |  |  |             map: Arc::new(SkipMap::new()),
 | 
					
						
							|  |  |  |             wal: None,
 | 
					
						
							|  |  |  |             approximate_size: Arc::new(AtomicUsize::new(0)),
 | 
					
						
							|  |  |  |         }
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Create a new mem-table with WAL
 | 
					
						
							|  |  |  |     pub fn create_with_wal(id: usize, path: impl AsRef<Path>) -> Result<Self> {
 | 
					
						
							|  |  |  |         Ok(Self {
 | 
					
						
							|  |  |  |             id,
 | 
					
						
							|  |  |  |             map: Arc::new(SkipMap::new()),
 | 
					
						
							|  |  |  |             wal: Some(Wal::create(path.as_ref())?),
 | 
					
						
							|  |  |  |             approximate_size: Arc::new(AtomicUsize::new(0)),
 | 
					
						
							|  |  |  |         })
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Create a memtable from WAL
 | 
					
						
							|  |  |  |     pub fn recover_from_wal(id: usize, path: impl AsRef<Path>) -> Result<Self> {
 | 
					
						
							|  |  |  |         let map = Arc::new(SkipMap::new());
 | 
					
						
							|  |  |  |         Ok(Self {
 | 
					
						
							|  |  |  |             id,
 | 
					
						
							|  |  |  |             wal: Some(Wal::recover(path.as_ref(), &map)?),
 | 
					
						
							|  |  |  |             map,
 | 
					
						
							|  |  |  |             approximate_size: Arc::new(AtomicUsize::new(0)),
 | 
					
						
							|  |  |  |         })
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |     /// Get a value by key. Should not be used in week 3.
 | 
					
						
							|  |  |  |     pub fn get(&self, key: KeySlice) -> Option<Bytes> {
 | 
					
						
							|  |  |  |         let key_bytes = KeyBytes::from_bytes_with_ts(
 | 
					
						
							| 
									
										
										
										
											2024-06-24 00:45:56 +08:00
										 |  |  |             Bytes::from_static(unsafe { std::mem::transmute::<&[u8], &[u8]>(key.key_ref()) }),
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |             key.ts(),
 | 
					
						
							|  |  |  |         );
 | 
					
						
							|  |  |  |         self.map.get(&key_bytes).map(|e| e.value().clone())
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn for_testing_put_slice(&self, key: &[u8], value: &[u8]) -> Result<()> {
 | 
					
						
							|  |  |  |         self.put(KeySlice::from_slice(key, TS_DEFAULT), value)
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn for_testing_get_slice(&self, key: &[u8]) -> Option<Bytes> {
 | 
					
						
							|  |  |  |         self.get(KeySlice::from_slice(key, TS_DEFAULT))
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn for_testing_scan_slice(
 | 
					
						
							|  |  |  |         &self,
 | 
					
						
							|  |  |  |         lower: Bound<&[u8]>,
 | 
					
						
							|  |  |  |         upper: Bound<&[u8]>,
 | 
					
						
							|  |  |  |     ) -> MemTableIterator {
 | 
					
						
							|  |  |  |         self.scan(
 | 
					
						
							|  |  |  |             map_key_bound_plus_ts(lower, TS_DEFAULT),
 | 
					
						
							|  |  |  |             map_key_bound_plus_ts(upper, TS_DEFAULT),
 | 
					
						
							|  |  |  |         )
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Put a key-value pair into the mem-table.
 | 
					
						
							|  |  |  |     ///
 | 
					
						
							|  |  |  |     /// In week 1, day 1, simply put the key-value pair into the skipmap.
 | 
					
						
							|  |  |  |     /// In week 2, day 6, also flush the data to WAL.
 | 
					
						
							| 
									
										
										
										
											2024-07-02 20:23:33 -04:00
										 |  |  |     /// In week 3, day 5, modify the function to use the batch API.
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |     pub fn put(&self, key: KeySlice, value: &[u8]) -> Result<()> {
 | 
					
						
							| 
									
										
										
										
											2024-07-02 20:23:33 -04:00
										 |  |  |         self.put_batch(&[(key, value)])
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Implement this in week 3, day 5.
 | 
					
						
							|  |  |  |     pub fn put_batch(&self, data: &[(KeySlice, &[u8])]) -> Result<()> {
 | 
					
						
							|  |  |  |         let mut estimated_size = 0;
 | 
					
						
							|  |  |  |         for (key, value) in data {
 | 
					
						
							|  |  |  |             estimated_size += key.raw_len() + value.len();
 | 
					
						
							|  |  |  |             self.map.insert(
 | 
					
						
							|  |  |  |                 key.to_key_vec().into_key_bytes(),
 | 
					
						
							|  |  |  |                 Bytes::copy_from_slice(value),
 | 
					
						
							|  |  |  |             );
 | 
					
						
							|  |  |  |         }
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |         self.approximate_size
 | 
					
						
							|  |  |  |             .fetch_add(estimated_size, std::sync::atomic::Ordering::Relaxed);
 | 
					
						
							|  |  |  |         if let Some(ref wal) = self.wal {
 | 
					
						
							| 
									
										
										
										
											2024-07-02 20:23:33 -04:00
										 |  |  |             wal.put_batch(data)?;
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |         }
 | 
					
						
							|  |  |  |         Ok(())
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn sync_wal(&self) -> Result<()> {
 | 
					
						
							|  |  |  |         if let Some(ref wal) = self.wal {
 | 
					
						
							|  |  |  |             wal.sync()?;
 | 
					
						
							|  |  |  |         }
 | 
					
						
							|  |  |  |         Ok(())
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Get an iterator over a range of keys.
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |     pub fn scan(&self, lower: Bound<KeySlice>, upper: Bound<KeySlice>) -> MemTableIterator {
 | 
					
						
							|  |  |  |         let (lower, upper) = (map_key_bound(lower), map_key_bound(upper));
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |         let mut iter = MemTableIteratorBuilder {
 | 
					
						
							|  |  |  |             map: self.map.clone(),
 | 
					
						
							|  |  |  |             iter_builder: |map| map.range((lower, upper)),
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |             item: (KeyBytes::new(), Bytes::new()),
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |         }
 | 
					
						
							|  |  |  |         .build();
 | 
					
						
							| 
									
										
										
										
											2024-02-11 23:41:07 +08:00
										 |  |  |         iter.next().unwrap();
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |         iter
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Flush the mem-table to SSTable. Implement in week 1 day 6.
 | 
					
						
							|  |  |  |     pub fn flush(&self, builder: &mut SsTableBuilder) -> Result<()> {
 | 
					
						
							|  |  |  |         for entry in self.map.iter() {
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |             builder.add(entry.key().as_key_slice(), &entry.value()[..]);
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |         }
 | 
					
						
							|  |  |  |         Ok(())
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn id(&self) -> usize {
 | 
					
						
							|  |  |  |         self.id
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pub fn approximate_size(&self) -> usize {
 | 
					
						
							|  |  |  |         self.approximate_size
 | 
					
						
							|  |  |  |             .load(std::sync::atomic::Ordering::Relaxed)
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Only use this function when closing the database
 | 
					
						
							|  |  |  |     pub fn is_empty(&self) -> bool {
 | 
					
						
							|  |  |  |         self.map.is_empty()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  | type SkipMapRangeIter<'a> = crossbeam_skiplist::map::Range<
 | 
					
						
							|  |  |  |     'a,
 | 
					
						
							|  |  |  |     KeyBytes,
 | 
					
						
							|  |  |  |     (Bound<KeyBytes>, Bound<KeyBytes>),
 | 
					
						
							|  |  |  |     KeyBytes,
 | 
					
						
							|  |  |  |     Bytes,
 | 
					
						
							|  |  |  | >;
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | /// An iterator over a range of `SkipMap`. This is a self-referential structure and please refer to week 1, day 2
 | 
					
						
							|  |  |  | /// chapter for more information.
 | 
					
						
							|  |  |  | ///
 | 
					
						
							|  |  |  | /// This is part of week 1, day 2.
 | 
					
						
							|  |  |  | #[self_referencing]
 | 
					
						
							|  |  |  | pub struct MemTableIterator {
 | 
					
						
							|  |  |  |     /// Stores a reference to the skipmap.
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |     map: Arc<SkipMap<KeyBytes, Bytes>>,
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |     /// Stores a skipmap iterator that refers to the lifetime of `MemTableIterator` itself.
 | 
					
						
							|  |  |  |     #[borrows(map)]
 | 
					
						
							|  |  |  |     #[not_covariant]
 | 
					
						
							|  |  |  |     iter: SkipMapRangeIter<'this>,
 | 
					
						
							|  |  |  |     /// Stores the current key-value pair.
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |     item: (KeyBytes, Bytes),
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | impl MemTableIterator {
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |     fn entry_to_item(entry: Option<Entry<'_, KeyBytes, Bytes>>) -> (KeyBytes, Bytes) {
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |         entry
 | 
					
						
							|  |  |  |             .map(|x| (x.key().clone(), x.value().clone()))
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |             .unwrap_or_else(|| (KeyBytes::new(), Bytes::new()))
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |     }
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | impl StorageIterator for MemTableIterator {
 | 
					
						
							|  |  |  |     type KeyType<'a> = KeySlice<'a>;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fn value(&self) -> &[u8] {
 | 
					
						
							|  |  |  |         &self.borrow_item().1[..]
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fn key(&self) -> KeySlice {
 | 
					
						
							| 
									
										
										
										
											2024-01-25 23:09:16 +08:00
										 |  |  |         self.borrow_item().0.as_key_slice()
 | 
					
						
							| 
									
										
										
										
											2024-01-25 12:07:53 +08:00
										 |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fn is_valid(&self) -> bool {
 | 
					
						
							|  |  |  |         !self.borrow_item().0.is_empty()
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fn next(&mut self) -> Result<()> {
 | 
					
						
							|  |  |  |         let entry = self.with_iter_mut(|iter| MemTableIterator::entry_to_item(iter.next()));
 | 
					
						
							|  |  |  |         self.with_mut(|x| *x.item = entry);
 | 
					
						
							|  |  |  |         Ok(())
 | 
					
						
							|  |  |  |     }
 | 
					
						
							|  |  |  | }
 |