#mmap #hash-map #disk #database

opendiskmap

Single-threaded disk-backed hash map with memory-mapped file backing

1 unstable release

Uses new Rust 2024

0.1.0 Aug 2, 2025

#66 in Database implementations

Apache-2.0

110KB
2.5K SLoC

MapX2 🗺️

CI Security Audit CodeQL Crates.io Documentation License Rust Version

High-performance, persistent hash maps for Rust with zero-copy deserialization support and memory-mapped file backing.

There are no durability guarantees, but the data is stored in a memory-mapped file that persists across program runs. The OS will flush the data when it pleases!

✨ Features

  • 🚀 High Performance: Open addressing with linear probing for cache-friendly access
  • 💾 Persistent Storage: Memory-mapped files so data persists across program runs, with fast startup
  • 🔄 Zero-Copy: 8-byte aligned storage compatible with rkyv and other zero-copy frameworks
  • 🎯 Type Safety: Strong typing with trait-based encoding/decoding
  • 🧵 Memory Safe: Written in Rust with minimal unsafe code
  • πŸ“ Configurable: Custom capacity allocation with power-of-2 sizing
  • 🔧 Flexible: Support for any serializable key-value types

📦 Crates

Crate Description Status
opendiskmap Single-threaded persistent hash map ✅ Stable
concurrent_diskmap Multi-threaded hash map with sharded locking 🚧 In Development

🚀 Quick Start

Add to your Cargo.toml:

[dependencies]
opendiskmap = "0.1"
rkyv = "0.7" # Optional: for zero-copy serialization

Basic Usage with Native Types

use opendiskmap::{DiskHashMap, Native, Str};
use tempfile::tempdir;

// Walks through the basic lifecycle: create a map, write two entries, read one
// back, then reopen the same directory and confirm the entry is still there.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let workspace = tempdir()?;

    // The writable handle lives in its own scope so it is dropped (and the
    // map released) before the directory is opened a second time.
    {
        // u64 keys mapped to string values
        let mut writer: DiskHashMap<Native<u64>, Str, _, _> =
            DiskHashMap::new_in(workspace.path())?;

        writer.insert(&42, "Hello, World!")?;
        writer.insert(&100, "Rust is awesome!")?;

        match writer.get(&42)? {
            // prints "Found: Hello, World!"
            Some(greeting) => println!("Found: {}", greeting),
            None => (),
        }

        // prints "Map size: 2"
        println!("Map size: {}", writer.len());
    }

    // Reopen the map from the same directory; the data written above persists.
    let reader: DiskHashMap<Native<u64>, Str, _, _> =
        DiskHashMap::load_from(workspace.path())?;

    assert_eq!(reader.get(&42)?, Some("Hello, World!"));

    Ok(())
}

Working with Raw Bytes

use opendiskmap::{DiskHashMap, Bytes};
use tempfile::tempdir;

// Stores raw byte-slice keys and values, then reads one back both as bytes
// and as lossily-decoded text.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let workspace = tempdir()?;

    let mut store: DiskHashMap<Bytes, Bytes, _, _> =
        DiskHashMap::new_in(workspace.path())?;

    // Keys and values are plain byte strings
    store.insert(b"key1", b"value1")?;
    store.insert(b"key2", b"value2")?;

    // Look the first key up again; the value comes back as a byte slice
    match store.get(b"key1")? {
        Some(raw) => {
            println!("Raw value: {:?}", raw); // [118, 97, 108, 117, 101, 49]
            println!("As string: {}", String::from_utf8_lossy(raw));
        }
        None => (),
    }

    Ok(())
}

Zero-Copy with rkyv

use opendiskmap::{DiskHashMap, Native, types::Arch};
use rkyv::{Archive, Deserialize, Serialize};
use tempfile::tempdir;

/// Example record used as the map's value type in this snippet.
///
/// Deriving `Archive`, `Serialize` and `Deserialize` (from rkyv) is what lets
/// the value be stored via the `Arch<UserProfile>` wrapper.
#[derive(Archive, Deserialize, Serialize, Debug, PartialEq)]
struct UserProfile {
    // Numeric identifier of the user
    id: u32,
    // Display name
    name: String,
    // Contact e-mail address
    email: String,
    // Arbitrary list of floating-point scores
    scores: Vec<f64>,
}

// Stores a `UserProfile` as an rkyv-archived value and reads its fields back
// through the archived view returned by `get`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let workspace = tempdir()?;

    // Values are kept in archived form (zero-copy on read)
    let mut profiles: DiskHashMap<Native<u64>, Arch<UserProfile>, _, _> =
        DiskHashMap::new_in(workspace.path())?;

    let alice = UserProfile {
        id: 1,
        name: "Alice".to_string(),
        email: "alice@example.com".to_string(),
        scores: vec![95.5, 87.2, 92.8],
    };

    // The profile is serialized with rkyv as part of the insert
    profiles.insert(&1001, &alice)?;

    // The lookup yields a zero-copy view into the memory-mapped data, so the
    // fields below are read without a deserialization step.
    match profiles.get(&1001)? {
        Some(archived) => {
            println!("User ID: {}", archived.id);
            println!("User name: {}", archived.name);
            println!("User email: {}", archived.email);
            println!("First score: {}", archived.scores[0]);
        }
        None => (),
    }

    Ok(())
}

Custom Capacity Configuration

use opendiskmap::{DiskHashMap, Bytes};
use tempfile::tempdir;

// Pre-sizes a map with explicit capacities and then fills it with 500 entries.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let workspace = tempdir()?;

    // Requested capacities are rounded up to the next power of two
    let mut store: DiskHashMap<Bytes, Bytes, _, _> = DiskHashMap::with_capacity(
        workspace.path(),
        1000,    // entries (-> 1024)
        64_000,  // key storage bytes (-> 65536)
        128_000, // value storage bytes (-> 131072)
    )?;

    println!("Capacity: {}", store.capacity()); // 1024

    // With the space reserved up front, these inserts avoid early resizing
    for index in 0..500 {
        let key = format!("key_{}", index);
        let value = format!("value_{}", index);
        store.insert(key.as_bytes(), value.as_bytes())?;
    }

    Ok(())
}

Type Aliases for Common Use Cases

use opendiskmap::{U64StringMap, StringU64Map, StringStringMap};
use tempfile::tempdir;

// Exercises the three predefined map aliases, each stored in its own
// sub-directory of a shared temporary directory.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let workspace = tempdir()?;
    let root = workspace.path();

    // u64 -> String
    let mut names_by_id: U64StringMap<_> = U64StringMap::new_in(root.join("names"))?;
    names_by_id.insert(&1, "Alice")?;
    names_by_id.insert(&2, "Bob")?;

    // String -> u64
    let mut ids_by_name: StringU64Map<_> = StringU64Map::new_in(root.join("ids"))?;
    ids_by_name.insert("Alice", &1)?;
    ids_by_name.insert("Bob", &2)?;

    // String -> String
    let mut settings: StringStringMap<_> = StringStringMap::new_in(root.join("config"))?;
    settings.insert("database_url", "postgresql://localhost/mydb")?;
    settings.insert("log_level", "info")?;

    // Each map lives in a separate directory, so they persist independently
    assert_eq!(names_by_id.get(&1)?, Some("Alice"));
    assert_eq!(ids_by_name.get("Bob")?, Some(2));
    assert_eq!(settings.get("log_level")?, Some("info"));

    Ok(())
}

Entry API for Efficient Updates

use opendiskmap::{DiskHashMap, Native, MapEntry};
use tempfile::tempdir;

// Shows the entry API: a single lookup that either fills a vacant slot or
// updates an occupied one.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let workspace = tempdir()?;
    let mut counters: DiskHashMap<Native<u64>, Native<u64>, _, _> =
        DiskHashMap::new_in(workspace.path())?;

    // Insert-or-update on key 42
    match counters.entry(&42)? {
        MapEntry::Occupied(slot) => {
            // Key already present: overwrite and report the previous value
            let previous = slot.insert(&200)?;
            println!("Updated value from {} to 200", previous);
        }
        MapEntry::Vacant(slot) => {
            // Key absent: store the initial value
            slot.insert(&100)?;
        }
    }

    // The same pattern on key 43, using the convenience methods
    match counters.entry(&43)? {
        MapEntry::Occupied(slot) => {
            slot.or_insert_with(|| &250)?;
        }
        MapEntry::Vacant(slot) => {
            slot.or_insert(&150)?;
        }
    }

    Ok(())
}

🔧 Type System

Type Wrapper Purpose Example
Native<T> Copy types (numbers, etc.) Native<u64>, Native<f32>
Str String types String literals and String
Bytes Raw byte arrays &[u8], Vec<u8>
Arch<T> rkyv archived types Zero-copy structs

📊 Performance

Benchmarks

cargo bench

Dependencies

~2.5MB
~49K SLoC