1 unstable release
0.0.1 | Sep 21, 2021 |
---|
#9 in #lsh
23KB
526 lines
minhash-lsh
This crate reimplements the MinHash and MinHash LSH approaches from the Python package datasketch in Rust. It is only a partial reimplementation; use it at your own risk.
lib.rs:
minhash-lsh
This crate reimplements the MinHash and MinHash LSH approaches from the Python package datasketch in Rust. It is only a partial reimplementation; use it at your own risk.
Example MinHash
use datasketch_minhash_lsh::MinHash;
let mut m1 = <MinHash>::new(4, Some(1));
let mut m2 = <MinHash>::new(4, Some(1));
assert_eq!(m1.jaccard(&m2).unwrap(), 1.0);
m2.update(&12);
assert_eq!(m1.jaccard(&m2).unwrap(), 0.0);
m1.update(&13);
assert!(m1.jaccard(&m2).unwrap() < 1.0);
m1.update(&12);
let distance = m1.jaccard(&m2).unwrap();
assert!(distance < 1.0 && distance > 0.0);
Example MinHashLsh
use datasketch_minhash_lsh::{MinHashLsh, MinHash};
let mut lsh = <MinHashLsh<&str>>::new(16, None, Some(0.5)).unwrap();
let mut m1 = <MinHash>::new(16, Some(0));
m1.update(&"a");
let mut m2 = <MinHash>::new(16, Some(0));
m2.update(&"b");
lsh.insert("a", &m1).unwrap();
lsh.insert("b", &m2).unwrap();
let result = lsh.query(&m1).unwrap();
assert!(result.contains(&"a"));
let result = lsh.query(&m2).unwrap();
assert!(result.contains(&"b"));
assert!(result.len() <= 2);
Dependencies
~1.9–2.6MB
~46K SLoC