#crf #trainer #machine-learning #rucrf

no-std rucrf

Conditional Random Fields implemented in pure Rust

10 releases

0.3.3 Mar 17, 2025
0.3.2 Oct 5, 2022
0.3.1 Sep 21, 2022
0.2.3 Sep 19, 2022
0.1.1 Sep 14, 2022

#155 in Algorithms

Download history 143/week @ 2025-01-14 83/week @ 2025-01-21 140/week @ 2025-01-28 217/week @ 2025-02-04 315/week @ 2025-02-11 197/week @ 2025-02-18 286/week @ 2025-02-25 117/week @ 2025-03-04 301/week @ 2025-03-11 221/week @ 2025-03-18 201/week @ 2025-03-25 180/week @ 2025-04-01 163/week @ 2025-04-08 335/week @ 2025-04-15 912/week @ 2025-04-22 267/week @ 2025-04-29

1,695 downloads per month
Used in vibrato

MIT/Apache

81KB
2K SLoC

rucrf: Conditional Random Fields implemented in pure Rust

Crates.io Documentation Rust Build Status

rucrf contains a trainer and an estimator for Conditional Random Fields (CRFs). This library supports:

  • lattices with variable length edges,
  • L1 and L2 regularization, and
  • multi-threading during training.

Examples

use std::num::NonZeroU32;

use rucrf::{Edge, FeatureProvider, FeatureSet, Lattice, Model, Trainer};

// Train:
// 京(kyo) 都(to)
// 東(to) 京(kyo)
// 京(kei) 浜(hin)
// 京(kyo) の(no) 都(miyako)
//
// Test:
// 水(mizu) の(no) 都(miyako)
//
// 1-gram features:
// 京: 1, 都: 2, 東: 3, 浜: 4, の: 5, 水: 6
// 2-gram features:
// kyo: 1, to: 2, kei: 3, hin: 4, no: 5, miyako: 6, mizu: 7

let mut provider = FeatureProvider::new();
let label_京kyo = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(1).unwrap()],
    &[NonZeroU32::new(1)],
    &[NonZeroU32::new(1)],
))?;
let label_都to = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(2).unwrap()],
    &[NonZeroU32::new(2)],
    &[NonZeroU32::new(2)],
))?;
let label_東to = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(3).unwrap()],
    &[NonZeroU32::new(2)],
    &[NonZeroU32::new(2)],
))?;
let label_京kei = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(1).unwrap()],
    &[NonZeroU32::new(3)],
    &[NonZeroU32::new(3)],
))?;
let label_浜hin = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(4).unwrap()],
    &[NonZeroU32::new(4)],
    &[NonZeroU32::new(4)],
))?;
let label_のno = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(5).unwrap()],
    &[NonZeroU32::new(5)],
    &[NonZeroU32::new(5)],
))?;
let label_都miyako = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(2).unwrap()],
    &[NonZeroU32::new(6)],
    &[NonZeroU32::new(6)],
))?;
let label_水mizu = provider.add_feature_set(FeatureSet::new(
    &[NonZeroU32::new(6).unwrap()],
    &[NonZeroU32::new(7)],
    &[NonZeroU32::new(7)],
))?;

let mut lattices = vec![];

// 京都 (kyo to)
let mut lattice = Lattice::new(2)?;
lattice.add_edge(0, Edge::new(1, label_京kyo))?;
lattice.add_edge(1, Edge::new(2, label_都to))?;

lattice.add_edge(0, Edge::new(1, label_京kei))?;
lattice.add_edge(1, Edge::new(2, label_都miyako))?;

lattices.push(lattice);

// 東京 (to kyo)
let mut lattice = Lattice::new(2)?;
lattice.add_edge(0, Edge::new(1, label_東to))?;
lattice.add_edge(1, Edge::new(2, label_京kyo))?;

lattice.add_edge(1, Edge::new(2, label_京kei))?;

lattices.push(lattice);

// 京浜 (kei hin)
let mut lattice = Lattice::new(2)?;
lattice.add_edge(0, Edge::new(1, label_京kei))?;
lattice.add_edge(1, Edge::new(2, label_浜hin))?;

lattice.add_edge(0, Edge::new(1, label_京kyo))?;

lattices.push(lattice);

// 京の都 (kyo no miyako)
let mut lattice = Lattice::new(3)?;
lattice.add_edge(0, Edge::new(1, label_京kyo))?;
lattice.add_edge(1, Edge::new(2, label_のno))?;
lattice.add_edge(2, Edge::new(3, label_都miyako))?;

lattice.add_edge(0, Edge::new(1, label_京kei))?;
lattice.add_edge(2, Edge::new(3, label_都to))?;

lattices.push(lattice);

// Generates a model
let trainer = Trainer::new();
let model = trainer.train(&lattices, provider);

// 水の都 (mizu no miyako)
let mut lattice = Lattice::new(3)?;
lattice.add_edge(0, Edge::new(1, label_水mizu))?;
lattice.add_edge(1, Edge::new(2, label_のno))?;
lattice.add_edge(2, Edge::new(3, label_都to))?;
lattice.add_edge(2, Edge::new(3, label_都miyako))?;

let (path, _) = model.search_best_path(&lattice);

assert_eq!(vec![
    Edge::new(1, label_水mizu),
    Edge::new(2, label_のno),
    Edge::new(3, label_都miyako),
], path);

License

Licensed under either of

at your option.

Contribution

See the guidelines.

Dependencies

~1–8.5MB
~67K SLoC