6 releases

0.1.7 Sep 29, 2023
0.1.6 Jul 6, 2023
0.1.5 Jun 29, 2023
0.1.4 May 21, 2023
0.1.0 Apr 10, 2023

#797 in Text processing

39 downloads per month

AGPL-3.0

140KB
3.5K SLoC

jp_utils

Rust crate providing some handy tools for working with Japanese text

Usage

jp_utils = "0.1.4"

Examples

use jp_utils::furi::{segment::SegmentRef, Furigana}; // Feature: "furigana"
use jp_utils::hiragana::Syllable; // Feature: "hiragana"
use jp_utils::JapaneseExt;
use jp_utils::{alphabet::Alphabet, counter::is_counter};

// Basic string checks on Japanese text, provided by the `jp_utils::JapaneseExt` trait.
// Whole-string alphabet checks:
assert!("ひらがな".is_kana());
assert!("ひらがな".is_hiragana());
assert!("日本語".is_kanji());
// A string mixing kanji and kana still reports that it contains kanji.
assert!("日ほん語".has_kanji());
assert!("日本語".is_japanese());
assert!("例です".starts_with_alphabet(Alphabet::Kanji));
// Small kana versus its regular-sized counterpart.
assert!("ょ".is_small_kana());
assert!(!"よ".is_small_kana());
// Japanese punctuation counts as a symbol.
assert!("、".is_symbol());
// Full-width digit converted to its ASCII (half-width) form.
assert_eq!("1".to_halfwidth(), "1");

// Hiragana hacks (requires feature: "hiragana")
assert_eq!(
    Syllable::from_char('').to_dakuten(),
    Syllable::from_char('')
);

// Furigana parsing (requires feature "furigana"!)
// Encoded form: `[kanji|reading|reading...]` blocks interleaved with plain kana.
let furigana = Furigana("[日本|に|ほん]が[好|す]きです");
assert_eq!(furigana.kanji_str(), "日本が好きです");
assert_eq!(furigana.kana_str(), "にほんがすきです");

// The segments can also be iterated one by one, in the order they appear.
let mut iter = furigana.segments();
assert_eq!(
    iter.next(),
    Some(SegmentRef::new_kanji("日本", &["に", "ほん"]))
);
assert_eq!(iter.next(), Some(SegmentRef::new_kana("が")));
assert_eq!(iter.next(), Some(SegmentRef::new_kanji("好", &["す"])));
assert_eq!(iter.next(), Some(SegmentRef::new_kana("きです")));
assert_eq!(iter.next(), None);

// Counter
// `is_counter` tells counter words (such as 本) apart from ordinary words.
assert!(is_counter("本"));
assert!(!is_counter("猫"));

Dependencies

~275–570KB
~17K SLoC