#lexer #token #regex #debugging #lex #u64 #string

reglex

A Rust lexer generator using regex

5 releases (stable)

1.1.0 Feb 6, 2022
1.0.2 Feb 6, 2022
1.0.1 Feb 5, 2022
0.1.0 Feb 5, 2022

#2340 in Rust patterns

37 downloads per month
Used in rs-lambda

MIT license

9KB
79 lines

reglex

A small Rust library for creating regex-based lexers

Example


use reglex::{RuleList, rule_list, lex};

/// Token type emitted by the example lexer below.
///
/// `Debug` and `PartialEq` are derived so that lexer results can be compared
/// with `assert_eq!` in `main`.
#[derive(Debug, PartialEq)]
enum Token {
    /// Produced by the `"kw"` rule.
    Keyword,
    /// Produced by the `\d+` rule; holds the matched digits parsed as a `u64`.
    Number(u64),
    /// Produced by the `\{` rule.
    Left,
    /// Produced by the `\}` rule.
    Right,
}

/// Runs the example rule table over `input` and returns the resulting tokens.
///
/// The `Err(usize)` payload comes straight from `reglex::lex`; judging by the
/// assertions in `main` it is presumably the offset at which no rule matched
/// (confirm against the `reglex` documentation).
///
/// NOTE(review): `&String` is kept to preserve the original signature; `&str`
/// would be more idiomatic if this version of `lex` accepts it.
fn lexer(input: &String) -> Result<Vec<Token>, usize> {
    // Each entry pairs a regex with a callback building the token for a match.
    // The whitespace rule returns `None`, and whitespace never shows up in the
    // expected output in `main`.
    let rules: RuleList<Token> = rule_list! [
        "kw" => |_| Some(Token::Keyword),
        r"\d+" => |digits: &str| Some(Token::Number(digits.parse().unwrap())),
        r"\{" => |_| Some(Token::Left),
        r"\}" => |_| Some(Token::Right),
        r"\s" => |_| None
    ];

    lex(&rules, input)
}

/// Exercises `lexer` on one well-formed input and one input containing text
/// that no rule matches.
fn main() {
    // Well-formed input: every non-whitespace lexeme maps to a token.
    let expected = vec![
        Token::Keyword,
        Token::Left,
        Token::Number(12),
        Token::Number(53),
        Token::Right,
    ];
    let source = "kw  { 12 53 }".to_string();
    assert_eq!(lexer(&source), Ok(expected));

    // `ERROR!` starts at index 3 and matches no rule, so lexing fails there.
    let bad_source = "kw ERROR! { 12 53 }".to_string();
    assert_eq!(lexer(&bad_source), Err(3));
}

lib.rs:

A small library for creating regex-based lexers

Example


use reglex::{RuleList, RuleInput, rule_list, lex};

/// Token type emitted by the example lexer below.
///
/// `Debug` and `PartialEq` are derived so that lexer results can be compared
/// with `assert_eq!` in `main`.
#[derive(Debug, PartialEq)]
enum Token {
    /// Produced by the `kw` rule.
    Keyword,
    /// Produced by the `\d+` rule; holds the matched digits parsed as a `u64`.
    Number(u64),
    /// Produced by the `#([a-z]+)` rule; holds the text of capture group 1
    /// (the name without the leading `#`).
    Hashtag(String),
    /// Produced by the `\{` rule.
    Left,
    /// Produced by the `\}` rule.
    Right,
}

/// Runs the example rule table over `input` and returns the resulting tokens.
///
/// The `Err(usize)` payload comes straight from `reglex::lex`; judging by the
/// assertions in `main` it is presumably the offset at which no rule matched
/// (confirm against the `reglex` documentation).
fn lexer(input: &str) -> Result<Vec<Token>, usize> {
    // Each entry pairs a regex with a callback that receives the match as a
    // `RuleInput`: index 0 is used for the whole match, index 1 for the first
    // capture group. The whitespace rule returns `None`, and whitespace never
    // shows up in the expected output in `main`.
    let rules: RuleList<Token> = rule_list! [
        r"kw" => |_| Some(Token::Keyword),
        r"\d+" => |caps: RuleInput| Some(Token::Number(caps[0].parse().unwrap())),
        r"\{" => |_| Some(Token::Left),
        r"\}" => |_| Some(Token::Right),
        r"#([a-z]+)" => |caps: RuleInput| Some(Token::Hashtag(caps[1].to_string())),
        r"\s" => |_| None,
    ];

    lex(&rules, input)
}

fn main() {
    assert_eq!(
        lexer("kw  { 12 #hello 53 }"),
        Ok(vec![
            Token::Keyword,
            Token::Left,
            Token::Number(12),
            Token::Hashtag("hello".to_string()),
            Token::Number(53),
            Token::Right,
        ])
    );

    assert_eq!(lexer("kw ERROR! { 12 #hello 53 }"), Err(3));
}

Dependencies

~2–3MB
~54K SLoC