#bnf #parser #no-std

no-std fn-bnf

A parsing grammar generator that lets you use arbitrary Rust items as rules

2 releases (1 stable)

new 1.0.0 Oct 22, 2024
0.9.0 Oct 20, 2024

#49 in Parser tooling

Download history 343/week @ 2024-10-17

343 downloads per month

MIT/Apache

315KB
862 lines

Repository Latest version Documentation MSRV unsafe forbidden License

fn-bnf

This crate contains a no_std compatible, low-allocation parsing library that uses a BNF-like syntax with the define! macro to allow for using arbitrary Rust items as grammar rules, and for parsing both strs and any [T] (for example, [u8] or [Token]).

Feature flags

This crate has two feature flags:

  • more_tuple_impls, which raises the maximum size of tuples of Rules that Rule is implemented for from 16 to 256 elements - however, enabling this will increase compilation times dramatically
  • error_in_core, which allows this library to be used with nightly compilers older than Rust 1.81.0 - however, continued support for versions below 1.81.0 is not guaranteed

A note about the stack

This library's very lifeblood is deep - and likely recursive - function calls. You may run into stack overflow issues if you have an overly complex grammar, or are blindly parsing malicious input.

Example

use fn_bnf::{define, Any, Rule, While, Fail, errors::Unexpected};

/// A single lexical token of the calculator language.
///
/// Equality is derived, so two `Number` tokens compare by their `f64`
/// payload (which means a NaN payload never equals itself).
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Token {
    /// A numeric literal, carried as its parsed value.
    Number(f64),
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Asterisk,
    /// `/`
    Slash,
    /// `^`
    Carat,
    /// `%`
    Percent,
    /// The `ans` keyword.
    Ans,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
}

// Lexer grammar: turns a `str` input into a `Vec<Token>`. `LangTokens` is the only `pub` rule.
// NOTE(review): the readings of the DSL used in the comments below (`,` as sequencing, `:` as
// alternation, a leading `_` as "parse but discard the value") are inferred from how this example
// uses them - confirm against the `define!` macro documentation.
define! {
    grammar MathTokens<str> {
        // Mapping the individual parses to () makes .hoard() create a Vec<()>, which doesn't allocate
        WhitespaceToken -> () = _ (' ', '\n', '\t');
        Whitespace -> () = _ WhitespaceToken, _ WhitespaceToken.hoard();

        // Entry rule: gathers every `LangToken` result, then keeps only the `Some` values,
        // so entries that produced `None` do not appear in the output.
        pub LangTokens -> Vec<Token> = LangToken.consume_all()
            .map_parsed(|v| v.into_iter().filter_map(|v| v).collect() );
        // One lexical item: a token rule, a run of whitespace, or (tried last) `InvalidChar`.
        // presumably the whitespace alternative is the one that yields `None` - TODO confirm
        LangToken -> Option<Token> = 
            Num : Plus : Minus : Asterisk : Slash : Percent : Carat
            : LParen : RParen : Ans : _ Whitespace 
            : InvalidChar;
        // Since Fail returns !, we can coerce from that to a token
        // NOTE(review): `arg_0` presumably names the value parsed by the preceding `Any` - confirm.
        InvalidChar -> Token from(|_, n| n) = Any, Fail::new(Unexpected::new(arg_0));

        // Single-character operators and parentheses, each mapped to its `Token` variant.
        Plus -> Token = '+'.map_parsed(|_| Token::Plus);
        Minus -> Token = '-'.map_parsed(|_| Token::Minus);
        Asterisk -> Token = '*'.map_parsed(|_| Token::Asterisk);
        Slash -> Token = '/'.map_parsed(|_| Token::Slash);
        Percent -> Token = '%'.map_parsed(|_| Token::Percent);
        Carat -> Token = '^'.map_parsed(|_| Token::Carat);
        LParen -> Token = '('.map_parsed(|_| Token::LeftParen);
        RParen -> Token = ')'.map_parsed(|_| Token::RightParen);

        // The literal keyword `ans`.
        Ans -> Token = "ans".map_parsed(|_| Token::Ans);

        // A number: one of the NaN / infinity spellings, or a `Float`; the resulting `f64`
        // is wrapped in `Token::Number`.
        Num -> Token from(|n| Token::Number(n)) = 
            ("nan", "NaN").map_parsed(|_| f64::NAN) : 
            ("inf", "Infinity").map_parsed(|_| f64::INFINITY) : 
            Float;
        // Converts the source text spanned by `FloatTokens` with `f64::from_str`.
        // NOTE(review): calling `f64::from_str` by path needs the `FromStr` trait in scope; no such
        // import is visible in this snippet - confirm it is provided elsewhere.
        Float -> f64 try_from(f64::from_str) = FloatTokens.spanned().map_parsed(|span| span.source);
        
        // Shape of a float literal: digits, then a fraction part and an exponent part.
        // presumably `.attempt()` is what makes those two parts optional - TODO confirm
        FloatTokens -> () = _ UInt, _ FloatFract.attempt(), _ FloatExp.attempt();
        // `.` followed by digits.
        FloatFract -> () = _ '.', _ UInt;
        // `e` or `E`, an attempted sign, then digits.
        FloatExp -> () = _ ('e', 'E'), _ ('-', '+').attempt(), _ UInt;

        // Characters for which `char::is_ascii_digit` holds; whether an empty match is
        // accepted is not visible from this snippet.
        UInt -> &'input str = While::from(char::is_ascii_digit);
    }
}

// Evaluator grammar: parses a `[Token]` slice and computes the `f64` result as it goes.
// `Expr` is the only `pub` rule. Each binary level pairs a left operand with a list of
// `(operator slice, right operand)` suffixes and hands both to `parse_expr`.
define! {
    grammar TokenMath<[Token]> {
        // Entry rule: a product followed by additive suffixes.
        pub Expr -> f64 from(parse_expr) = Prod, SumSuf.consume_all();

        // NOTE(review): `EOF` is not referenced by any other rule visible in this grammar.
        EOF -> () = Rule::<'input, [Token]>::prevent(Any);
        // Same shape as `Expr`, but built with `.hoard()` rather than `.consume_all()`;
        // used by `Atom` for the contents of parentheses.
        Sum -> f64 from(parse_expr) = Prod, SumSuf.hoard();
        // `+` or `-` followed by a product; the matched operator slice is kept so that
        // `parse_expr` can dispatch on its first element.
        SumSuf -> (&'input [Token], f64) = ([Token::Plus], [Token::Minus]), Prod;
        // A power expression followed by `*`, `/` or `%` suffixes.
        Prod -> f64 from(parse_expr) = Exp, ProdSuf.hoard();
        ProdSuf -> (&'input [Token], f64) = ([Token::Asterisk], [Token::Slash], [Token::Percent]), Exp;
        // A possibly negated atom followed by `^` suffixes.
        // NOTE(review): `parse_expr` applies suffixes in order, so `a ^ b ^ c` would evaluate
        // as `(a ^ b) ^ c` - confirm that left-to-right exponentiation is intended.
        Exp -> f64 from(parse_expr) = Neg, ExpSuf.hoard();
        ExpSuf -> (&'input [Token], f64) = [Token::Carat], Neg;
        // Leading minus: whether the attempted `Minus` match succeeded becomes a bool,
        // which decides if the following atom is negated.
        Neg -> f64 from(|negative, num: f64| if negative {-num} else {num}) 
            = [Token::Minus].attempt().map_parsed(|opt| opt.is_ok()), Atom;
        // Either a parenthesised `Sum` or a bare number.
        Atom -> f64 = _ [Token::LeftParen], Sum, _ [Token::RightParen] : Number;
        // Takes whatever `Any` matched and succeeds only for `Token::Number`, yielding its
        // value; any other token is returned as an `Unexpected` error.
        Number -> f64 try_from(|token: &Token| {
            let Token::Number(n) = token else { return Err(Unexpected::<Token>::new(*token)); };
            Ok(*n)
        }) = Any;
    }
}

/// Folds a left operand with a sequence of `(operator, right operand)` pairs.
///
/// The pairs are applied strictly in order, each one updating the running
/// value, so the result is a left-to-right evaluation of the chain.
///
/// Only the first element of each operator slice is inspected.
///
/// # Panics
///
/// Panics if an operator slice is empty, or if its first token is not one of
/// `Plus`, `Minus`, `Asterisk`, `Slash`, `Percent` or `Carat`.
fn parse_expr(lhs: f64, suffixes: Vec<(&[Token], f64)>) -> f64 {
    suffixes
        .into_iter()
        .fold(lhs, |acc, (operator, operand)| match operator[0] {
            Token::Plus => acc + operand,
            Token::Minus => acc - operand,
            Token::Asterisk => acc * operand,
            Token::Slash => acc / operand,
            Token::Percent => acc % operand,
            Token::Carat => acc.powf(operand),
            _ => unreachable!(),
        })
}

Licensing

This crate is dual-licensed under the Apache 2.0 or MIT licenses.

Dependencies

~1.8–2.5MB
~49K SLoC