1 unstable release
0.1.0 | Feb 11, 2025 |
---|
#2 in #bnf
108 downloads per month
79KB
1K
SLoC
bnf_syntax_parser
The syntax parser based on BNF rules.
Usage
[dependencies]
bnf_syntax_parser = "0.1.0"
Quickstart
use std::collections::HashMap;
use bnf_syntax_parser::{TokenType, Token, BNFRule, BNFRules, Parser, MatchRecord, ParseTree};
#[derive(Clone, Debug, PartialEq, Eq)]
enum NumericExpressionTokenType {
Number,
OperatorAdd,
OperatorSub,
OperatorMul,
OperatorDiv,
}
impl TokenType for NumericExpressionTokenType {}
#[derive(Clone, Debug, PartialEq, Eq)]
struct NumericExpressionToken {
token_type: NumericExpressionTokenType,
value: usize,
}
impl NumericExpressionToken {
fn number(number: usize) -> Self {
Self { token_type: NumericExpressionTokenType::Number, value: number }
}
fn operator(operator: NumericExpressionTokenType) -> Self {
Self { token_type: operator, value: 0 }
}
}
impl std::fmt::Display for NumericExpressionToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.token_type {
NumericExpressionTokenType::Number => write!(f, "{:?} {}", self.token_type, self.value),
_ => write!(f, "{:?}", self.token_type),
}
}
}
impl Token<NumericExpressionTokenType> for NumericExpressionToken {
fn token_type(&self) -> &NumericExpressionTokenType {
&self.token_type
}
}
// Compile the rules string to BNFRules
const RULES: &str = "
Integer = Number {Number};
MultiplicationExpression = Integer {(OperatorMul | OperatorDiv) Integer};
Expression = MultiplicationExpression {(OperatorAdd | OperatorSub) MultiplicationExpression};
";
let TOKEN_TYPE_CONVERTER = HashMap::from([
("Number".to_string(), NumericExpressionTokenType::Number),
("OperatorAdd".to_string(), NumericExpressionTokenType::OperatorAdd),
("OperatorSub".to_string(), NumericExpressionTokenType::OperatorSub),
("OperatorMul".to_string(), NumericExpressionTokenType::OperatorMul),
("OperatorDiv".to_string(), NumericExpressionTokenType::OperatorDiv),
]);
let rules_vec_char = RULES.chars().collect::<Vec<char>>();
let rules = match BNFRules::new(
&rules_vec_char,
|token_type_identifier| TOKEN_TYPE_CONVERTER.get(token_type_identifier).cloned(),
&"Expression".to_string()
) {
Ok(r) => r,
Err(e) => panic!("{:?}", e),
};
println!("The compiled rules:");
rules.display(|token_type| Some(format!("{:?}", token_type))).unwrap();
println!("");
// Two ways to create a parser
let parser = Parser::new(rules.rules.clone(), rules.start_rule_index.unwrap());
let parser = Parser::try_from(&rules).unwrap();
const EXPRESSION_STRING: &str = "12*3+456/78";
let TOKEN_CONVERTER = HashMap::from([
('1', NumericExpressionToken::number(1)),
('2', NumericExpressionToken::number(2)),
('3', NumericExpressionToken::number(3)),
('4', NumericExpressionToken::number(4)),
('5', NumericExpressionToken::number(5)),
('6', NumericExpressionToken::number(6)),
('7', NumericExpressionToken::number(7)),
('8', NumericExpressionToken::number(8)),
('9', NumericExpressionToken::number(9)),
('0', NumericExpressionToken::number(0)),
('+', NumericExpressionToken::operator(NumericExpressionTokenType::OperatorAdd)),
('-', NumericExpressionToken::operator(NumericExpressionTokenType::OperatorSub)),
('*', NumericExpressionToken::operator(NumericExpressionTokenType::OperatorMul)),
('/', NumericExpressionToken::operator(NumericExpressionTokenType::OperatorDiv)),
]);
// Convert the expression string to expression tokens
let expression_tokens = EXPRESSION_STRING.chars().map(|c| TOKEN_CONVERTER.get(&c).unwrap().clone()).collect::<Vec<NumericExpressionToken>>();
assert!(&expression_tokens == &vec![
NumericExpressionToken::number(1),
NumericExpressionToken::number(2),
NumericExpressionToken::operator(NumericExpressionTokenType::OperatorMul),
NumericExpressionToken::number(3),
NumericExpressionToken::operator(NumericExpressionTokenType::OperatorAdd),
NumericExpressionToken::number(4),
NumericExpressionToken::number(5),
NumericExpressionToken::number(6),
NumericExpressionToken::operator(NumericExpressionTokenType::OperatorDiv),
NumericExpressionToken::number(7),
NumericExpressionToken::number(8),
]);
// Parse the tokens to match_records buffer
let mut match_records = Vec::new();
match parser.parse_to_buffer(&expression_tokens, &mut match_records) {
Ok(r) => match r {
Ok(r) => (),
Err(e) => panic!("{:?}", e),
}
Err(e) => panic!("{:?}", e),
};
// Convert the match_records to parse tree data structure
let parse_tree = ParseTree::new(&match_records).unwrap();
println!("The parse tree:");
parse_tree.display(
|token_index| expression_tokens.get(token_index).map(|t| t),
|rule_index| rules.rules_identifier.get(rule_index)
).unwrap();
Run the above code and you can get the output:
The compiled rules:
0 Integer = Number { Number } ;
1 MultiplicationExpression = Integer { ( OperatorMul | OperatorDiv ) Integer } ;
2 Expression = MultiplicationExpression { ( OperatorAdd | OperatorSub ) MultiplicationExpression } ;
The parse tree:
{ Expression
{ MultiplicationExpression
{ Integer
Number 1
Number 2
} Integer
OperatorMul
{ Integer
Number 3
} Integer
} MultiplicationExpression
OperatorAdd
{ MultiplicationExpression
{ Integer
Number 4
Number 5
Number 6
} Integer
OperatorDiv
{ Integer
Number 7
Number 8
} Integer
} MultiplicationExpression
} Expression
BNF
Each BNF rule has four parts: a left-hand side, a right-hand side, the "=" character separating these two sides and the ";" character marking the end of rule. The left-hand side is the name of the rule and the right-hand side is the description of the rule. The four description forms is explained below.
Form | Semantic |
---|---|
Sequence | Items appear left–to–right, their order in important. |
Choice | Alternative items are enclosed between "(" and ")" (parenthesis) and separated by a "|" (stroke), one item is chosen from this list of alternatives, their order is unimportant. |
Option | The optional item is enclosed between "[" and "]" (square–brackets), the item can be either included or discarded. |
Repetition | The repeatable item is enclosed between "{" and "}" (curly–braces), the item can be repeated zero or more times. |