5 releases
0.1.4 | Jul 14, 2023 |
---|---|
0.1.3 | Oct 3, 2022 |
0.1.2 | Sep 24, 2022 |
0.1.1 | Sep 20, 2022 |
0.1.0 | Sep 20, 2022 |
#766 in Database interfaces
482 downloads per month
140KB
3K
SLoC
search-query-parser
what is this library for
search-query-parser parses a complex search query into layered search conditions, making it easy to construct an Elasticsearch query DSL or a query for another search backend.
A complex search query like this: ↓↓↓
(word1 and -word2) or (("phrase word 1" or -"phrase word 2") and -(" a long phrase word " or word3))
will be parsed into layered search conditions like this: ↓↓↓
Condition::Operator(
Operator::Or,
vec![
Condition::Operator(
Operator::And,
vec![
Condition::Keyword("word1".into()),
Condition::Not(Box::new(Condition::Keyword("word2".into()))),
]
),
Condition::Operator(
Operator::And,
vec![
Condition::Operator(
Operator::Or,
vec![
Condition::PhraseKeyword("phrase word 1".into()),
Condition::Not(Box::new(Condition::PhraseKeyword(
"phrase word 2".into()
)))
]
),
Condition::Not(Box::new(Condition::Operator(
Operator::Or,
vec![
Condition::PhraseKeyword(" a long phrase word ".into()),
Condition::Keyword("word3".into())
]
)))
]
),
]
)
The conditions are built from the enum Condition and the enum Operator.
/// A node in the layered search-condition tree produced from a search query.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Condition {
/// No condition.
/// NOTE(review): presumably returned when the query yields nothing to search for — confirm against the parser.
None,
/// A single bare keyword, e.g. `word1`.
Keyword(String),
/// A double-quoted phrase, stored without the surrounding quotes.
PhraseKeyword(String),
/// Negation of the wrapped condition (written with a leading `-` in the query).
Not(Box<Condition>),
/// A list of sub-conditions combined with the given [`Operator`].
Operator(Operator, Vec<Condition>),
}
/// Logical operator used by `Condition::Operator` to combine sub-conditions.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Operator {
/// All sub-conditions are combined with AND.
And,
/// Sub-conditions are combined with OR.
Or,
}
usage
1. for Rust project
[dependencies]
search-query-parser = "0.1.4"
use search_query_parser::parse_query_to_condition;
let condition = parse_query_to_condition("any query string you like")?;
2. for REST Api
refer to search-query-parser-api repository
3. for JVM language via JNI
refer to search-query-parser-cdylib repository
parse rules
1. a space {\u0020} or a full-width space {\u3000} is treated as the AND operator
/// Two bare keywords separated by a space are combined with an implicit AND.
fn test_keywords_concat_with_spaces() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let expected = Condition::Operator(
        Operator::And,
        vec![keyword("word1"), keyword("word2")],
    );

    let parsed = parse_query_to_condition("word1 word2").unwrap();
    assert_eq!(parsed, expected);
}
2. the AND operator has higher priority than the OR operator
/// AND binds tighter than OR: `word1 OR word2 AND word3` groups as
/// `word1 OR (word2 AND word3)`.
fn test_keywords_concat_with_and_or() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let and_group = Condition::Operator(
        Operator::And,
        vec![keyword("word2"), keyword("word3")],
    );
    let expected = Condition::Operator(Operator::Or, vec![keyword("word1"), and_group]);

    let parsed = parse_query_to_condition("word1 OR word2 AND word3").unwrap();
    assert_eq!(parsed, expected);
}
3. conditions in brackets have higher priority
/// A bracketed group is evaluated as a unit, overriding the default
/// AND-over-OR precedence.
fn test_brackets() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let bracketed = Condition::Operator(
        Operator::Or,
        vec![keyword("word2"), keyword("word3")],
    );
    let expected = Condition::Operator(Operator::And, vec![keyword("word1"), bracketed]);

    let parsed = parse_query_to_condition("word1 AND (word2 OR word3)").unwrap();
    assert_eq!(parsed, expected);
}
4. text enclosed in double quotes is parsed as a phrase keyword
/// Everything inside double quotes becomes one phrase keyword; operators and
/// brackets inside the quotes are kept as literal text.
fn test_double_quote() {
    let query = "\"word1 AND (word2 OR word3)\" word4";
    let expected = Condition::Operator(
        Operator::And,
        vec![
            Condition::PhraseKeyword("word1 AND (word2 OR word3)".to_string()),
            Condition::Keyword("word4".to_string()),
        ],
    );

    let parsed = parse_query_to_condition(query).unwrap();
    assert_eq!(parsed, expected);
}
5. a leading minus (hyphen) is parsed as a negated condition
※ it can be used before keyword, phrase keyword or brackets
fn test_minus() {
let actual = parse_query_to_condition(
"-word1 -\"word2\" -(word3 OR word4)",
)
.unwrap();
assert_eq!(
actual,
Condition::Operator(
Operator::And,
vec![
Condition::Not(Box::new(Condition::Keyword("word1".into()))),
Condition::Not(Box::new(Condition::PhraseKeyword("word2".into()))),
Condition::Not(Box::new(Condition::Operator(
Operator::Or,
vec![
Condition::Keyword("word3".into()),
Condition::Keyword("word4".into())
]
))),
]
)
)
}
6. correcting incorrect search query
- empty brackets
/// An empty pair of brackets contributes nothing to the parsed condition.
fn test_empty_brackets() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let expected = Condition::Operator(Operator::And, vec![keyword("A"), keyword("B")]);

    let parsed = parse_query_to_condition("A AND () AND B").unwrap();
    assert_eq!(parsed, expected);
}
- reversed brackets
/// Brackets written in reverse order (`)` before `(`) do not group anything;
/// the query is parsed as `A OR (B AND C) OR D`.
fn test_reverse_brackets() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let middle = Condition::Operator(Operator::And, vec![keyword("B"), keyword("C")]);
    let expected = Condition::Operator(
        Operator::Or,
        vec![keyword("A"), middle, keyword("D")],
    );

    let parsed = parse_query_to_condition("A OR B) AND (C OR D").unwrap();
    assert_eq!(parsed, expected);
}
- wrong number of brackets
/// An opening bracket without a matching closing bracket is tolerated: the
/// well-formed group is kept and the dangling `(C` is parsed as keyword `C`.
fn test_missing_brackets() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let complete_group = Condition::Operator(Operator::Or, vec![keyword("A"), keyword("B")]);
    let expected = Condition::Operator(Operator::And, vec![complete_group, keyword("C")]);

    let parsed = parse_query_to_condition("(A OR B) AND (C").unwrap();
    assert_eq!(parsed, expected);
}
- empty phrase keyword
/// An empty quoted phrase (`""`) contributes nothing to the parsed condition.
fn test_empty_phrase_keywords() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let expected = Condition::Operator(Operator::And, vec![keyword("A"), keyword("B")]);

    let parsed = parse_query_to_condition("A AND \"\" AND B").unwrap();
    assert_eq!(parsed, expected);
}
- wrong number of double quotes
/// An unpaired double quote does not start a phrase: the properly quoted
/// `"A"` stays a phrase keyword, while `B` and `C` are parsed as plain keywords.
fn test_invalid_double_quote() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let expected = Condition::Operator(
        Operator::Or,
        vec![
            Condition::PhraseKeyword("A".to_string()),
            keyword("B"),
            keyword("C"),
        ],
    );

    let parsed = parse_query_to_condition("\"A\" OR \"B OR C").unwrap();
    assert_eq!(parsed, expected);
}
- AND and OR operators placed next to each other
/// Adjacent `AND OR` operators are corrected rather than rejected; for this
/// input the operands end up combined with OR.
fn test_invalid_and_or() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let expected = Condition::Operator(Operator::Or, vec![keyword("A"), keyword("B")]);

    let parsed = parse_query_to_condition("A AND OR B").unwrap();
    assert_eq!(parsed, expected);
}
7. search query optimization
/// A nested group that uses the same operator as its parent is flattened:
/// `(A OR (B OR C))` becomes a single three-way OR.
fn test_unnecessary_nest_brackets() {
    let keyword = |text: &str| Condition::Keyword(text.to_string());
    let flattened_or = Condition::Operator(
        Operator::Or,
        vec![keyword("A"), keyword("B"), keyword("C")],
    );
    let expected = Condition::Operator(Operator::And, vec![flattened_or, keyword("D")]);

    let parsed = parse_query_to_condition("(A OR (B OR C)) AND D").unwrap();
    assert_eq!(parsed, expected);
}
Dependencies
~2.6–4MB
~77K SLoC