13 releases
0.1.93 | Mar 6, 2021 |
---|---|
0.1.92 | Mar 6, 2021 |
0.1.91 | Jan 21, 2021 |
#2301 in Parser implementations
49KB
1K
SLoC
Capricorn
Parse HTML according to configuration.
Capricorn is an HTML parsing library that supports recursion and custom execution order.
Default execution order
vec![String::from("selects"),
String::from("each"),
String::from("select_params"),
String::from("nodes"),
String::from("has"),
String::from("contains")];
selects > each > (one or all or fields) > ... text_attr_html > (text or attr or html);
selects > select_params > selects > ... text_attr_html > (text or attr or html);
selects > nodes > has > contains > text_attr_html > (text or attr or html);
Support:
Capricorn | support | example | val type |
---|---|---|---|
selects element | ✔ | field_name: selects: - element_name |
String |
selects class | ✔ | field_name: selects: - .class_name |
String |
selects class element | ✔ | field_name: selects: - .class_name - element_name |
String |
first | ✔ | field_name: selects: - element_name nodes: first: true |
String |
last | ✔ | field_name: selects: - element_name nodes: last: true |
String |
eq | ✔ | field_name: selects: - element_name nodes: eq: 0 |
String |
parent | ✔ | field_name: selects: - element_name nodes: parent: true |
String |
children | ✔ | field_name: selects: - element_name nodes: children: true |
String |
prev_sibling | ✔ | field_name: selects: - element_name nodes: prev_sibling: true |
String |
next_sibling | ✔ | field_name: selects: - element_name nodes: next_sibling: true |
String |
has_class | ✔ | field_name: selects: - element_name has: class: class_name |
String |
has_attr | ✔ | field_name: selects: - element_name has: attr: attr_name |
String |
each one | ✔ | field_name: selects: - element_name each: one: selects: - .class_name ... |
String |
each all | ✔ | field_name: selects: - element_name each: all: selects: - .class_name ... |
Array |
each fields | ✔ | field_name: selects: - element_name each: fields: field_name: selects: - .class_name ... field_name1: selects: - .class_name ... |
Map |
select_params | ✔ | field_name: selects: - element_name select_params: selects: - .class_name ... |
... |
text | ✔ | field_name: selects: - element_name text_attr_html: text: true |
String |
attr | ✔ | field_name: selects: - element_name text_attr_html: attr: true |
String |
html | ✔ | field_name: selects: - element_name text_attr_html: html: true |
String |
text contains | ✔ | field_name: selects: - element_name contains: contains: text: - test |
String |
text not contains | ✔ | field_name: selects: - element_name contains: not_contains: text: - test |
String |
html contains | ✔ | field_name: selects: - element_name contains: contains: html: - test |
String |
html not contains | ✔ | field_name: selects: - element_name contains: not_contains: html: - test |
String |
exec order | ✔ | field_name: exec_order: - selects - has - nodes selects: - element_name has: class: class_name nodes: first: true |
String |
data format splits | ✔ | field_name: selects: - element_name data_format: splits: - { key: str } |
Array |
data format splits | ✔ | field_name: selects: - element_name data_format: splits: - { key: str, index: 0 } |
String |
data format replaces | ✔ | field_name: selects: - element_name data_format: replaces: - str |
String |
data format deletes | ✔ | field_name: selects: - element_name data_format: deletes: - str |
String |
data format find | ✔ | field_name: selects: - element_name data_format: find: - regex |
String |
data format find_iter | ✔ | field_name: selects: - element_name data_format: find_iter: - regex |
Array |
Multi-version regular matching err | ✔ | regexes_match_parse_html: - regex: regex version: 1 err: err_msg |
Err |
Multi-version regular matching fields | ✔ | regexes_match_parse_html: - regex: regex version: 1 fields: field_name: selects: ... field_name: selects: ... |
Map |
Parse html code, more...
let yml = read_file("./test_html/test.yml").unwrap();
let params: parse::HashMapSelectParams = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r = parse::parse_html(&params, &html);
Multi-version regular-expression matching to parse HTML code, more...
let yml = read_file("./test_html/regexes_match_parse_html.yml").unwrap();
let v: match_html::MatchHtmlVec = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r = v.regexes_match_parse_html(html)?;
Dependencies
~12–20MB
~286K SLoC