10 unstable releases (3 breaking)

new 0.4.0 Sep 1, 2024
0.3.2 Aug 3, 2024
0.3.1 Jul 13, 2024
0.2.1 Jul 9, 2024
0.1.3 Jul 8, 2024

#1510 in Web programming

Download history 313/week @ 2024-07-03 266/week @ 2024-07-10 9/week @ 2024-07-17 4/week @ 2024-07-24 124/week @ 2024-07-31 11/week @ 2024-08-07 141/week @ 2024-08-28

157 downloads per month

MIT license

493 lines

reqwest-scraper - Web scraping integration with reqwest

crates.io Documentation CI

Extends reqwest to support multiple web scraping methods.


Start Guide

  • add dependencies
    reqwest = { version = "0.12", features = ["json"] }
    reqwest-scraper = "0.4"
  • use ScraperResponse
    use reqwest_scraper::ScraperResponse;


JsonPath

  • Json::select<T: DeserializeOwned>(path: &str) -> Result<Vec<T>>
  • Json::select_one<T: DeserializeOwned>(path: &str) -> Result<T>
  • Json::select_as_str(path: &str) -> Result<String>


use reqwest_scraper::ScraperResponse;

pub async fn request() -> Result<()> {
    let json = reqwest::Client::builder()
        .header("User-Agent", "Rust Reqwest")

    let total_count = json.select_as_str("$.total_count")?;
    let names: Vec<String> = json.select("$.items[*].full_name")?;

    println!("{}", total_count);
    println!("{}", names.join("\t"));


CSS selector

  • Html::select(selector: &str) -> Result<Selectable>
  • Selectable::iter() -> impl Iterator<SelectItem>
  • Selectable::first() -> Option<SelectItem>
  • SelectItem::name() -> &str
  • SelectItem::id() -> Option<&str>
  • SelectItem::has_class(class: &str, case_sensitive: CaseSensitivity) -> bool
  • SelectItem::classes() -> Classes
  • SelectItem::attrs() -> Attrs
  • SelectItem::attr(attr: &str) -> Option<&str>
  • SelectItem::text() -> String
  • SelectItem::html() -> String
  • SelectItem::inner_html() -> String
  • SelectItem::children() -> impl Iterator<SelectItem>
  • SelectItem::find(selector: &str) -> Result<Selectable>


use reqwest_scraper::ScraperResponse;

async fn request() -> Result<()> {
    let html = reqwest::get("https://github.com/holmofy")


    let select_result = html.select(".vcard-details > li.vcard-detail")?;

    for detail_item in select_result.iter() {
        println!("{}", detail_item.attr("aria-label").unwrap())



XPath

  • XHtml::select(xpath: &str) -> Result<XPathResult>
  • XPathResult::as_nodes() -> Vec<Node>
  • XPathResult::as_strs() -> Vec<String>
  • XPathResult::as_node() -> Option<Node>
  • XPathResult::as_str() -> Option<String>
  • Node::name() -> String
  • Node::id() -> Option<String>
  • Node::classes() -> HashSet<String>
  • Node::attr(attr: &str) -> Option<String>
  • Node::has_attr(attr: &str) -> bool
  • Node::text() -> String
  • TODO: Node::html() -> String
  • TODO: Node::inner_html() -> String
  • Node::children() -> Vec<Node>
  • Node::findnodes(relative_xpath: &str) -> Result<Vec<Node>>
  • Node::findvalues(relative_xpath: &str) -> Result<Vec<String>>
  • Node::findnode(relative_xpath: &str) -> Result<Option<Node>>
  • Node::findvalue(relative_xpath: &str) -> Result<Option<String>>


async fn request() -> Result<()> {
    let html = reqwest::get("https://github.com/holmofy")

    // simple extract element
    let name = html
    println!("{}", name);
    assert_eq!(name.trim(), "holmofy");

    // iterate elements
    let select_result = html

    println!("{}", select_result.len());

    for item in select_result.into_iter() {
        let attr = item.attr("aria-label").unwrap_or_else(|| "".into());
        println!("{}", attr);
        println!("{}", item.text());

    // attribute extract
    let select_result = html

    println!("{}", select_result.len());
    select_result.into_iter().for_each(|s| println!("{}", s));


Derive macro extract

Derive FromCssSelector and annotate the struct with #[selector(...)] attributes to extract HTML elements into a struct:

// define struct and derive the FromCssSelector trait
#[derive(Debug, FromCssSelector)]
#[selector(path = "#user-repositories-list > ul > li")]
struct Repo {
    #[selector(path = "a[itemprop~='name']", default = "<unname>", text)]
    name: String,

    #[selector(path = "span[itemprop~='programmingLanguage']", text)]
    program_lang: Option<String>,

    #[selector(path = "div.topics-row-container>a", text)]
    topics: Vec<String>,

// request
let html = reqwest::get("https://github.com/holmofy?tab=repositories")

// Use the generated `from_html` method to extract data into the struct
let items = Repo::from_html(html)?;
items.iter().for_each(|item| println!("{:?}", item));

Derive FromXPath and annotate the struct with #[xpath(...)] attributes to extract HTML elements into a struct:

// define struct and derive the FromXPath trait
#[derive(Debug, FromXPath)]
#[xpath(path = "//div[@id='user-repositories-list']/ul/li")]
struct Repo {
    #[xpath(path = ".//a[contains(@itemprop,'name')]/text()", default = "<unname>")]
    name: String,

    #[xpath(path = ".//span[contains(@itemprop,'programmingLanguage')]/text()")]
    program_lang: Option<String>,

    #[xpath(path = ".//div[contains(@class,'topics-row-container')]/a/text()")]
    topics: Vec<String>,

let html = reqwest::get("https://github.com/holmofy?tab=repositories")

// Use the generated `from_xhtml` method to extract data into the struct
let items = Repo::from_xhtml(html)?;
items.iter().for_each(|item| println!("{:?}", item));


~344K SLoC