#xml #derive #nom-xml

macro nom-xml-derive

A Rust crate for simplifying the process of extracting nom-xml Documents into structs with fields of various Rust data types

1 unstable release

0.1.0 Sep 12, 2024

#313 in #xml


Used in nom-xml

MIT/Apache

125KB
2.5K SLoC

Nom-XML-Derive

nom-xml-derive is a crate for extracting data from nom-xml version 0.3.0+'s Document type into structs. Due to the nested nature of XML, the interface for extracting information from nom-xml's Document type can be tedious to implement for users. This derive macro crate was established to reduce that burden on users.

Let's look at an example:

Suppose we want to extract the first book in a catalog. Without this crate, we would need to implement the UpdateFields trait by hand for each struct:

use nom_xml::{
    attribute::{Attribute, AttributeValue},
    tag::Tag,
    Document, UpdateFields,
};

/// One `<book>` entry of the catalog, populated by the hand-written
/// `UpdateFields` implementation that follows.
#[derive(Debug, Default)]
struct Book {
    /// Value of the `isbn` attribute found on the tag being processed.
    isbn: String,
    /// Author information taken from the nested `authored_by` element.
    authored_by: AuthoredBy,
    /// Text content of the `title` element.
    title: String,
    /// Text content of the `genre` element.
    genre: String,
    /// Text content of the `type` element (`type` is a Rust keyword, hence `ty`).
    ty: String,
    /// Parsed content of the `series_number` element; unparsable text yields the default value.
    series_number: u8,
    /// Text content of the `description` element.
    description: String,
}
impl UpdateFields for Book {
    /// Applies a single `tag` / `doc` pair to this `Book`.
    ///
    /// * An `isbn` attribute on `tag` is copied into `self.isbn` on every call.
    /// * Text content is stored in the field selected by the tag's local name.
    /// * Nested content is walked one level deep: `authored_by` children are
    ///   delegated to `AuthoredBy`, every other child element recurses into this
    ///   method.
    ///
    /// # Errors
    ///
    /// Fails on an unrecognised tag name, on a nested item that is not an
    /// element, and on any `doc` that is neither text content nor nested.
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        // Attribute pass: only plain-valued `isbn` instances are of interest.
        if let Some(attrs) = &tag.attributes {
            for attr in attrs.iter() {
                match attr {
                    Attribute::Instance {
                        name,
                        value: AttributeValue::Value(raw),
                    } if name.local_part == "isbn" => {
                        self.isbn = raw.to_string();
                    }
                    _ => {}
                }
            }
        }

        match doc {
            // Leaf element: route the text to the matching field.
            Document::Content(Some(text)) => {
                match tag.name.local_part.as_str() {
                    "title" => self.title = text.to_string(),
                    "genre" => self.genre = text.to_string(),
                    "type" => self.ty = text.to_string(),
                    // A malformed number silently becomes the type's default.
                    "series_number" => self.series_number = text.parse().unwrap_or_default(),
                    "description" => self.description = text.to_string(),
                    e => return Err(format!("Unknown field2: {}", e).into()),
                }
                Ok(())
            }
            // Container element: visit each direct child.
            Document::Nested(_) => {
                for element in doc.iter_with_depth(1) {
                    match element {
                        Document::Element(child_tag, child_doc, _) => {
                            if child_tag.name.local_part == "authored_by" {
                                self.authored_by.update_fields(child_doc)?;
                            } else {
                                self.update_field(child_tag, child_doc)?;
                            }
                        }
                        _ => return Err(format!("Unknown field: {element:#?}").into()),
                    }
                }
                Ok(())
            }
            _ => Err("Content is missing".into()),
        }
    }
}

/// Contents of a book's `authored_by` element.
#[derive(Debug, Default, Clone)]
struct AuthoredBy {
    /// Text content of the `pen_name` element.
    pen_name: String,
    /// One entry per element nested under `authors`.
    authors: Vec<AuthorName>,
}

impl UpdateFields for AuthoredBy {
    /// Applies a single `tag` / `doc` pair to this `AuthoredBy`.
    ///
    /// `pen_name` with text content sets the pen name. `authors` with nested
    /// content appends one `AuthorName` per child element, built from that
    /// child's own nested elements.
    ///
    /// # Errors
    ///
    /// Fails when the tag/content combination is not one of the two above,
    /// when an author element has no nested content, or when an
    /// `AuthorName` field update fails.
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let field = tag.name.local_part.as_str();

        match (field, doc) {
            ("pen_name", Document::Content(Some(text))) => {
                self.pen_name = text.to_string();
            }
            ("authors", Document::Nested(entries)) => {
                for entry in entries.iter() {
                    // Anything that is not an element is skipped without error.
                    let author_doc = match entry {
                        Document::Element(_, author_doc, _) => author_doc,
                        _ => continue,
                    };

                    let parts = match author_doc.as_ref() {
                        Document::Nested(parts) => parts,
                        _ => return Err("Content is missing in Author authors".into()),
                    };

                    let mut author = AuthorName::default();
                    for part in parts.iter() {
                        if let Document::Element(part_tag, part_doc, _) = part {
                            author.update_field(part_tag, part_doc)?;
                        }
                    }
                    // Only pushed once every part was applied successfully.
                    self.authors.push(author);
                }
            }
            _ => {
                return Err(format!("Unknown field in Author: {}", tag.name.local_part).into());
            }
        }

        Ok(())
    }
}

/// A single author's name, as found inside an element nested under `authors`.
#[derive(Debug, Default, Clone)]
struct AuthorName {
    /// Text content of the `first_name` element.
    first_name: String,
    /// Text content of the `last_name` element.
    last_name: String,
}
impl UpdateFields for AuthorName {
    /// Stores the text content of a `first_name` or `last_name` element.
    ///
    /// # Errors
    ///
    /// Fails when `doc` carries no text content, or when the tag's local name
    /// is neither `first_name` nor `last_name`.
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        // Guard: both fields are plain text, so anything else is rejected up front.
        let text = match doc {
            Document::Content(Some(text)) => text,
            _ => return Err("Content is missing in AuthorName".into()),
        };

        // Pick the destination first, then write once.
        let target = match tag.name.local_part.as_str() {
            "first_name" => &mut self.first_name,
            "last_name" => &mut self.last_name,
            e => return Err(format!("Unknown field in AuthorName: {}", e).into()),
        };
        *target = text.to_string();

        Ok(())
    }
}

/// Manual-extraction example: parses a `book` element out of the sample
/// catalog, fills a `Book` through the hand-written `update_field`, and
/// pretty-prints the result.
///
/// # Errors
///
/// Propagates any error returned by the parser or by `Book::update_field`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Sample catalog; it must stay well-formed XML (each end tag closed exactly once).
    let data = r#"<catalog>
    <book isbn="978-0316129084">
        <authored_by>
            <pen_name>James S.A. Corey</pen_name>
            <authors>
                <author><first_name>Daniel</first_name><last_name>Abraham</last_name></author>
                <author><first_name>Ty</first_name><last_name>Franck</last_name></author>
            </authors>
        </authored_by>
        <title>Leviathan Wakes</title>
        <genre>Science Fiction</genre>
        <type>Novel</type>
        <series_number>1</series_number>
        <description>The first book in the Expanse series</description>
    </book>
    <book isbn="978-0316129060">
        <authored_by>
            <pen_name>James S.A. Corey</pen_name>
            <authors>
                <author><first_name>Daniel</first_name><last_name>Abraham</last_name></author>
                <author><first_name>Ty</first_name><last_name>Franck</last_name></author>
            </authors>
        </authored_by>
        <title>Caliban's War</title>
        <genre>Science Fiction</genre>
        <type>Novel</type>
        <series_number>2</series_number>
        <description>The second book in the Expanse series</description>
    </book></catalog>"#
        .to_string();

    let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
    let mut book = Book::default();

    // Feed every element found at the top level of the parsed document into
    // `Book::update_field`; non-element items are ignored.
    doc.iter_with_depth(0)
        .filter_map(|element| match element {
            Document::Element(tag, inner_doc, _) => Some((tag, inner_doc)),
            _ => None,
        })
        .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;

    println!("{book:#?}");
    Ok(())
}

By contrast, if we use the nom-xml-derive crate with nom-xml, we simply decorate each struct with #[derive(Debug, Default, Clone, ExtractFields, PartialEq)] and annotate any "special" fields with #[extract(<behavior>)]. The UpdateFields methods are then generated at compile time, significantly reducing the boilerplate needed:

use nom_xml::{
    attribute::{Attribute, AttributeValue},
    tag::Tag,
    Document, DocumentIteratorExt, UpdateFields,
};
use nom_xml_derive::ExtractFields;
use std::prelude::rust_2021::*;

// One `<book>` entry of the catalog. Deriving `ExtractFields` generates the
// extraction code at compile time instead of a hand-written `UpdateFields` impl.
#[derive(Debug, Default, Clone, ExtractFields, PartialEq)]
struct Book {
    // Taken from an attribute rather than a child element
    // (presumably the `isbn="…"` attribute on `<book>` — confirm against the macro docs).
    #[extract(from_attribute)]
    isbn: String,
    // Optional nested struct.
    authored_by: Option<AuthoredBy>,
    title: String,
    genre: String,
    // `type` is a Rust keyword, so the field is named `ty` and mapped to the `type` tag.
    #[extract(from_tag = "type")]
    ty: String,
    series_number: u8,
    description: Option<String>,
}

// Contents of a book's `<authored_by>` element; both fields are optional here.
#[derive(Debug, Default, Clone, ExtractFields, PartialEq)]
struct AuthoredBy {
    pen_name: Option<String>,
    // `Option` wrapping a `Vec` of a user-defined struct.
    authors: Option<Vec<AuthorName>>,
}

// A single author's name; the field names match the `<first_name>` and
// `<last_name>` tags in the sample data.
#[derive(Debug, Default, Clone, ExtractFields, PartialEq)]
struct AuthorName {
    first_name: String,
    last_name: String,
}

/// Derive-based example: parses a `book` element out of the sample catalog,
/// fills a `Book` with a single `update_fields` call, and pretty-prints it.
///
/// # Errors
///
/// Propagates any error returned by the parser or by `update_fields`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let data = String::from(
        r#"<catalog>
    <book isbn="978-0316129084">
        <authored_by>
            <pen_name>James S.A. Corey</pen_name>
            <authors>
                <author><first_name>Daniel</first_name><last_name>Abraham</last_name></author>
                <author><first_name>Ty</first_name><last_name>Franck</last_name></author>
            </authors>
        </authored_by>
        <title>Leviathan Wakes</title>
        <genre>Science Fiction</genre>
        <type>Novel</type>
        <series_number>1</series_number>
        <description>The first book in the Expanse series</description>
    </book>
    <book isbn="978-0316129060">
        <authored_by>
            <pen_name>James S.A. Corey</pen_name>
            <authors>
                <author><first_name>Daniel</first_name><last_name>Abraham</last_name></author>
                <author><first_name>Ty</first_name><last_name>Franck</last_name></author>
            </authors>
        </authored_by>
        <title>Caliban's War</title>
        <genre>Science Fiction</genre>
        <type>Novel</type>
        <series_number>2</series_number>
        <description>The second book in the Expanse series</description>
    </book></catalog>"#,
    );

    // Start from an all-default value; extraction overwrites what it finds.
    let mut book = Book::default();

    let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
    // One call replaces the manual iteration of the hand-written example.
    book.update_fields(&doc)?;

    println!("{book:#?}");
    Ok(())
}

Note: in the latter example, main() calls book.update_fields(&doc)?; instead of manually iterating over the document. This compile-time-generated method essentially does that work for you. For more, see the examples and tests in nom-xml.

Currently Supported Field Types:

  • String
  • Vec<String>
  • Vec<Numerical Types>
  • Vec<User Structs>
  • Option<T> (should work with any variation of the above)

Note: This is an extremely edge-case-heavy macro. If you're experiencing any errors, please submit an issue.

Dependencies

~245–690KB
~17K SLoC