diff options
| author | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 | 
|---|---|---|
| committer | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 | 
| commit | add06dafdf874b1b419e5eef918c6b1131ab09fd (patch) | |
| tree | c1d52d3ece248d96562a3d77beb44973e7720847 /src/xml | |
| parent | f49d77c2961be28c3cc500af185813dd5e83a367 (diff) | |
perf: improve XML deserialization speed
Diffstat (limited to 'src/xml')
| -rw-r--r-- | src/xml/element.rs | 144 | ||||
| -rw-r--r-- | src/xml/mod.rs | 2 | ||||
| -rw-r--r-- | src/xml/parser.rs | 195 | 
3 files changed, 0 insertions, 341 deletions
diff --git a/src/xml/element.rs b/src/xml/element.rs deleted file mode 100644 index f469480..0000000 --- a/src/xml/element.rs +++ /dev/null @@ -1,144 +0,0 @@ -#[derive(Debug, PartialEq, Eq)] -pub struct Elements -{ -    elements: Vec<Element>, -} - -impl Elements -{ -    pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged> -    { -        self.elements.iter().find_map(|element| match element { -            Element::Tagged(tagged_element) if tagged_element.name == tag_name => { -                Some(tagged_element) -            } -            _ => None, -        }) -    } - -    pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> -    { -        self.elements -            .iter() -            .filter_map(|element| match element { -                Element::Tagged(tagged_element) if tagged_element.name == tag_name => { -                    Some(tagged_element) -                } -                _ => None, -            }) -            .collect() -    } - -    pub fn get_first_text_element(&self) -> Option<&String> -    { -        self.elements.iter().find_map(|element| match element { -            Element::Text(text) => Some(text), -            _ => None, -        }) -    } - -    pub fn get_all_text_elements(&self) -> Vec<&String> -    { -        self.elements -            .iter() -            .filter_map(|element| match element { -                Element::Text(text) => Some(text), -                _ => None, -            }) -            .collect() -    } - -    pub fn has_tagged_element(&self, tag_name: &str) -> bool -    { -        self.elements.iter().any(|element| { -            matches!( -                element, -                Element::Tagged(tagged_element) if tagged_element.name == tag_name -            ) -        }) -    } -} - -impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements -{ -    fn from(into_iter: IntoIter) -> Self -    { -        Self { -            elements: into_iter.into_iter().collect(), -        } -    } -} - -impl<'elements> IntoIterator for &'elements Elements -{ -    type IntoIter = Iter<'elements>; -    type Item = &'elements Element; - -    fn into_iter(self) -> Self::IntoIter -    { -        Self::IntoIter { -            elements: self.elements.iter(), -        } -    } -} - -pub struct Iter<'elements> -{ -    elements: std::slice::Iter<'elements, Element>, -} - -impl<'elements> Iterator for Iter<'elements> -{ -    type Item = &'elements Element; - -    fn next(&mut self) -> Option<Self::Item> -    { -        self.elements.next() -    } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Element -{ -    Tagged(Tagged), -    Text(String), -    Comment(String), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Tagged -{ -    name: String, -    child_elements: Elements, -} - -impl Tagged -{ -    pub fn new<Name, ChildElements>(name: &Name, child_elements: ChildElements) -> Self -    where -        Name: ToString, -        ChildElements: Into<Elements>, -    { -        Self { -            name: name.to_string(), -            child_elements: child_elements.into(), -        } -    } - -    pub fn name(&self) -> &str -    { -        &self.name -    } - -    pub fn child_elements(&self) -> &Elements -    { -        &self.child_elements -    } -} - -pub trait FromElements: Sized -{ -    type Error; - -    fn from_elements(elements: &Elements) -> Result<Self, Self::Error>; -} diff --git a/src/xml/mod.rs b/src/xml/mod.rs deleted file mode 100644 index 12368c3..0000000 --- a/src/xml/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod element; -pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs deleted file mode 100644 index d152a6e..0000000 --- a/src/xml/parser.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::io::BufRead; - -use quick_xml::events::{BytesStart, BytesText, Event}; -use quick_xml::Reader; - -use crate::xml::element::{Element, Elements, Tagged}; - -/// XML parser. -pub struct Parser<Source> -{ -    reader: Reader<Source>, -} - -impl<Source: BufRead> Parser<Source> -{ -    pub fn new(src: Source) -> Self -    { -        Self { -            reader: Reader::from_reader(src), -        } -    } - -    pub fn parse(&mut self) -> Result<Elements, Error> -    { -        let mut buf = Vec::new(); - -        let mut elements = Vec::new(); - -        loop { -            let event = self.reader.read_event_into(&mut buf)?; - -            let element = match self.handle_event(event)? { -                EventHandlingResult::Element(element) => element, -                EventHandlingResult::Event(_) => { -                    continue; -                } -                EventHandlingResult::End => { -                    break; -                } -            }; - -            elements.push(element); -        } - -        Ok(elements.into()) -    } - -    fn parse_text(text: &BytesText) -> Result<String, Error> -    { -        String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) -    } - -    fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> -    { -        let mut child_elements = Vec::new(); - -        let mut buf = Vec::new(); - -        loop { -            let event = self.reader.read_event_into(&mut buf)?; - -            match event { -                Event::End(end) if end.name() == start.name() => { -                    break; -                } -                event => match self.handle_event(event)? { -                    EventHandlingResult::Element(element) => { -                        child_elements.push(element); -                    } -                    EventHandlingResult::End => { -                        return Err(Error::UnexpectedEndOfFile); -                    } -                    EventHandlingResult::Event(_) => {} -                }, -            } -        } - -        Ok(Element::Tagged(Tagged::new( -            &String::from_utf8(start.name().as_ref().to_vec()) -                .map_err(|_| Error::TagNameNotUTF8)?, -            child_elements, -        ))) -    } - -    fn handle_event<'a>( -        &'a mut self, -        event: Event<'a>, -    ) -> Result<EventHandlingResult, Error> -    { -        match event { -            Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( -                Self::parse_text(&text)?, -            ))), -            Event::Start(start) => { -                Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) -            } -            Event::End(_) => Err(Error::UnexpectedTagEnd), -            Event::Eof => Ok(EventHandlingResult::End), -            Event::Comment(comment_text) => Ok(EventHandlingResult::Element( -                Element::Comment(Self::parse_text(&comment_text)?), -            )), -            event => Ok(EventHandlingResult::Event(event)), -        } -    } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error -{ -    #[error(transparent)] -    QuickXMLFailed(#[from] quick_xml::Error), - -    #[error("Text is not UTF-8")] -    TextNotUTF8, - -    #[error("Tag name is not UTF-8")] -    TagNameNotUTF8, - -    #[error("Unexpectedly found the end of a tag")] -    UnexpectedTagEnd, - -    #[error("Unexpected end of file")] -    UnexpectedEndOfFile, -} - -enum EventHandlingResult<'event> -{ -    Element(Element), -    Event(Event<'event>), -    End, -} - -#[cfg(test)] -mod tests -{ -    use pretty_assertions::assert_eq; - -    use super::*; - -    #[test] -    fn can_parse() -    { -        let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(vec![Element::Tagged(Tagged::new( -                &"foo", -                vec![Element::Text("Hello there".to_string())] -            ))]) -        ); - -        let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(vec![Element::Tagged(Tagged::new( -                &"foo", -                vec![ -                    Element::Tagged(Tagged::new( -                        &"bar", -                        Elements::from(vec![Element::Text("123".to_string())]) -                    )), -                    Element::Text(" Hello".to_string()) -                ] -            ))]) -        ); - -        let mut parser = Parser::new("".as_bytes()); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(Vec::new()) -        ); - -        let mut parser = Parser::new( -            "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), -        ); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(vec![Element::Tagged(Tagged::new( -                &"foo", -                vec![ -                    Element::Comment("XML is awful".to_string()), -                    Element::Text("Hello there".to_string()), -                    Element::Tagged(Tagged::new( -                        &"bar", -                        vec![Element::Text("123".to_string())] -                    )), -                ] -            ))]) -        ); -    } -}  | 
