diff options
author | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 |
---|---|---|
committer | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 |
commit | add06dafdf874b1b419e5eef918c6b1131ab09fd (patch) | |
tree | c1d52d3ece248d96562a3d77beb44973e7720847 /src/xml/parser.rs | |
parent | f49d77c2961be28c3cc500af185813dd5e83a367 (diff) |
perf: improve XML deserialization speed
Diffstat (limited to 'src/xml/parser.rs')
-rw-r--r-- | src/xml/parser.rs | 195 |
1 files changed, 0 insertions, 195 deletions
diff --git a/src/xml/parser.rs b/src/xml/parser.rs deleted file mode 100644 index d152a6e..0000000 --- a/src/xml/parser.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::io::BufRead; - -use quick_xml::events::{BytesStart, BytesText, Event}; -use quick_xml::Reader; - -use crate::xml::element::{Element, Elements, Tagged}; - -/// XML parser. -pub struct Parser<Source> -{ - reader: Reader<Source>, -} - -impl<Source: BufRead> Parser<Source> -{ - pub fn new(src: Source) -> Self - { - Self { - reader: Reader::from_reader(src), - } - } - - pub fn parse(&mut self) -> Result<Elements, Error> - { - let mut buf = Vec::new(); - - let mut elements = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - let element = match self.handle_event(event)? { - EventHandlingResult::Element(element) => element, - EventHandlingResult::Event(_) => { - continue; - } - EventHandlingResult::End => { - break; - } - }; - - elements.push(element); - } - - Ok(elements.into()) - } - - fn parse_text(text: &BytesText) -> Result<String, Error> - { - String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) - } - - fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> - { - let mut child_elements = Vec::new(); - - let mut buf = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - match event { - Event::End(end) if end.name() == start.name() => { - break; - } - event => match self.handle_event(event)? { - EventHandlingResult::Element(element) => { - child_elements.push(element); - } - EventHandlingResult::End => { - return Err(Error::UnexpectedEndOfFile); - } - EventHandlingResult::Event(_) => {} - }, - } - } - - Ok(Element::Tagged(Tagged::new( - &String::from_utf8(start.name().as_ref().to_vec()) - .map_err(|_| Error::TagNameNotUTF8)?, - child_elements, - ))) - } - - fn handle_event<'a>( - &'a mut self, - event: Event<'a>, - ) -> Result<EventHandlingResult, Error> - { - match event { - Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( - Self::parse_text(&text)?, - ))), - Event::Start(start) => { - Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) - } - Event::End(_) => Err(Error::UnexpectedTagEnd), - Event::Eof => Ok(EventHandlingResult::End), - Event::Comment(comment_text) => Ok(EventHandlingResult::Element( - Element::Comment(Self::parse_text(&comment_text)?), - )), - event => Ok(EventHandlingResult::Event(event)), - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error -{ - #[error(transparent)] - QuickXMLFailed(#[from] quick_xml::Error), - - #[error("Text is not UTF-8")] - TextNotUTF8, - - #[error("Tag name is not UTF-8")] - TagNameNotUTF8, - - #[error("Unexpectedly found the end of a tag")] - UnexpectedTagEnd, - - #[error("Unexpected end of file")] - UnexpectedEndOfFile, -} - -enum EventHandlingResult<'event> -{ - Element(Element), - Event(Event<'event>), - End, -} - -#[cfg(test)] -mod tests -{ - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn can_parse() - { - let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![Element::Text("Hello there".to_string())] - ))]) - ); - - let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Tagged(Tagged::new( - &"bar", - Elements::from(vec![Element::Text("123".to_string())]) - )), - Element::Text(" Hello".to_string()) - ] - ))]) - ); - - let mut parser = Parser::new("".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(Vec::new()) - ); - - let mut parser = Parser::new( - "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), - ); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Comment("XML is awful".to_string()), - Element::Text("Hello there".to_string()), - Element::Tagged(Tagged::new( - &"bar", - vec![Element::Text("123".to_string())] - )), - ] - ))]) - ); - } -} |