From add06dafdf874b1b419e5eef918c6b1131ab09fd Mon Sep 17 00:00:00 2001 From: HampusM Date: Sat, 25 Mar 2023 17:42:28 +0100 Subject: perf: improve XML deserialization speed --- src/xml/parser.rs | 195 ------------------------------------------------------ 1 file changed, 195 deletions(-) delete mode 100644 src/xml/parser.rs (limited to 'src/xml/parser.rs') diff --git a/src/xml/parser.rs b/src/xml/parser.rs deleted file mode 100644 index d152a6e..0000000 --- a/src/xml/parser.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::io::BufRead; - -use quick_xml::events::{BytesStart, BytesText, Event}; -use quick_xml::Reader; - -use crate::xml::element::{Element, Elements, Tagged}; - -/// XML parser. -pub struct Parser -{ - reader: Reader, -} - -impl Parser -{ - pub fn new(src: Source) -> Self - { - Self { - reader: Reader::from_reader(src), - } - } - - pub fn parse(&mut self) -> Result - { - let mut buf = Vec::new(); - - let mut elements = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - let element = match self.handle_event(event)? { - EventHandlingResult::Element(element) => element, - EventHandlingResult::Event(_) => { - continue; - } - EventHandlingResult::End => { - break; - } - }; - - elements.push(element); - } - - Ok(elements.into()) - } - - fn parse_text(text: &BytesText) -> Result - { - String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) - } - - fn parse_tagged(&mut self, start: &BytesStart) -> Result - { - let mut child_elements = Vec::new(); - - let mut buf = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - match event { - Event::End(end) if end.name() == start.name() => { - break; - } - event => match self.handle_event(event)? { - EventHandlingResult::Element(element) => { - child_elements.push(element); - } - EventHandlingResult::End => { - return Err(Error::UnexpectedEndOfFile); - } - EventHandlingResult::Event(_) => {} - }, - } - } - - Ok(Element::Tagged(Tagged::new( - &String::from_utf8(start.name().as_ref().to_vec()) - .map_err(|_| Error::TagNameNotUTF8)?, - child_elements, - ))) - } - - fn handle_event<'a>( - &'a mut self, - event: Event<'a>, - ) -> Result - { - match event { - Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( - Self::parse_text(&text)?, - ))), - Event::Start(start) => { - Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) - } - Event::End(_) => Err(Error::UnexpectedTagEnd), - Event::Eof => Ok(EventHandlingResult::End), - Event::Comment(comment_text) => Ok(EventHandlingResult::Element( - Element::Comment(Self::parse_text(&comment_text)?), - )), - event => Ok(EventHandlingResult::Event(event)), - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error -{ - #[error(transparent)] - QuickXMLFailed(#[from] quick_xml::Error), - - #[error("Text is not UTF-8")] - TextNotUTF8, - - #[error("Tag name is not UTF-8")] - TagNameNotUTF8, - - #[error("Unexpectedly found the end of a tag")] - UnexpectedTagEnd, - - #[error("Unexpected end of file")] - UnexpectedEndOfFile, -} - -enum EventHandlingResult<'event> -{ - Element(Element), - Event(Event<'event>), - End, -} - -#[cfg(test)] -mod tests -{ - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn can_parse() - { - let mut parser = Parser::new("Hello there".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![Element::Text("Hello there".to_string())] - ))]) - ); - - let mut parser = Parser::new("123 Hello".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Tagged(Tagged::new( - &"bar", - Elements::from(vec![Element::Text("123".to_string())]) - )), - Element::Text(" Hello".to_string()) - ] - ))]) - ); - - let mut parser = Parser::new("".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(Vec::new()) - ); - - let mut parser = Parser::new( - "Hello there123".as_bytes(), - ); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Comment("XML is awful".to_string()), - Element::Text("Hello there".to_string()), - Element::Tagged(Tagged::new( - &"bar", - vec![Element::Text("123".to_string())] - )), - ] - ))]) - ); - } -} -- cgit v1.2.3-18-g5258