diff options
author | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 |
---|---|---|
committer | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 |
commit | add06dafdf874b1b419e5eef918c6b1131ab09fd (patch) | |
tree | c1d52d3ece248d96562a3d77beb44973e7720847 /src/xml | |
parent | f49d77c2961be28c3cc500af185813dd5e83a367 (diff) |
perf: improve XML deserialization speed
Diffstat (limited to 'src/xml')
-rw-r--r-- | src/xml/element.rs | 144 | ||||
-rw-r--r-- | src/xml/mod.rs | 2 | ||||
-rw-r--r-- | src/xml/parser.rs | 195 |
3 files changed, 0 insertions, 341 deletions
diff --git a/src/xml/element.rs b/src/xml/element.rs deleted file mode 100644 index f469480..0000000 --- a/src/xml/element.rs +++ /dev/null @@ -1,144 +0,0 @@ -#[derive(Debug, PartialEq, Eq)] -pub struct Elements -{ - elements: Vec<Element>, -} - -impl Elements -{ - pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged> - { - self.elements.iter().find_map(|element| match element { - Element::Tagged(tagged_element) if tagged_element.name == tag_name => { - Some(tagged_element) - } - _ => None, - }) - } - - pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> - { - self.elements - .iter() - .filter_map(|element| match element { - Element::Tagged(tagged_element) if tagged_element.name == tag_name => { - Some(tagged_element) - } - _ => None, - }) - .collect() - } - - pub fn get_first_text_element(&self) -> Option<&String> - { - self.elements.iter().find_map(|element| match element { - Element::Text(text) => Some(text), - _ => None, - }) - } - - pub fn get_all_text_elements(&self) -> Vec<&String> - { - self.elements - .iter() - .filter_map(|element| match element { - Element::Text(text) => Some(text), - _ => None, - }) - .collect() - } - - pub fn has_tagged_element(&self, tag_name: &str) -> bool - { - self.elements.iter().any(|element| { - matches!( - element, - Element::Tagged(tagged_element) if tagged_element.name == tag_name - ) - }) - } -} - -impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements -{ - fn from(into_iter: IntoIter) -> Self - { - Self { - elements: into_iter.into_iter().collect(), - } - } -} - -impl<'elements> IntoIterator for &'elements Elements -{ - type IntoIter = Iter<'elements>; - type Item = &'elements Element; - - fn into_iter(self) -> Self::IntoIter - { - Self::IntoIter { - elements: self.elements.iter(), - } - } -} - -pub struct Iter<'elements> -{ - elements: std::slice::Iter<'elements, Element>, -} - -impl<'elements> Iterator for Iter<'elements> -{ - type Item = &'elements Element; - - fn next(&mut self) -> Option<Self::Item> - { - self.elements.next() - } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Element -{ - Tagged(Tagged), - Text(String), - Comment(String), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Tagged -{ - name: String, - child_elements: Elements, -} - -impl Tagged -{ - pub fn new<Name, ChildElements>(name: &Name, child_elements: ChildElements) -> Self - where - Name: ToString, - ChildElements: Into<Elements>, - { - Self { - name: name.to_string(), - child_elements: child_elements.into(), - } - } - - pub fn name(&self) -> &str - { - &self.name - } - - pub fn child_elements(&self) -> &Elements - { - &self.child_elements - } -} - -pub trait FromElements: Sized -{ - type Error; - - fn from_elements(elements: &Elements) -> Result<Self, Self::Error>; -} diff --git a/src/xml/mod.rs b/src/xml/mod.rs deleted file mode 100644 index 12368c3..0000000 --- a/src/xml/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod element; -pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs deleted file mode 100644 index d152a6e..0000000 --- a/src/xml/parser.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::io::BufRead; - -use quick_xml::events::{BytesStart, BytesText, Event}; -use quick_xml::Reader; - -use crate::xml::element::{Element, Elements, Tagged}; - -/// XML parser. -pub struct Parser<Source> -{ - reader: Reader<Source>, -} - -impl<Source: BufRead> Parser<Source> -{ - pub fn new(src: Source) -> Self - { - Self { - reader: Reader::from_reader(src), - } - } - - pub fn parse(&mut self) -> Result<Elements, Error> - { - let mut buf = Vec::new(); - - let mut elements = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - let element = match self.handle_event(event)? { - EventHandlingResult::Element(element) => element, - EventHandlingResult::Event(_) => { - continue; - } - EventHandlingResult::End => { - break; - } - }; - - elements.push(element); - } - - Ok(elements.into()) - } - - fn parse_text(text: &BytesText) -> Result<String, Error> - { - String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) - } - - fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> - { - let mut child_elements = Vec::new(); - - let mut buf = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - match event { - Event::End(end) if end.name() == start.name() => { - break; - } - event => match self.handle_event(event)? { - EventHandlingResult::Element(element) => { - child_elements.push(element); - } - EventHandlingResult::End => { - return Err(Error::UnexpectedEndOfFile); - } - EventHandlingResult::Event(_) => {} - }, - } - } - - Ok(Element::Tagged(Tagged::new( - &String::from_utf8(start.name().as_ref().to_vec()) - .map_err(|_| Error::TagNameNotUTF8)?, - child_elements, - ))) - } - - fn handle_event<'a>( - &'a mut self, - event: Event<'a>, - ) -> Result<EventHandlingResult, Error> - { - match event { - Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( - Self::parse_text(&text)?, - ))), - Event::Start(start) => { - Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) - } - Event::End(_) => Err(Error::UnexpectedTagEnd), - Event::Eof => Ok(EventHandlingResult::End), - Event::Comment(comment_text) => Ok(EventHandlingResult::Element( - Element::Comment(Self::parse_text(&comment_text)?), - )), - event => Ok(EventHandlingResult::Event(event)), - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error -{ - #[error(transparent)] - QuickXMLFailed(#[from] quick_xml::Error), - - #[error("Text is not UTF-8")] - TextNotUTF8, - - #[error("Tag name is not UTF-8")] - TagNameNotUTF8, - - #[error("Unexpectedly found the end of a tag")] - UnexpectedTagEnd, - - #[error("Unexpected end of file")] - UnexpectedEndOfFile, -} - -enum EventHandlingResult<'event> -{ - Element(Element), - Event(Event<'event>), - End, -} - -#[cfg(test)] -mod tests -{ - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn can_parse() - { - let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![Element::Text("Hello there".to_string())] - ))]) - ); - - let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Tagged(Tagged::new( - &"bar", - Elements::from(vec![Element::Text("123".to_string())]) - )), - Element::Text(" Hello".to_string()) - ] - ))]) - ); - - let mut parser = Parser::new("".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(Vec::new()) - ); - - let mut parser = Parser::new( - "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), - ); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Comment("XML is awful".to_string()), - Element::Text("Hello there".to_string()), - Element::Tagged(Tagged::new( - &"bar", - vec![Element::Text("123".to_string())] - )), - ] - ))]) - ); - } -} |