about summary refs log tree commit diff
path: root/src/xml
diff options
context:
space:
mode:
author HampusM <hampus@hampusmat.com> 2023-03-25 17:42:28 +0100
committer HampusM <hampus@hampusmat.com> 2023-03-25 17:42:28 +0100
commit add06dafdf874b1b419e5eef918c6b1131ab09fd (patch)
tree c1d52d3ece248d96562a3d77beb44973e7720847 /src/xml
parent f49d77c2961be28c3cc500af185813dd5e83a367 (diff)
perf: improve XML deserialization speed
Diffstat (limited to 'src/xml')
-rw-r--r-- src/xml/element.rs 144
-rw-r--r-- src/xml/mod.rs 2
-rw-r--r-- src/xml/parser.rs 195
3 files changed, 0 insertions, 341 deletions
diff --git a/src/xml/element.rs b/src/xml/element.rs
deleted file mode 100644
index f469480..0000000
--- a/src/xml/element.rs
+++ /dev/null
@@ -1,144 +0,0 @@
-#[derive(Debug, PartialEq, Eq)]
-pub struct Elements
-{
- elements: Vec<Element>,
-}
-
-impl Elements
-{
- pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged>
- {
- self.elements.iter().find_map(|element| match element {
- Element::Tagged(tagged_element) if tagged_element.name == tag_name => {
- Some(tagged_element)
- }
- _ => None,
- })
- }
-
- pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged>
- {
- self.elements
- .iter()
- .filter_map(|element| match element {
- Element::Tagged(tagged_element) if tagged_element.name == tag_name => {
- Some(tagged_element)
- }
- _ => None,
- })
- .collect()
- }
-
- pub fn get_first_text_element(&self) -> Option<&String>
- {
- self.elements.iter().find_map(|element| match element {
- Element::Text(text) => Some(text),
- _ => None,
- })
- }
-
- pub fn get_all_text_elements(&self) -> Vec<&String>
- {
- self.elements
- .iter()
- .filter_map(|element| match element {
- Element::Text(text) => Some(text),
- _ => None,
- })
- .collect()
- }
-
- pub fn has_tagged_element(&self, tag_name: &str) -> bool
- {
- self.elements.iter().any(|element| {
- matches!(
- element,
- Element::Tagged(tagged_element) if tagged_element.name == tag_name
- )
- })
- }
-}
-
-impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements
-{
- fn from(into_iter: IntoIter) -> Self
- {
- Self {
- elements: into_iter.into_iter().collect(),
- }
- }
-}
-
-impl<'elements> IntoIterator for &'elements Elements
-{
- type IntoIter = Iter<'elements>;
- type Item = &'elements Element;
-
- fn into_iter(self) -> Self::IntoIter
- {
- Self::IntoIter {
- elements: self.elements.iter(),
- }
- }
-}
-
-pub struct Iter<'elements>
-{
- elements: std::slice::Iter<'elements, Element>,
-}
-
-impl<'elements> Iterator for Iter<'elements>
-{
- type Item = &'elements Element;
-
- fn next(&mut self) -> Option<Self::Item>
- {
- self.elements.next()
- }
-}
-
-#[derive(Debug, PartialEq, Eq)]
-pub enum Element
-{
- Tagged(Tagged),
- Text(String),
- Comment(String),
-}
-
-#[derive(Debug, PartialEq, Eq)]
-pub struct Tagged
-{
- name: String,
- child_elements: Elements,
-}
-
-impl Tagged
-{
- pub fn new<Name, ChildElements>(name: &Name, child_elements: ChildElements) -> Self
- where
- Name: ToString,
- ChildElements: Into<Elements>,
- {
- Self {
- name: name.to_string(),
- child_elements: child_elements.into(),
- }
- }
-
- pub fn name(&self) -> &str
- {
- &self.name
- }
-
- pub fn child_elements(&self) -> &Elements
- {
- &self.child_elements
- }
-}
-
-pub trait FromElements: Sized
-{
- type Error;
-
- fn from_elements(elements: &Elements) -> Result<Self, Self::Error>;
-}
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
deleted file mode 100644
index 12368c3..0000000
--- a/src/xml/mod.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-pub mod element;
-pub mod parser;
diff --git a/src/xml/parser.rs b/src/xml/parser.rs
deleted file mode 100644
index d152a6e..0000000
--- a/src/xml/parser.rs
+++ /dev/null
@@ -1,195 +0,0 @@
-use std::io::BufRead;
-
-use quick_xml::events::{BytesStart, BytesText, Event};
-use quick_xml::Reader;
-
-use crate::xml::element::{Element, Elements, Tagged};
-
-/// XML parser.
-pub struct Parser<Source>
-{
- reader: Reader<Source>,
-}
-
-impl<Source: BufRead> Parser<Source>
-{
- pub fn new(src: Source) -> Self
- {
- Self {
- reader: Reader::from_reader(src),
- }
- }
-
- pub fn parse(&mut self) -> Result<Elements, Error>
- {
- let mut buf = Vec::new();
-
- let mut elements = Vec::new();
-
- loop {
- let event = self.reader.read_event_into(&mut buf)?;
-
- let element = match self.handle_event(event)? {
- EventHandlingResult::Element(element) => element,
- EventHandlingResult::Event(_) => {
- continue;
- }
- EventHandlingResult::End => {
- break;
- }
- };
-
- elements.push(element);
- }
-
- Ok(elements.into())
- }
-
- fn parse_text(text: &BytesText) -> Result<String, Error>
- {
- String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8)
- }
-
- fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error>
- {
- let mut child_elements = Vec::new();
-
- let mut buf = Vec::new();
-
- loop {
- let event = self.reader.read_event_into(&mut buf)?;
-
- match event {
- Event::End(end) if end.name() == start.name() => {
- break;
- }
- event => match self.handle_event(event)? {
- EventHandlingResult::Element(element) => {
- child_elements.push(element);
- }
- EventHandlingResult::End => {
- return Err(Error::UnexpectedEndOfFile);
- }
- EventHandlingResult::Event(_) => {}
- },
- }
- }
-
- Ok(Element::Tagged(Tagged::new(
- &String::from_utf8(start.name().as_ref().to_vec())
- .map_err(|_| Error::TagNameNotUTF8)?,
- child_elements,
- )))
- }
-
- fn handle_event<'a>(
- &'a mut self,
- event: Event<'a>,
- ) -> Result<EventHandlingResult, Error>
- {
- match event {
- Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text(
- Self::parse_text(&text)?,
- ))),
- Event::Start(start) => {
- Ok(EventHandlingResult::Element(self.parse_tagged(&start)?))
- }
- Event::End(_) => Err(Error::UnexpectedTagEnd),
- Event::Eof => Ok(EventHandlingResult::End),
- Event::Comment(comment_text) => Ok(EventHandlingResult::Element(
- Element::Comment(Self::parse_text(&comment_text)?),
- )),
- event => Ok(EventHandlingResult::Event(event)),
- }
- }
-}
-
-#[derive(Debug, thiserror::Error)]
-pub enum Error
-{
- #[error(transparent)]
- QuickXMLFailed(#[from] quick_xml::Error),
-
- #[error("Text is not UTF-8")]
- TextNotUTF8,
-
- #[error("Tag name is not UTF-8")]
- TagNameNotUTF8,
-
- #[error("Unexpectedly found the end of a tag")]
- UnexpectedTagEnd,
-
- #[error("Unexpected end of file")]
- UnexpectedEndOfFile,
-}
-
-enum EventHandlingResult<'event>
-{
- Element(Element),
- Event(Event<'event>),
- End,
-}
-
-#[cfg(test)]
-mod tests
-{
- use pretty_assertions::assert_eq;
-
- use super::*;
-
- #[test]
- fn can_parse()
- {
- let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes());
-
- assert_eq!(
- parser.parse().expect("Expected Ok"),
- Elements::from(vec![Element::Tagged(Tagged::new(
- &"foo",
- vec![Element::Text("Hello there".to_string())]
- ))])
- );
-
- let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes());
-
- assert_eq!(
- parser.parse().expect("Expected Ok"),
- Elements::from(vec![Element::Tagged(Tagged::new(
- &"foo",
- vec![
- Element::Tagged(Tagged::new(
- &"bar",
- Elements::from(vec![Element::Text("123".to_string())])
- )),
- Element::Text(" Hello".to_string())
- ]
- ))])
- );
-
- let mut parser = Parser::new("".as_bytes());
-
- assert_eq!(
- parser.parse().expect("Expected Ok"),
- Elements::from(Vec::new())
- );
-
- let mut parser = Parser::new(
- "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(),
- );
-
- assert_eq!(
- parser.parse().expect("Expected Ok"),
- Elements::from(vec![Element::Tagged(Tagged::new(
- &"foo",
- vec![
- Element::Comment("XML is awful".to_string()),
- Element::Text("Hello there".to_string()),
- Element::Tagged(Tagged::new(
- &"bar",
- vec![Element::Text("123".to_string())]
- )),
- ]
- ))])
- );
- }
-}