diff options
Diffstat (limited to 'src/xml')
-rw-r--r-- | src/xml/element.rs | 186 | ||||
-rw-r--r-- | src/xml/mod.rs | 2 | ||||
-rw-r--r-- | src/xml/parser.rs | 221 |
3 files changed, 409 insertions, 0 deletions
diff --git a/src/xml/element.rs b/src/xml/element.rs new file mode 100644 index 0000000..647fe90 --- /dev/null +++ b/src/xml/element.rs @@ -0,0 +1,186 @@ +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Elements +{ + elements: Vec<Element>, +} + +impl Elements +{ + pub fn get_first_tagged(&self) -> Option<&Tagged> + { + self.elements.iter().find_map(|element| match element { + Element::Tagged(tagged_element) => Some(tagged_element), + _ => None, + }) + } + + pub fn get_first_tagged_with_name(&self, tag_name: &str) -> Option<&Tagged> + { + self.elements.iter().find_map(|element| match element { + Element::Tagged(tagged_element) if tagged_element.name == tag_name => { + Some(tagged_element) + } + _ => None, + }) + } + + pub fn get_first_tagged_with_name_and_attr( + &self, + tag_name: &str, + attribute: &Attribute, + ) -> Option<&Tagged> + { + self.elements.iter().find_map(|element| match element { + Element::Tagged(tagged_element) + if tagged_element.name == tag_name + && tagged_element + .attributes + .iter() + .any(|attr| attr == attribute) => + { + Some(tagged_element) + } + _ => None, + }) + } + + pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> + { + self.elements + .iter() + .filter_map(|element| match element { + Element::Tagged(tagged_element) if tagged_element.name == tag_name => { + Some(tagged_element) + } + _ => None, + }) + .collect() + } + + pub fn get_first_text_element(&self) -> Option<&String> + { + self.elements.iter().find_map(|element| match element { + Element::Text(text) => Some(text), + _ => None, + }) + } + + pub fn get_all_text_elements(&self) -> Vec<&String> + { + self.elements + .iter() + .filter_map(|element| match element { + Element::Text(text) => Some(text), + _ => None, + }) + .collect() + } + + pub fn has_tagged_element(&self, tag_name: &str) -> bool + { + self.elements.iter().any(|element| { + matches!( + element, + Element::Tagged(tagged_element) if tagged_element.name == tag_name + ) + }) + } +} + +impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements +{ + fn from(into_iter: IntoIter) -> Self + { + Self { + elements: into_iter.into_iter().collect(), + } + } +} + +impl<'elements> IntoIterator for &'elements Elements +{ + type IntoIter = Iter<'elements>; + type Item = &'elements Element; + + fn into_iter(self) -> Self::IntoIter + { + Self::IntoIter { + elements: self.elements.iter(), + } + } +} + +pub struct Iter<'elements> +{ + elements: std::slice::Iter<'elements, Element>, +} + +impl<'elements> Iterator for Iter<'elements> +{ + type Item = &'elements Element; + + fn next(&mut self) -> Option<Self::Item> + { + self.elements.next() + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Element +{ + Tagged(Tagged), + Text(String), + Comment(String), +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Tagged +{ + name: String, + child_elements: Elements, + attributes: Vec<Attribute>, +} + +impl Tagged +{ + pub fn new<Name, ChildElements, Attrs>( + name: &Name, + child_elements: ChildElements, + attributes: Attrs, + ) -> Self + where + Name: ToString, + ChildElements: Into<Elements>, + Attrs: IntoIterator<Item = Attribute>, + { + Self { + name: name.to_string(), + child_elements: child_elements.into(), + attributes: attributes.into_iter().collect(), + } + } + + pub fn name(&self) -> &str + { + &self.name + } + + pub fn child_elements(&self) -> &Elements + { + &self.child_elements + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Attribute +{ + pub key: String, + pub value: Vec<u8>, +} + +pub trait FromElements: Sized +{ + type Error; + + fn from_elements(elements: &Elements) -> Result<Self, Self::Error>; +} diff --git a/src/xml/mod.rs b/src/xml/mod.rs new file mode 100644 index 0000000..12368c3 --- /dev/null +++ b/src/xml/mod.rs @@ -0,0 +1,2 @@ +pub mod element; +pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs new file mode 100644 index 0000000..9cdafb1 --- /dev/null +++ b/src/xml/parser.rs @@ -0,0 +1,221 @@ +use std::io::BufRead; + +use quick_xml::events::attributes::AttrError; +use quick_xml::events::{BytesStart, BytesText, Event}; +use quick_xml::Reader; + +use crate::xml::element::{Attribute, Element, Elements, Tagged}; + +/// XML parser. +pub struct Parser<Source> +{ + reader: Reader<Source>, +} + +impl<Source: BufRead> Parser<Source> +{ + pub fn new(src: Source) -> Self + { + Self { + reader: Reader::from_reader(src), + } + } + + pub fn parse(&mut self) -> Result<Elements, Error> + { + let mut buf = Vec::new(); + + let mut elements = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + let element = match self.handle_event(event)? { + EventHandlingResult::Element(element) => element, + EventHandlingResult::Event(_) => { + continue; + } + EventHandlingResult::End => { + break; + } + }; + + elements.push(element); + } + + Ok(elements.into()) + } + + fn parse_text(text: &BytesText) -> Result<String, Error> + { + String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) + } + + fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> + { + let mut child_elements = Vec::new(); + + let mut buf = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + match event { + Event::End(end) if end.name() == start.name() => { + break; + } + event => match self.handle_event(event)? { + EventHandlingResult::Element(element) => { + child_elements.push(element); + } + EventHandlingResult::End => { + return Err(Error::UnexpectedEndOfFile); + } + EventHandlingResult::Event(_) => {} + }, + } + } + + let attributes = start + .attributes() + .map(|attr_result| { + let attr = attr_result?; + + Ok(Attribute { + key: String::from_utf8(attr.key.as_ref().to_vec()) + .map_err(|_| Error::TagAttributeKeyNotUTF8)?, + value: attr.value.into_owned(), + }) + }) + .collect::<Result<Vec<_>, Error>>()?; + + Ok(Element::Tagged(Tagged::new( + &String::from_utf8(start.name().as_ref().to_vec()) + .map_err(|_| Error::TagNameNotUTF8)?, + child_elements, + attributes, + ))) + } + + fn handle_event<'a>( + &'a mut self, + event: Event<'a>, + ) -> Result<EventHandlingResult, Error> + { + match event { + Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( + Self::parse_text(&text)?, + ))), + Event::Start(start) => { + Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) + } + Event::End(_) => Err(Error::UnexpectedTagEnd), + Event::Eof => Ok(EventHandlingResult::End), + Event::Comment(comment_text) => Ok(EventHandlingResult::Element( + Element::Comment(Self::parse_text(&comment_text)?), + )), + event => Ok(EventHandlingResult::Event(event)), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error +{ + #[error(transparent)] + QuickXMLFailed(#[from] quick_xml::Error), + + #[error("Text is not UTF-8")] + TextNotUTF8, + + #[error("Tag name is not UTF-8")] + TagNameNotUTF8, + + #[error("Invalid attribute")] + InvalidTagAttr(#[from] AttrError), + + #[error("Tag attribute key is not UTF-8")] + TagAttributeKeyNotUTF8, + + #[error("Unexpectedly found the end of a tag")] + UnexpectedTagEnd, + + #[error("Unexpected end of file")] + UnexpectedEndOfFile, +} + +enum EventHandlingResult<'event> +{ + Element(Element), + Event(Event<'event>), + End, +} + +#[cfg(test)] +mod tests +{ + use pretty_assertions::assert_eq; + + use super::*; + + #[test] + fn can_parse() + { + let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![Element::Text("Hello there".to_string())], + Vec::new() + ))]) + ); + + let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Tagged(Tagged::new( + &"bar", + Elements::from(vec![Element::Text("123".to_string())]), + Vec::new() + )), + Element::Text(" Hello".to_string()) + ], + Vec::new() + ))]) + ); + + let mut parser = Parser::new("".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(Vec::new()) + ); + + let mut parser = Parser::new( + "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), + ); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Comment("XML is awful".to_string()), + Element::Text("Hello there".to_string()), + Element::Tagged(Tagged::new( + &"bar", + vec![Element::Text("123".to_string())], + Vec::new() + )), + ], + Vec::new() + ))]) + ); + } +} |