diff options
Diffstat (limited to 'src/xml/parser.rs')
-rw-r--r-- | src/xml/parser.rs | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/src/xml/parser.rs b/src/xml/parser.rs new file mode 100644 index 0000000..d152a6e --- /dev/null +++ b/src/xml/parser.rs @@ -0,0 +1,195 @@ +use std::io::BufRead; + +use quick_xml::events::{BytesStart, BytesText, Event}; +use quick_xml::Reader; + +use crate::xml::element::{Element, Elements, Tagged}; + +/// XML parser. +pub struct Parser<Source> +{ + reader: Reader<Source>, +} + +impl<Source: BufRead> Parser<Source> +{ + pub fn new(src: Source) -> Self + { + Self { + reader: Reader::from_reader(src), + } + } + + pub fn parse(&mut self) -> Result<Elements, Error> + { + let mut buf = Vec::new(); + + let mut elements = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + let element = match self.handle_event(event)? { + EventHandlingResult::Element(element) => element, + EventHandlingResult::Event(_) => { + continue; + } + EventHandlingResult::End => { + break; + } + }; + + elements.push(element); + } + + Ok(elements.into()) + } + + fn parse_text(text: &BytesText) -> Result<String, Error> + { + String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) + } + + fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> + { + let mut child_elements = Vec::new(); + + let mut buf = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + match event { + Event::End(end) if end.name() == start.name() => { + break; + } + event => match self.handle_event(event)? { + EventHandlingResult::Element(element) => { + child_elements.push(element); + } + EventHandlingResult::End => { + return Err(Error::UnexpectedEndOfFile); + } + EventHandlingResult::Event(_) => {} + }, + } + } + + Ok(Element::Tagged(Tagged::new( + &String::from_utf8(start.name().as_ref().to_vec()) + .map_err(|_| Error::TagNameNotUTF8)?, + child_elements, + ))) + } + + fn handle_event<'a>( + &'a mut self, + event: Event<'a>, + ) -> Result<EventHandlingResult, Error> + { + match event { + Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( + Self::parse_text(&text)?, + ))), + Event::Start(start) => { + Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) + } + Event::End(_) => Err(Error::UnexpectedTagEnd), + Event::Eof => Ok(EventHandlingResult::End), + Event::Comment(comment_text) => Ok(EventHandlingResult::Element( + Element::Comment(Self::parse_text(&comment_text)?), + )), + event => Ok(EventHandlingResult::Event(event)), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error +{ + #[error(transparent)] + QuickXMLFailed(#[from] quick_xml::Error), + + #[error("Text is not UTF-8")] + TextNotUTF8, + + #[error("Tag name is not UTF-8")] + TagNameNotUTF8, + + #[error("Unexpectedly found the end of a tag")] + UnexpectedTagEnd, + + #[error("Unexpected end of file")] + UnexpectedEndOfFile, +} + +enum EventHandlingResult<'event> +{ + Element(Element), + Event(Event<'event>), + End, +} + +#[cfg(test)] +mod tests +{ + use pretty_assertions::assert_eq; + + use super::*; + + #[test] + fn can_parse() + { + let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![Element::Text("Hello there".to_string())] + ))]) + ); + + let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Tagged(Tagged::new( + &"bar", + Elements::from(vec![Element::Text("123".to_string())]) + )), + Element::Text(" Hello".to_string()) + ] + ))]) + ); + + let mut parser = Parser::new("".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(Vec::new()) + ); + + let mut parser = Parser::new( + "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), + ); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Comment("XML is awful".to_string()), + Element::Text("Hello there".to_string()), + Element::Tagged(Tagged::new( + &"bar", + vec![Element::Text("123".to_string())] + )), + ] + ))]) + ); + } +} |