use std::io::BufRead; use quick_xml::escape::unescape; use quick_xml::events::attributes::AttrError; use quick_xml::events::{BytesStart, BytesText, Event}; use quick_xml::Reader; use crate::xml::element::{Attribute, Element, Elements, Tagged}; /// XML parser. pub struct Parser { reader: Reader, } impl Parser { pub fn new(src: Source) -> Self { Self { reader: Reader::from_reader(src), } } pub fn parse(&mut self) -> Result { let mut buf = Vec::new(); let mut elements = Vec::new(); loop { let event = self.reader.read_event_into(&mut buf)?; let element = match self.handle_event(event)? { EventHandlingResult::Element(element) => element, EventHandlingResult::Event(_) => { continue; } EventHandlingResult::End => { break; } }; elements.push(element); } Ok(elements.into()) } fn parse_text(text: &BytesText) -> Result { let text_escaped = String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8)?; let text_unescaped = unescape(&text_escaped) .map_err(|err| Error::QuickXMLFailed(quick_xml::Error::EscapeError(err)))?; Ok(text_unescaped.to_string()) } fn parse_tagged(&mut self, start: &BytesStart) -> Result { let mut child_elements = Vec::new(); let mut buf = Vec::new(); loop { let event = self.reader.read_event_into(&mut buf)?; match event { Event::End(end) if end.name() == start.name() => { break; } event => match self.handle_event(event)? { EventHandlingResult::Element(element) => { child_elements.push(element); } EventHandlingResult::End => { return Err(Error::UnexpectedEndOfFile); } EventHandlingResult::Event(_) => {} }, } } let attributes = start .attributes() .map(|attr_result| { let attr = attr_result?; Ok(Attribute { key: String::from_utf8(attr.key.as_ref().to_vec()) .map_err(|_| Error::TagAttributeKeyNotUTF8)?, value: attr.value.into_owned(), }) }) .collect::, Error>>()?; Ok(Element::Tagged(Tagged::new( &String::from_utf8(start.name().as_ref().to_vec()) .map_err(|_| Error::TagNameNotUTF8)?, child_elements, attributes, ))) } fn handle_event<'a>( &'a mut self, event: Event<'a>, ) -> Result { match event { Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( Self::parse_text(&text)?, ))), Event::Start(start) => { Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) } Event::End(_) => Err(Error::UnexpectedTagEnd), Event::Eof => Ok(EventHandlingResult::End), Event::Comment(comment_text) => Ok(EventHandlingResult::Element( Element::Comment(Self::parse_text(&comment_text)?), )), event => Ok(EventHandlingResult::Event(event)), } } } #[derive(Debug, thiserror::Error)] pub enum Error { #[error(transparent)] QuickXMLFailed(#[from] quick_xml::Error), #[error("Text is not UTF-8")] TextNotUTF8, #[error("Tag name is not UTF-8")] TagNameNotUTF8, #[error("Invalid attribute")] InvalidTagAttr(#[from] AttrError), #[error("Tag attribute key is not UTF-8")] TagAttributeKeyNotUTF8, #[error("Unexpectedly found the end of a tag")] UnexpectedTagEnd, #[error("Unexpected end of file")] UnexpectedEndOfFile, } enum EventHandlingResult<'event> { Element(Element), Event(Event<'event>), End, } #[cfg(test)] mod tests { use pretty_assertions::assert_eq; use super::*; #[test] fn can_parse() { let mut parser = Parser::new("Hello there".as_bytes()); assert_eq!( parser.parse().expect("Expected Ok"), Elements::from(vec![Element::Tagged(Tagged::new( &"foo", vec![Element::Text("Hello there".to_string())], Vec::new() ))]) ); let mut parser = Parser::new("123 Hello".as_bytes()); assert_eq!( parser.parse().expect("Expected Ok"), Elements::from(vec![Element::Tagged(Tagged::new( &"foo", vec![ Element::Tagged(Tagged::new( &"bar", Elements::from(vec![Element::Text("123".to_string())]), Vec::new() )), Element::Text(" Hello".to_string()) ], Vec::new() ))]) ); let mut parser = Parser::new("".as_bytes()); assert_eq!( parser.parse().expect("Expected Ok"), Elements::from(Vec::new()) ); let mut parser = Parser::new( "Hello there123".as_bytes(), ); assert_eq!( parser.parse().expect("Expected Ok"), Elements::from(vec![Element::Tagged(Tagged::new( &"foo", vec![ Element::Comment("XML is awful".to_string()), Element::Text("Hello there".to_string()), Element::Tagged(Tagged::new( &"bar", vec![Element::Text("123".to_string())], Vec::new() )), ], Vec::new() ))]) ); } }