From 11c39d50411a747eedac4c6a16fedf598ae798f5 Mon Sep 17 00:00:00 2001 From: HampusM Date: Thu, 23 Feb 2023 22:35:06 +0100 Subject: feat: add project & getting function entries --- src/xml/parser.rs | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 src/xml/parser.rs (limited to 'src/xml/parser.rs') diff --git a/src/xml/parser.rs b/src/xml/parser.rs new file mode 100644 index 0000000..9cdafb1 --- /dev/null +++ b/src/xml/parser.rs @@ -0,0 +1,221 @@ +use std::io::BufRead; + +use quick_xml::events::attributes::AttrError; +use quick_xml::events::{BytesStart, BytesText, Event}; +use quick_xml::Reader; + +use crate::xml::element::{Attribute, Element, Elements, Tagged}; + +/// XML parser. +pub struct Parser +{ + reader: Reader, +} + +impl Parser +{ + pub fn new(src: Source) -> Self + { + Self { + reader: Reader::from_reader(src), + } + } + + pub fn parse(&mut self) -> Result + { + let mut buf = Vec::new(); + + let mut elements = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + let element = match self.handle_event(event)? { + EventHandlingResult::Element(element) => element, + EventHandlingResult::Event(_) => { + continue; + } + EventHandlingResult::End => { + break; + } + }; + + elements.push(element); + } + + Ok(elements.into()) + } + + fn parse_text(text: &BytesText) -> Result + { + String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) + } + + fn parse_tagged(&mut self, start: &BytesStart) -> Result + { + let mut child_elements = Vec::new(); + + let mut buf = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + match event { + Event::End(end) if end.name() == start.name() => { + break; + } + event => match self.handle_event(event)? { + EventHandlingResult::Element(element) => { + child_elements.push(element); + } + EventHandlingResult::End => { + return Err(Error::UnexpectedEndOfFile); + } + EventHandlingResult::Event(_) => {} + }, + } + } + + let attributes = start + .attributes() + .map(|attr_result| { + let attr = attr_result?; + + Ok(Attribute { + key: String::from_utf8(attr.key.as_ref().to_vec()) + .map_err(|_| Error::TagAttributeKeyNotUTF8)?, + value: attr.value.into_owned(), + }) + }) + .collect::, Error>>()?; + + Ok(Element::Tagged(Tagged::new( + &String::from_utf8(start.name().as_ref().to_vec()) + .map_err(|_| Error::TagNameNotUTF8)?, + child_elements, + attributes, + ))) + } + + fn handle_event<'a>( + &'a mut self, + event: Event<'a>, + ) -> Result + { + match event { + Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( + Self::parse_text(&text)?, + ))), + Event::Start(start) => { + Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) + } + Event::End(_) => Err(Error::UnexpectedTagEnd), + Event::Eof => Ok(EventHandlingResult::End), + Event::Comment(comment_text) => Ok(EventHandlingResult::Element( + Element::Comment(Self::parse_text(&comment_text)?), + )), + event => Ok(EventHandlingResult::Event(event)), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error +{ + #[error(transparent)] + QuickXMLFailed(#[from] quick_xml::Error), + + #[error("Text is not UTF-8")] + TextNotUTF8, + + #[error("Tag name is not UTF-8")] + TagNameNotUTF8, + + #[error("Invalid attribute")] + InvalidTagAttr(#[from] AttrError), + + #[error("Tag attribute key is not UTF-8")] + TagAttributeKeyNotUTF8, + + #[error("Unexpectedly found the end of a tag")] + UnexpectedTagEnd, + + #[error("Unexpected end of file")] + UnexpectedEndOfFile, +} + +enum EventHandlingResult<'event> +{ + Element(Element), + Event(Event<'event>), + End, +} + +#[cfg(test)] +mod tests +{ + use pretty_assertions::assert_eq; + + use super::*; + + #[test] + fn can_parse() + { + let mut parser = Parser::new("Hello there".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![Element::Text("Hello there".to_string())], + Vec::new() + ))]) + ); + + let mut parser = Parser::new("123 Hello".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Tagged(Tagged::new( + &"bar", + Elements::from(vec![Element::Text("123".to_string())]), + Vec::new() + )), + Element::Text(" Hello".to_string()) + ], + Vec::new() + ))]) + ); + + let mut parser = Parser::new("".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(Vec::new()) + ); + + let mut parser = Parser::new( + "Hello there123".as_bytes(), + ); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Comment("XML is awful".to_string()), + Element::Text("Hello there".to_string()), + Element::Tagged(Tagged::new( + &"bar", + vec![Element::Text("123".to_string())], + Vec::new() + )), + ], + Vec::new() + ))]) + ); + } +} -- cgit v1.2.3-18-g5258