summary refs log tree commit diff
path: root/src/xml/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/xml/parser.rs')
-rw-r--r-- src/xml/parser.rs 221
1 files changed, 221 insertions, 0 deletions
diff --git a/src/xml/parser.rs b/src/xml/parser.rs
new file mode 100644
index 0000000..9cdafb1
--- /dev/null
+++ b/src/xml/parser.rs
@@ -0,0 +1,221 @@
+use std::io::BufRead;
+
+use quick_xml::events::attributes::AttrError;
+use quick_xml::events::{BytesStart, BytesText, Event};
+use quick_xml::Reader;
+
+use crate::xml::element::{Attribute, Element, Elements, Tagged};
+
+/// XML parser.
+///
+/// Wraps a `quick_xml` [`Reader`] over `Source` and turns the events it
+/// produces into a tree of [`Element`]s.
+pub struct Parser<Source>
+{
+ /// Underlying `quick_xml` event reader that the parser pulls events from.
+ reader: Reader<Source>,
+}
+
+impl<Source: BufRead> Parser<Source>
+{
+    /// Creates a parser that reads XML from `src`.
+    pub fn new(src: Source) -> Self
+    {
+        Self {
+            reader: Reader::from_reader(src),
+        }
+    }
+
+    /// Parses the whole input and returns its top-level elements.
+    ///
+    /// Events that do not map to an [`Element`] are skipped.
+    ///
+    /// # Errors
+    /// Returns an [`Error`] if the underlying reader fails, if text, a tag name
+    /// or an attribute key is not valid UTF-8, if an attribute is malformed, if
+    /// an end tag appears without a matching start tag, or if the input ends
+    /// inside an open tag.
+    pub fn parse(&mut self) -> Result<Elements, Error>
+    {
+        let mut buf = Vec::new();
+
+        let mut elements = Vec::new();
+
+        loop {
+            let event = self.reader.read_event_into(&mut buf)?;
+
+            match self.handle_event(event)? {
+                EventHandlingResult::Element(element) => elements.push(element),
+                EventHandlingResult::Event(_) => {}
+                EventHandlingResult::End => break,
+            }
+
+            // The event has been fully consumed, so the bytes backing it can be
+            // dropped. Without this the buffer grows with the entire document.
+            buf.clear();
+        }
+
+        Ok(elements.into())
+    }
+
+    /// Copies the raw bytes of `text` into an owned `String`.
+    ///
+    /// Fails with [`Error::TextNotUTF8`] if the bytes are not valid UTF-8.
+    fn parse_text(text: &BytesText) -> Result<String, Error>
+    {
+        String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8)
+    }
+
+    /// Parses the children of the tag opened by `start`, up to and including
+    /// its matching end tag, and returns the resulting tagged element.
+    ///
+    /// Fails with [`Error::UnexpectedEndOfFile`] if the input ends before the
+    /// matching end tag is found.
+    fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error>
+    {
+        let mut child_elements = Vec::new();
+
+        let mut buf = Vec::new();
+
+        loop {
+            let event = self.reader.read_event_into(&mut buf)?;
+
+            match event {
+                Event::End(end) if end.name() == start.name() => break,
+                event => match self.handle_event(event)? {
+                    EventHandlingResult::Element(element) => {
+                        child_elements.push(element);
+                    }
+                    EventHandlingResult::End => {
+                        return Err(Error::UnexpectedEndOfFile);
+                    }
+                    EventHandlingResult::Event(_) => {}
+                },
+            }
+
+            // Nothing borrows the buffer any more; keep it from accumulating
+            // every event read for this tag.
+            buf.clear();
+        }
+
+        Self::build_tagged(start, child_elements)
+    }
+
+    /// Builds a tagged element from the name and attributes of `start` and the
+    /// already parsed `child_elements`.
+    fn build_tagged(
+        start: &BytesStart,
+        child_elements: Vec<Element>,
+    ) -> Result<Element, Error>
+    {
+        let attributes = start
+            .attributes()
+            .map(|attr_result| {
+                let attr = attr_result?;
+
+                Ok(Attribute {
+                    key: String::from_utf8(attr.key.as_ref().to_vec())
+                        .map_err(|_| Error::TagAttributeKeyNotUTF8)?,
+                    value: attr.value.into_owned(),
+                })
+            })
+            .collect::<Result<Vec<_>, Error>>()?;
+
+        Ok(Element::Tagged(Tagged::new(
+            &String::from_utf8(start.name().as_ref().to_vec())
+                .map_err(|_| Error::TagNameNotUTF8)?,
+            child_elements,
+            attributes,
+        )))
+    }
+
+    /// Converts a single reader event into an [`EventHandlingResult`].
+    ///
+    /// Text and comments become elements directly. A start tag is parsed
+    /// recursively together with its children. A self-closing tag becomes a
+    /// tagged element without children. Any other event is handed back
+    /// unprocessed.
+    ///
+    /// An end tag that reaches this function was not consumed by
+    /// `parse_tagged`, so it is reported as [`Error::UnexpectedTagEnd`].
+    fn handle_event<'a>(
+        &'a mut self,
+        event: Event<'a>,
+    ) -> Result<EventHandlingResult, Error>
+    {
+        match event {
+            Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text(
+                Self::parse_text(&text)?,
+            ))),
+            Event::Start(start) => {
+                Ok(EventHandlingResult::Element(self.parse_tagged(&start)?))
+            }
+            // Self-closing tags (`<foo/>`) have no separate end event, so they
+            // must be turned into an element here instead of being dropped by
+            // the catch-all arm below.
+            Event::Empty(start) => Ok(EventHandlingResult::Element(
+                Self::build_tagged(&start, Vec::new())?,
+            )),
+            Event::End(_) => Err(Error::UnexpectedTagEnd),
+            Event::Eof => Ok(EventHandlingResult::End),
+            Event::Comment(comment_text) => Ok(EventHandlingResult::Element(
+                Element::Comment(Self::parse_text(&comment_text)?),
+            )),
+            event => Ok(EventHandlingResult::Event(event)),
+        }
+    }
+}
+
+/// Errors that can occur while parsing XML with [`Parser`].
+#[derive(Debug, thiserror::Error)]
+pub enum Error
+{
+ /// The underlying `quick_xml` reader reported an error.
+ #[error(transparent)]
+ QuickXMLFailed(#[from] quick_xml::Error),
+
+ /// The content of a text or comment node is not valid UTF-8.
+ #[error("Text is not UTF-8")]
+ TextNotUTF8,
+
+ /// The name of a tag is not valid UTF-8.
+ #[error("Tag name is not UTF-8")]
+ TagNameNotUTF8,
+
+ /// An attribute of a tag could not be read by `quick_xml`.
+ #[error("Invalid attribute")]
+ InvalidTagAttr(#[from] AttrError),
+
+ /// The key of a tag attribute is not valid UTF-8.
+ #[error("Tag attribute key is not UTF-8")]
+ TagAttributeKeyNotUTF8,
+
+ /// An end tag was encountered that does not close the tag being parsed.
+ #[error("Unexpectedly found the end of a tag")]
+ UnexpectedTagEnd,
+
+ /// The input ended before an open tag was closed.
+ #[error("Unexpected end of file")]
+ UnexpectedEndOfFile,
+}
+
+/// Outcome of handling a single reader event in `Parser::handle_event`.
+enum EventHandlingResult<'event>
+{
+ /// The event was converted into an element.
+ Element(Element),
+ /// The event was not converted and is handed back as-is.
+ Event(Event<'event>),
+ /// The end of the input (`Event::Eof`) was reached.
+ End,
+}
+
+#[cfg(test)]
+mod tests
+{
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+
+    /// Parses `xml` with a fresh [`Parser`], panicking if parsing fails.
+    fn parse_str(xml: &str) -> Elements
+    {
+        Parser::new(xml.as_bytes()).parse().expect("Expected Ok")
+    }
+
+    #[test]
+    fn can_parse()
+    {
+        // A single tag wrapping plain text.
+        assert_eq!(
+            parse_str("<foo>Hello there</foo>"),
+            Elements::from(vec![Element::Tagged(Tagged::new(
+                &"foo",
+                vec![Element::Text("Hello there".to_string())],
+                Vec::new()
+            ))])
+        );
+
+        // A nested tag followed by trailing text inside the parent.
+        assert_eq!(
+            parse_str("<foo><bar>123</bar> Hello</foo>"),
+            Elements::from(vec![Element::Tagged(Tagged::new(
+                &"foo",
+                vec![
+                    Element::Tagged(Tagged::new(
+                        &"bar",
+                        Elements::from(vec![Element::Text("123".to_string())]),
+                        Vec::new()
+                    )),
+                    Element::Text(" Hello".to_string())
+                ],
+                Vec::new()
+            ))])
+        );
+
+        // Empty input yields no elements.
+        assert_eq!(parse_str(""), Elements::from(Vec::new()));
+
+        // Comments are kept as elements, in document order.
+        assert_eq!(
+            parse_str("<foo><!--XML is awful-->Hello there<bar>123</bar></foo>"),
+            Elements::from(vec![Element::Tagged(Tagged::new(
+                &"foo",
+                vec![
+                    Element::Comment("XML is awful".to_string()),
+                    Element::Text("Hello there".to_string()),
+                    Element::Tagged(Tagged::new(
+                        &"bar",
+                        vec![Element::Text("123".to_string())],
+                        Vec::new()
+                    )),
+                ],
+                Vec::new()
+            ))])
+        );
+    }
+}