summary | refs | log | tree | commit | diff
path: root/src/xml
diff options
context:
space:
mode:
Diffstat (limited to 'src/xml')
-rw-r--r-- src/xml/element.rs 186
-rw-r--r-- src/xml/mod.rs 2
-rw-r--r-- src/xml/parser.rs 221
3 files changed, 409 insertions, 0 deletions
diff --git a/src/xml/element.rs b/src/xml/element.rs
new file mode 100644
index 0000000..647fe90
--- /dev/null
+++ b/src/xml/element.rs
@@ -0,0 +1,186 @@
+/// An ordered collection of XML [`Element`]s.
+///
+/// Built from anything that yields `Element`s (see the `From` impl) and
+/// queried through the lookup methods on this type.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Elements
+{
+ /// The contained elements, in the order they were supplied.
+ elements: Vec<Element>,
+}
+
+impl Elements
+{
+    /// Returns the first [`Element::Tagged`] in this collection, if any.
+    pub fn get_first_tagged(&self) -> Option<&Tagged>
+    {
+        self.tagged().next()
+    }
+
+    /// Returns the first tagged element whose name equals `tag_name`.
+    pub fn get_first_tagged_with_name(&self, tag_name: &str) -> Option<&Tagged>
+    {
+        self.tagged().find(|tagged| tagged.name == tag_name)
+    }
+
+    /// Returns the first tagged element that is named `tag_name` and carries
+    /// an attribute equal to `attribute` (both key and value must match).
+    pub fn get_first_tagged_with_name_and_attr(
+        &self,
+        tag_name: &str,
+        attribute: &Attribute,
+    ) -> Option<&Tagged>
+    {
+        self.tagged().find(|tagged| {
+            tagged.name == tag_name && tagged.attributes.contains(attribute)
+        })
+    }
+
+    /// Returns every tagged element named `tag_name`, in collection order.
+    pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged>
+    {
+        self.tagged()
+            .filter(|tagged| tagged.name == tag_name)
+            .collect()
+    }
+
+    /// Returns the content of the first [`Element::Text`], if any.
+    pub fn get_first_text_element(&self) -> Option<&String>
+    {
+        self.texts().next()
+    }
+
+    /// Returns the content of every [`Element::Text`], in collection order.
+    pub fn get_all_text_elements(&self) -> Vec<&String>
+    {
+        self.texts().collect()
+    }
+
+    /// Reports whether a tagged element named `tag_name` is present.
+    pub fn has_tagged_element(&self, tag_name: &str) -> bool
+    {
+        self.tagged().any(|tagged| tagged.name == tag_name)
+    }
+
+    /// Iterates over the tagged elements only, skipping every other kind.
+    fn tagged(&self) -> impl Iterator<Item = &Tagged> + '_
+    {
+        self.elements.iter().filter_map(|element| {
+            if let Element::Tagged(tagged) = element {
+                Some(tagged)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Iterates over the text elements only, skipping every other kind.
+    fn texts(&self) -> impl Iterator<Item = &String> + '_
+    {
+        self.elements.iter().filter_map(|element| {
+            if let Element::Text(text) = element {
+                Some(text)
+            } else {
+                None
+            }
+        })
+    }
+}
+
+/// Builds an [`Elements`] collection from anything that yields [`Element`]s,
+/// preserving iteration order.
+impl<Items> From<Items> for Elements
+where
+    Items: IntoIterator<Item = Element>,
+{
+    fn from(items: Items) -> Self
+    {
+        let elements: Vec<Element> = items.into_iter().collect();
+
+        Self { elements }
+    }
+}
+
+/// Allows `for element in &elements`, yielding shared references in
+/// collection order.
+impl<'elements> IntoIterator for &'elements Elements
+{
+    type Item = &'elements Element;
+    type IntoIter = Iter<'elements>;
+
+    fn into_iter(self) -> Iter<'elements>
+    {
+        let elements = self.elements.iter();
+
+        Iter { elements }
+    }
+}
+
+/// Borrowing iterator over the [`Element`]s of an [`Elements`] collection.
+///
+/// Obtained through `IntoIterator` on `&Elements`.
+#[derive(Debug, Clone)]
+pub struct Iter<'elements>
+{
+    /// Iterator over the underlying slice of elements.
+    elements: std::slice::Iter<'elements, Element>,
+}
+
+impl<'elements> Iterator for Iter<'elements>
+{
+    type Item = &'elements Element;
+
+    fn next(&mut self) -> Option<Self::Item>
+    {
+        self.elements.next()
+    }
+
+    // Forward the size hint of the slice iterator; the default implementation
+    // reports `(0, None)`, which prevents consumers such as `collect()` from
+    // reserving capacity up front.
+    fn size_hint(&self) -> (usize, Option<usize>)
+    {
+        self.elements.size_hint()
+    }
+}
+
+/// A single node of a parsed XML document.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum Element
+{
+ /// An element with a tag name, attributes and child elements.
+ Tagged(Tagged),
+ /// Character data found between tags.
+ Text(String),
+ /// The content of an XML comment.
+ Comment(String),
+}
+
+/// An XML element with a tag name, child elements and attributes.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Tagged
+{
+    /// Name of the tag.
+    name: String,
+    /// Elements nested inside this tag, in document order.
+    child_elements: Elements,
+    /// Attributes attached to the tag.
+    attributes: Vec<Attribute>,
+}
+
+impl Tagged
+{
+    /// Creates a tagged element.
+    ///
+    /// * `name` - tag name; anything implementing `ToString`, including
+    ///   unsized values such as `str` (so both `"foo"` and `&"foo"` work).
+    /// * `child_elements` - nested elements, convertible into [`Elements`].
+    /// * `attributes` - the attributes of the tag, collected in iteration order.
+    pub fn new<Name, ChildElements, Attrs>(
+        name: &Name,
+        child_elements: ChildElements,
+        attributes: Attrs,
+    ) -> Self
+    where
+        Name: ToString + ?Sized,
+        ChildElements: Into<Elements>,
+        Attrs: IntoIterator<Item = Attribute>,
+    {
+        Self {
+            name: name.to_string(),
+            child_elements: child_elements.into(),
+            attributes: attributes.into_iter().collect(),
+        }
+    }
+
+    /// Returns the tag name.
+    pub fn name(&self) -> &str
+    {
+        &self.name
+    }
+
+    /// Returns the elements nested inside this tag.
+    pub fn child_elements(&self) -> &Elements
+    {
+        &self.child_elements
+    }
+
+    /// Returns the attributes attached to this tag.
+    pub fn attributes(&self) -> &[Attribute]
+    {
+        &self.attributes
+    }
+}
+
+/// A key/value attribute of a [`Tagged`] element.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Attribute
+{
+ /// Attribute name.
+ pub key: String,
+ /// Attribute value as raw bytes; this type does not require valid UTF-8.
+ // NOTE(review): the parser stores the bytes it receives from quick-xml
+ // as-is - confirm whether entity references are still escaped here.
+ pub value: Vec<u8>,
+}
+
+/// Fallible construction of a value from a collection of XML elements.
+pub trait FromElements: Sized
+{
+ /// Error returned when the elements cannot be converted.
+ type Error;
+
+ /// Builds `Self` from `elements`, or reports why that is not possible.
+ fn from_elements(elements: &Elements) -> Result<Self, Self::Error>;
+}
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
new file mode 100644
index 0000000..12368c3
--- /dev/null
+++ b/src/xml/mod.rs
@@ -0,0 +1,2 @@
+/// In-memory representation of XML elements.
+pub mod element;
+/// Parser producing the element representation from an XML source.
+pub mod parser;
diff --git a/src/xml/parser.rs b/src/xml/parser.rs
new file mode 100644
index 0000000..9cdafb1
--- /dev/null
+++ b/src/xml/parser.rs
@@ -0,0 +1,221 @@
+use std::io::BufRead;
+
+use quick_xml::events::attributes::AttrError;
+use quick_xml::events::{BytesStart, BytesText, Event};
+use quick_xml::Reader;
+
+use crate::xml::element::{Attribute, Element, Elements, Tagged};
+
+/// XML parser.
+///
+/// Wraps a quick-xml [`Reader`] over `Source` and turns the events it
+/// produces into a tree of [`Elements`].
+pub struct Parser<Source>
+{
+ /// The underlying quick-xml event reader.
+ reader: Reader<Source>,
+}
+
+impl<Source: BufRead> Parser<Source>
+{
+    /// Creates a parser that reads XML from `src`.
+    pub fn new(src: Source) -> Self
+    {
+        Self {
+            reader: Reader::from_reader(src),
+        }
+    }
+
+    /// Parses the remaining input into its top-level elements.
+    ///
+    /// Text, comments and tags (including self-closing ones) become
+    /// [`Element`]s; every other event kind produced by the reader is skipped.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`Error`] if the reader fails, if text, a tag name or an
+    /// attribute key is not valid UTF-8, if an attribute is malformed, if a
+    /// closing tag shows up where none is expected, or if the input ends
+    /// inside an open tag.
+    pub fn parse(&mut self) -> Result<Elements, Error>
+    {
+        let mut buf = Vec::new();
+
+        let mut elements = Vec::new();
+
+        loop {
+            // `read_event_into` appends to the buffer. The previous event has
+            // been fully converted by now, so drop its bytes instead of
+            // letting the buffer grow with the whole document.
+            buf.clear();
+
+            let event = self.reader.read_event_into(&mut buf)?;
+
+            match self.handle_event(event)? {
+                EventHandlingResult::Element(element) => elements.push(element),
+                EventHandlingResult::Event(_) => {}
+                EventHandlingResult::End => break,
+            }
+        }
+
+        Ok(elements.into())
+    }
+
+    /// Converts the bytes of a text or comment event into a `String`.
+    ///
+    /// Fails with [`Error::TextNotUTF8`] when the bytes are not valid UTF-8.
+    fn parse_text(text: &BytesText) -> Result<String, Error>
+    {
+        String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8)
+    }
+
+    /// Parses everything up to the end tag matching `start` and returns the
+    /// resulting tagged element.
+    ///
+    /// Nested tags are handled recursively through [`Self::handle_event`].
+    /// Fails with [`Error::UnexpectedEndOfFile`] when the input ends before
+    /// the matching end tag is seen.
+    fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error>
+    {
+        let mut child_elements = Vec::new();
+
+        // A separate buffer per nesting level: `start` still borrows from the
+        // buffer of the caller.
+        let mut buf = Vec::new();
+
+        loop {
+            // See `parse`: keep the buffer from accumulating every event.
+            buf.clear();
+
+            let event = self.reader.read_event_into(&mut buf)?;
+
+            match event {
+                Event::End(end) if end.name() == start.name() => {
+                    break;
+                }
+                event => match self.handle_event(event)? {
+                    EventHandlingResult::Element(element) => {
+                        child_elements.push(element);
+                    }
+                    EventHandlingResult::End => {
+                        return Err(Error::UnexpectedEndOfFile);
+                    }
+                    EventHandlingResult::Event(_) => {}
+                },
+            }
+        }
+
+        Self::build_tagged(start, child_elements)
+    }
+
+    /// Builds a tagged element from a start (or self-closing) tag and its
+    /// already parsed children.
+    ///
+    /// Attributes are validated before the tag name, so an invalid attribute
+    /// is reported in preference to a non-UTF-8 tag name.
+    fn build_tagged(
+        start: &BytesStart,
+        child_elements: Vec<Element>,
+    ) -> Result<Element, Error>
+    {
+        let attributes = start
+            .attributes()
+            .map(|attr_result| {
+                let attr = attr_result?;
+
+                Ok(Attribute {
+                    key: String::from_utf8(attr.key.as_ref().to_vec())
+                        .map_err(|_| Error::TagAttributeKeyNotUTF8)?,
+                    value: attr.value.into_owned(),
+                })
+            })
+            .collect::<Result<Vec<_>, Error>>()?;
+
+        let name = String::from_utf8(start.name().as_ref().to_vec())
+            .map_err(|_| Error::TagNameNotUTF8)?;
+
+        Ok(Element::Tagged(Tagged::new(
+            &name,
+            child_elements,
+            attributes,
+        )))
+    }
+
+    /// Turns a single reader event into an element, an end-of-input marker,
+    /// or hands the event back when it has no element representation.
+    fn handle_event<'a>(
+        &'a mut self,
+        event: Event<'a>,
+    ) -> Result<EventHandlingResult, Error>
+    {
+        match event {
+            Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text(
+                Self::parse_text(&text)?,
+            ))),
+            Event::Start(start) => {
+                Ok(EventHandlingResult::Element(self.parse_tagged(&start)?))
+            }
+            // A self-closing tag (`<foo/>`) has no end tag to wait for; it is
+            // a tagged element without children. Without this arm it would be
+            // passed through as an unhandled event and vanish from the tree.
+            Event::Empty(start) => Ok(EventHandlingResult::Element(
+                Self::build_tagged(&start, Vec::new())?,
+            )),
+            // Matching end tags are consumed by `parse_tagged`; one arriving
+            // here has no open tag to close.
+            Event::End(_) => Err(Error::UnexpectedTagEnd),
+            Event::Eof => Ok(EventHandlingResult::End),
+            Event::Comment(comment_text) => Ok(EventHandlingResult::Element(
+                Element::Comment(Self::parse_text(&comment_text)?),
+            )),
+            event => Ok(EventHandlingResult::Event(event)),
+        }
+    }
+}
+
+/// Errors that can occur while parsing XML with [`Parser`].
+#[derive(Debug, thiserror::Error)]
+pub enum Error
+{
+ /// The underlying quick-xml reader reported an error.
+ #[error(transparent)]
+ QuickXMLFailed(#[from] quick_xml::Error),
+
+ /// The content of a text or comment node is not valid UTF-8.
+ #[error("Text is not UTF-8")]
+ TextNotUTF8,
+
+ /// The name of a tag is not valid UTF-8.
+ #[error("Tag name is not UTF-8")]
+ TagNameNotUTF8,
+
+ /// quick-xml failed to parse an attribute of a tag.
+ #[error("Invalid attribute")]
+ InvalidTagAttr(#[from] AttrError),
+
+ /// The key of a tag attribute is not valid UTF-8.
+ #[error("Tag attribute key is not UTF-8")]
+ TagAttributeKeyNotUTF8,
+
+ /// An end tag was found that does not close the tag currently being parsed.
+ #[error("Unexpectedly found the end of a tag")]
+ UnexpectedTagEnd,
+
+ /// The input ended while a tag was still open.
+ #[error("Unexpected end of file")]
+ UnexpectedEndOfFile,
+}
+
+/// Outcome of handling a single reader event.
+enum EventHandlingResult<'event>
+{
+ /// The event was converted into an element.
+ Element(Element),
+ /// The event has no element representation and is handed back unchanged.
+ Event(Event<'event>),
+ /// The reader reached the end of the input.
+ End,
+}
+
+#[cfg(test)]
+mod tests
+{
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+
+    /// Runs the parser over `xml`, panicking with "Expected Ok" on failure.
+    fn parse_str(xml: &str) -> Elements
+    {
+        Parser::new(xml.as_bytes()).parse().expect("Expected Ok")
+    }
+
+    /// Shorthand for a text element.
+    fn text(content: &str) -> Element
+    {
+        Element::Text(content.to_string())
+    }
+
+    /// Shorthand for a comment element.
+    fn comment(content: &str) -> Element
+    {
+        Element::Comment(content.to_string())
+    }
+
+    /// Shorthand for a tagged element without attributes.
+    fn tagged(name: &str, children: Vec<Element>) -> Element
+    {
+        Element::Tagged(Tagged::new(&name, children, Vec::new()))
+    }
+
+    #[test]
+    fn can_parse()
+    {
+        // A single tag containing only text.
+        assert_eq!(
+            parse_str("<foo>Hello there</foo>"),
+            Elements::from(vec![tagged("foo", vec![text("Hello there")])])
+        );
+
+        // A nested tag followed by text; leading whitespace is kept.
+        assert_eq!(
+            parse_str("<foo><bar>123</bar> Hello</foo>"),
+            Elements::from(vec![tagged(
+                "foo",
+                vec![tagged("bar", vec![text("123")]), text(" Hello")]
+            )])
+        );
+
+        // Empty input yields no elements.
+        assert_eq!(parse_str(""), Elements::from(Vec::new()));
+
+        // Comments are kept as elements, in document order.
+        assert_eq!(
+            parse_str("<foo><!--XML is awful-->Hello there<bar>123</bar></foo>"),
+            Elements::from(vec![tagged(
+                "foo",
+                vec![
+                    comment("XML is awful"),
+                    text("Hello there"),
+                    tagged("bar", vec![text("123")]),
+                ]
+            )])
+        );
+    }
+}