about summary refs log tree commit diff
path: root/src/xml
diff options
context:
space:
mode:
author: HampusM <hampus@hampusmat.com> 2023-02-19 13:49:41 +0100
committer: HampusM <hampus@hampusmat.com> 2023-02-19 18:27:23 +0100
commit: 7c5bec7db2a2fc8c796d5f31bdeb03da0946133d (patch)
tree: 7c80fa13c3aae658e63d9d6ae553593ebd7a93be /src/xml
parent: 46c316bd9665cdc6026ccd2a4119e4c46778b65b (diff)
feat: add project & registry parsing /w commands
Diffstat (limited to 'src/xml')
-rw-r--r-- src/xml/element.rs 144
-rw-r--r-- src/xml/mod.rs 2
-rw-r--r-- src/xml/parser.rs 195
3 files changed, 341 insertions, 0 deletions
diff --git a/src/xml/element.rs b/src/xml/element.rs
new file mode 100644
index 0000000..f469480
--- /dev/null
+++ b/src/xml/element.rs
@@ -0,0 +1,144 @@
+/// An ordered collection of XML [`Element`]s.
+#[derive(Debug, Eq, PartialEq)]
+pub struct Elements
+{
+    /// The contained elements, kept in the order they were supplied.
+    elements: Vec<Element>,
+}
+
+impl Elements
+{
+    /// Returns the first tagged element in this collection whose name equals
+    /// `tag_name`, or `None` if there is no such element.
+    ///
+    /// Only the elements held directly by this collection are inspected; child
+    /// elements of tagged elements are not searched.
+    pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged>
+    {
+        for element in &self.elements {
+            if let Element::Tagged(tagged) = element {
+                if tagged.name == tag_name {
+                    return Some(tagged);
+                }
+            }
+        }
+
+        None
+    }
+
+    /// Returns every tagged element in this collection whose name equals
+    /// `tag_name`, in collection order.
+    pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged>
+    {
+        let mut matching = Vec::new();
+
+        for element in &self.elements {
+            if let Element::Tagged(tagged) = element {
+                if tagged.name == tag_name {
+                    matching.push(tagged);
+                }
+            }
+        }
+
+        matching
+    }
+
+    /// Returns the first text element in this collection, or `None` if it holds
+    /// no text element.
+    pub fn get_first_text_element(&self) -> Option<&String>
+    {
+        for element in &self.elements {
+            if let Element::Text(text) = element {
+                return Some(text);
+            }
+        }
+
+        None
+    }
+
+    /// Returns every text element in this collection, in collection order.
+    pub fn get_all_text_elements(&self) -> Vec<&String>
+    {
+        let mut texts = Vec::new();
+
+        for element in &self.elements {
+            if let Element::Text(text) = element {
+                texts.push(text);
+            }
+        }
+
+        texts
+    }
+
+    /// Returns whether this collection holds a tagged element named `tag_name`.
+    pub fn has_tagged_element(&self, tag_name: &str) -> bool
+    {
+        self.get_first_tagged_element(tag_name).is_some()
+    }
+}
+
+/// Builds an [`Elements`] collection from anything that yields owned
+/// [`Element`]s, preserving the iteration order.
+impl<Items> From<Items> for Elements
+where
+    Items: IntoIterator<Item = Element>,
+{
+    fn from(into_iter: Items) -> Self
+    {
+        let elements: Vec<Element> = into_iter.into_iter().collect();
+
+        Self { elements }
+    }
+}
+
+/// Allows `for element in &elements` to iterate the collection by reference.
+impl<'a> IntoIterator for &'a Elements
+{
+    type Item = &'a Element;
+    type IntoIter = Iter<'a>;
+
+    fn into_iter(self) -> Iter<'a>
+    {
+        Iter {
+            elements: self.elements.iter(),
+        }
+    }
+}
+
+/// Borrowing iterator over the [`Element`]s of an [`Elements`] collection.
+pub struct Iter<'a>
+{
+    /// Iterator over the collection's backing slice.
+    elements: std::slice::Iter<'a, Element>,
+}
+
+/// Yields the wrapped elements by reference, in collection order.
+impl<'elements> Iterator for Iter<'elements>
+{
+    type Item = &'elements Element;
+
+    fn next(&mut self) -> Option<Self::Item>
+    {
+        self.elements.next()
+    }
+
+    /// Forwards the exact bounds of the wrapped slice iterator so consumers
+    /// such as `collect` can reserve capacity up front; the default
+    /// implementation would report `(0, None)`.
+    fn size_hint(&self) -> (usize, Option<usize>)
+    {
+        self.elements.size_hint()
+    }
+}
+
+/// A single node of a parsed XML document.
+#[derive(Debug, Eq, PartialEq)]
+pub enum Element
+{
+    /// A named element together with its child elements.
+    Tagged(Tagged),
+
+    /// Text content.
+    Text(String),
+
+    /// The content of a comment.
+    Comment(String),
+}
+
+/// A named XML element and the elements nested inside it.
+#[derive(Debug, Eq, PartialEq)]
+pub struct Tagged
+{
+    /// The tag name.
+    name: String,
+
+    /// The elements nested directly inside this element.
+    child_elements: Elements,
+}
+
+impl Tagged
+{
+    /// Creates a tagged element called `name` containing `child_elements`.
+    ///
+    /// `name` is converted with [`ToString`]. The `?Sized` bound lets unsized
+    /// names such as a plain `&str` be passed directly (`Tagged::new("foo", ..)`)
+    /// in addition to references to sized values (`Tagged::new(&"foo", ..)`).
+    ///
+    /// `child_elements` may be anything convertible into [`Elements`].
+    pub fn new<Name, ChildElements>(name: &Name, child_elements: ChildElements) -> Self
+    where
+        Name: ToString + ?Sized,
+        ChildElements: Into<Elements>,
+    {
+        Self {
+            name: name.to_string(),
+            child_elements: child_elements.into(),
+        }
+    }
+
+    /// Returns the tag name.
+    pub fn name(&self) -> &str
+    {
+        &self.name
+    }
+
+    /// Returns the elements nested directly inside this element.
+    pub fn child_elements(&self) -> &Elements
+    {
+        &self.child_elements
+    }
+}
+
+/// Fallible construction of a value from a collection of XML elements.
+pub trait FromElements
+where
+    Self: Sized,
+{
+    /// The error returned when construction fails.
+    type Error;
+
+    /// Attempts to build `Self` from `elements`.
+    fn from_elements(elements: &Elements) -> Result<Self, Self::Error>;
+}
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
new file mode 100644
index 0000000..12368c3
--- /dev/null
+++ b/src/xml/mod.rs
@@ -0,0 +1,2 @@
+pub mod element;
+pub mod parser;
diff --git a/src/xml/parser.rs b/src/xml/parser.rs
new file mode 100644
index 0000000..d152a6e
--- /dev/null
+++ b/src/xml/parser.rs
@@ -0,0 +1,195 @@
+use std::io::BufRead;
+
+use quick_xml::events::{BytesStart, BytesText, Event};
+use quick_xml::Reader;
+
+use crate::xml::element::{Element, Elements, Tagged};
+
+/// XML parser producing [`Elements`] from the events of a `quick_xml`
+/// [`Reader`].
+pub struct Parser<Src>
+{
+    /// The event reader wrapping the XML source.
+    reader: Reader<Src>,
+}
+
+impl<Source: BufRead> Parser<Source>
+{
+    /// Creates a parser reading XML from `src`.
+    pub fn new(src: Source) -> Self
+    {
+        Self {
+            reader: Reader::from_reader(src),
+        }
+    }
+
+    /// Reads events until the end of the input and returns the top-level
+    /// elements in document order.
+    ///
+    /// Events that are neither text, comments nor tags are skipped.
+    ///
+    /// # Errors
+    /// - [`Error::QuickXMLFailed`] if the underlying reader fails.
+    /// - [`Error::TextNotUTF8`] / [`Error::TagNameNotUTF8`] if text, comment or
+    ///   tag name bytes are not valid UTF-8.
+    /// - [`Error::UnexpectedTagEnd`] if an end tag is found where none is open.
+    /// - [`Error::UnexpectedEndOfFile`] if the input ends inside an open tag.
+    pub fn parse(&mut self) -> Result<Elements, Error>
+    {
+        let mut buf = Vec::new();
+
+        let mut elements = Vec::new();
+
+        loop {
+            // The reader appends each event's bytes to the buffer. Nothing from
+            // the previous event is borrowed any more at this point, so clear
+            // it to keep memory use from growing with the size of the input.
+            buf.clear();
+
+            let event = self.reader.read_event_into(&mut buf)?;
+
+            let element = match self.handle_event(event)? {
+                EventHandlingResult::Element(element) => element,
+                EventHandlingResult::Event(_) => {
+                    continue;
+                }
+                EventHandlingResult::End => {
+                    break;
+                }
+            };
+
+            elements.push(element);
+        }
+
+        Ok(elements.into())
+    }
+
+    /// Copies the bytes of a text or comment event into a `String`.
+    ///
+    /// The bytes are only validated as UTF-8; no other transformation is
+    /// applied here.
+    fn parse_text(text: &BytesText) -> Result<String, Error>
+    {
+        String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8)
+    }
+
+    /// Returns the name of the tag `start` as an owned `String`.
+    fn parse_tag_name(start: &BytesStart) -> Result<String, Error>
+    {
+        String::from_utf8(start.name().as_ref().to_vec())
+            .map_err(|_| Error::TagNameNotUTF8)
+    }
+
+    /// Parses everything up to the end tag matching `start` and returns the
+    /// resulting tagged element with its children.
+    fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error>
+    {
+        let mut child_elements = Vec::new();
+
+        // A separate buffer is needed because `start` still borrows the
+        // caller's buffer.
+        let mut buf = Vec::new();
+
+        loop {
+            // See `parse`: drop the previous event's bytes before reading on.
+            buf.clear();
+
+            let event = self.reader.read_event_into(&mut buf)?;
+
+            match event {
+                Event::End(end) if end.name() == start.name() => {
+                    break;
+                }
+                event => match self.handle_event(event)? {
+                    EventHandlingResult::Element(element) => {
+                        child_elements.push(element);
+                    }
+                    EventHandlingResult::End => {
+                        return Err(Error::UnexpectedEndOfFile);
+                    }
+                    EventHandlingResult::Event(_) => {}
+                },
+            }
+        }
+
+        Ok(Element::Tagged(Tagged::new(
+            &Self::parse_tag_name(start)?,
+            child_elements,
+        )))
+    }
+
+    /// Converts a single reader event into an element where possible.
+    ///
+    /// Start tags are parsed recursively, including their children. Events
+    /// this parser has no representation for are handed back unchanged.
+    fn handle_event<'a>(
+        &'a mut self,
+        event: Event<'a>,
+    ) -> Result<EventHandlingResult, Error>
+    {
+        match event {
+            Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text(
+                Self::parse_text(&text)?,
+            ))),
+            Event::Start(start) => {
+                Ok(EventHandlingResult::Element(self.parse_tagged(&start)?))
+            }
+            // Self-closing tags (`<foo/>`) are reported as `Empty` rather than
+            // as a `Start`/`End` pair. They used to fall through to the
+            // catch-all arm and vanish from the result.
+            Event::Empty(start) => Ok(EventHandlingResult::Element(Element::Tagged(
+                Tagged::new(&Self::parse_tag_name(&start)?, Vec::<Element>::new()),
+            ))),
+            Event::End(_) => Err(Error::UnexpectedTagEnd),
+            Event::Eof => Ok(EventHandlingResult::End),
+            Event::Comment(comment_text) => Ok(EventHandlingResult::Element(
+                Element::Comment(Self::parse_text(&comment_text)?),
+            )),
+            event => Ok(EventHandlingResult::Event(event)),
+        }
+    }
+}
+
+/// Errors that can occur while parsing XML.
+#[derive(thiserror::Error, Debug)]
+pub enum Error
+{
+    /// The underlying `quick_xml` reader reported an error.
+    #[error(transparent)]
+    QuickXMLFailed(#[from] quick_xml::Error),
+
+    /// The bytes of a text or comment were not valid UTF-8.
+    #[error("Text is not UTF-8")]
+    TextNotUTF8,
+
+    /// The bytes of a tag name were not valid UTF-8.
+    #[error("Tag name is not UTF-8")]
+    TagNameNotUTF8,
+
+    /// An end tag was found that does not close the tag being parsed.
+    #[error("Unexpectedly found the end of a tag")]
+    UnexpectedTagEnd,
+
+    /// The input ended while a tag was still open.
+    #[error("Unexpected end of file")]
+    UnexpectedEndOfFile,
+}
+
+/// Outcome of handling a single reader event.
+enum EventHandlingResult<'a>
+{
+    /// The event was turned into an element.
+    Element(Element),
+
+    /// The event has no element representation and is handed back as-is.
+    Event(Event<'a>),
+
+    /// The end of the input was reached.
+    End,
+}
+
+#[cfg(test)]
+mod tests
+{
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+
+    /// Parses `source` with a fresh parser, panicking if parsing fails.
+    fn parse_ok(source: &str) -> Elements
+    {
+        Parser::new(source.as_bytes())
+            .parse()
+            .expect("Expected Ok")
+    }
+
+    /// Shorthand for a text element.
+    fn text(content: &str) -> Element
+    {
+        Element::Text(content.to_string())
+    }
+
+    /// Shorthand for a comment element.
+    fn comment(content: &str) -> Element
+    {
+        Element::Comment(content.to_string())
+    }
+
+    /// Shorthand for a tagged element with the given children.
+    fn tagged(name: &str, children: Vec<Element>) -> Element
+    {
+        Element::Tagged(Tagged::new(&name, children))
+    }
+
+    #[test]
+    fn can_parse()
+    {
+        // A single tag containing only text.
+        assert_eq!(
+            parse_ok("<foo>Hello there</foo>"),
+            Elements::from(vec![tagged("foo", vec![text("Hello there")])])
+        );
+
+        // A nested tag followed by text with leading whitespace.
+        assert_eq!(
+            parse_ok("<foo><bar>123</bar> Hello</foo>"),
+            Elements::from(vec![tagged(
+                "foo",
+                vec![tagged("bar", vec![text("123")]), text(" Hello")]
+            )])
+        );
+
+        // Empty input yields no elements.
+        assert_eq!(parse_ok(""), Elements::from(Vec::new()));
+
+        // Comments are kept, in document order.
+        assert_eq!(
+            parse_ok("<foo><!--XML is awful-->Hello there<bar>123</bar></foo>"),
+            Elements::from(vec![tagged(
+                "foo",
+                vec![
+                    comment("XML is awful"),
+                    text("Hello there"),
+                    tagged("bar", vec![text("123")]),
+                ]
+            )])
+        );
+    }
+}