diff options
| author | HampusM <hampus@hampusmat.com> | 2023-02-19 13:49:41 +0100 | 
|---|---|---|
| committer | HampusM <hampus@hampusmat.com> | 2023-02-19 18:27:23 +0100 | 
| commit | 7c5bec7db2a2fc8c796d5f31bdeb03da0946133d (patch) | |
| tree | 7c80fa13c3aae658e63d9d6ae553593ebd7a93be /src/xml | |
| parent | 46c316bd9665cdc6026ccd2a4119e4c46778b65b (diff) | |
feat: add project & registry parsing /w commands
Diffstat (limited to 'src/xml')
| -rw-r--r-- | src/xml/element.rs | 144 | ||||
| -rw-r--r-- | src/xml/mod.rs | 2 | ||||
| -rw-r--r-- | src/xml/parser.rs | 195 | 
3 files changed, 341 insertions, 0 deletions
diff --git a/src/xml/element.rs b/src/xml/element.rs new file mode 100644 index 0000000..f469480 --- /dev/null +++ b/src/xml/element.rs @@ -0,0 +1,144 @@ +#[derive(Debug, PartialEq, Eq)] +pub struct Elements +{ +    elements: Vec<Element>, +} + +impl Elements +{ +    pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged> +    { +        self.elements.iter().find_map(|element| match element { +            Element::Tagged(tagged_element) if tagged_element.name == tag_name => { +                Some(tagged_element) +            } +            _ => None, +        }) +    } + +    pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> +    { +        self.elements +            .iter() +            .filter_map(|element| match element { +                Element::Tagged(tagged_element) if tagged_element.name == tag_name => { +                    Some(tagged_element) +                } +                _ => None, +            }) +            .collect() +    } + +    pub fn get_first_text_element(&self) -> Option<&String> +    { +        self.elements.iter().find_map(|element| match element { +            Element::Text(text) => Some(text), +            _ => None, +        }) +    } + +    pub fn get_all_text_elements(&self) -> Vec<&String> +    { +        self.elements +            .iter() +            .filter_map(|element| match element { +                Element::Text(text) => Some(text), +                _ => None, +            }) +            .collect() +    } + +    pub fn has_tagged_element(&self, tag_name: &str) -> bool +    { +        self.elements.iter().any(|element| { +            matches!( +                element, +                Element::Tagged(tagged_element) if tagged_element.name == tag_name +            ) +        }) +    } +} + +impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements +{ +    fn from(into_iter: IntoIter) -> Self +    { +        Self { +            elements: into_iter.into_iter().collect(), +        } +    } +} + +impl<'elements> IntoIterator for &'elements Elements +{ +    type IntoIter = Iter<'elements>; +    type Item = &'elements Element; + +    fn into_iter(self) -> Self::IntoIter +    { +        Self::IntoIter { +            elements: self.elements.iter(), +        } +    } +} + +pub struct Iter<'elements> +{ +    elements: std::slice::Iter<'elements, Element>, +} + +impl<'elements> Iterator for Iter<'elements> +{ +    type Item = &'elements Element; + +    fn next(&mut self) -> Option<Self::Item> +    { +        self.elements.next() +    } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Element +{ +    Tagged(Tagged), +    Text(String), +    Comment(String), +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Tagged +{ +    name: String, +    child_elements: Elements, +} + +impl Tagged +{ +    pub fn new<Name, ChildElements>(name: &Name, child_elements: ChildElements) -> Self +    where +        Name: ToString, +        ChildElements: Into<Elements>, +    { +        Self { +            name: name.to_string(), +            child_elements: child_elements.into(), +        } +    } + +    pub fn name(&self) -> &str +    { +        &self.name +    } + +    pub fn child_elements(&self) -> &Elements +    { +        &self.child_elements +    } +} + +pub trait FromElements: Sized +{ +    type Error; + +    fn from_elements(elements: &Elements) -> Result<Self, Self::Error>; +} diff --git a/src/xml/mod.rs b/src/xml/mod.rs new file mode 100644 index 0000000..12368c3 --- /dev/null +++ b/src/xml/mod.rs @@ -0,0 +1,2 @@ +pub mod element; +pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs new file mode 100644 index 0000000..d152a6e --- /dev/null +++ b/src/xml/parser.rs @@ -0,0 +1,195 @@ +use std::io::BufRead; + +use quick_xml::events::{BytesStart, BytesText, Event}; +use quick_xml::Reader; + +use crate::xml::element::{Element, Elements, Tagged}; + +/// XML parser. +pub struct Parser<Source> +{ +    reader: Reader<Source>, +} + +impl<Source: BufRead> Parser<Source> +{ +    pub fn new(src: Source) -> Self +    { +        Self { +            reader: Reader::from_reader(src), +        } +    } + +    pub fn parse(&mut self) -> Result<Elements, Error> +    { +        let mut buf = Vec::new(); + +        let mut elements = Vec::new(); + +        loop { +            let event = self.reader.read_event_into(&mut buf)?; + +            let element = match self.handle_event(event)? { +                EventHandlingResult::Element(element) => element, +                EventHandlingResult::Event(_) => { +                    continue; +                } +                EventHandlingResult::End => { +                    break; +                } +            }; + +            elements.push(element); +        } + +        Ok(elements.into()) +    } + +    fn parse_text(text: &BytesText) -> Result<String, Error> +    { +        String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) +    } + +    fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> +    { +        let mut child_elements = Vec::new(); + +        let mut buf = Vec::new(); + +        loop { +            let event = self.reader.read_event_into(&mut buf)?; + +            match event { +                Event::End(end) if end.name() == start.name() => { +                    break; +                } +                event => match self.handle_event(event)? { +                    EventHandlingResult::Element(element) => { +                        child_elements.push(element); +                    } +                    EventHandlingResult::End => { +                        return Err(Error::UnexpectedEndOfFile); +                    } +                    EventHandlingResult::Event(_) => {} +                }, +            } +        } + +        Ok(Element::Tagged(Tagged::new( +            &String::from_utf8(start.name().as_ref().to_vec()) +                .map_err(|_| Error::TagNameNotUTF8)?, +            child_elements, +        ))) +    } + +    fn handle_event<'a>( +        &'a mut self, +        event: Event<'a>, +    ) -> Result<EventHandlingResult, Error> +    { +        match event { +            Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( +                Self::parse_text(&text)?, +            ))), +            Event::Start(start) => { +                Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) +            } +            Event::End(_) => Err(Error::UnexpectedTagEnd), +            Event::Eof => Ok(EventHandlingResult::End), +            Event::Comment(comment_text) => Ok(EventHandlingResult::Element( +                Element::Comment(Self::parse_text(&comment_text)?), +            )), +            event => Ok(EventHandlingResult::Event(event)), +        } +    } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error +{ +    #[error(transparent)] +    QuickXMLFailed(#[from] quick_xml::Error), + +    #[error("Text is not UTF-8")] +    TextNotUTF8, + +    #[error("Tag name is not UTF-8")] +    TagNameNotUTF8, + +    #[error("Unexpectedly found the end of a tag")] +    UnexpectedTagEnd, + +    #[error("Unexpected end of file")] +    UnexpectedEndOfFile, +} + +enum EventHandlingResult<'event> +{ +    Element(Element), +    Event(Event<'event>), +    End, +} + +#[cfg(test)] +mod tests +{ +    use pretty_assertions::assert_eq; + +    use super::*; + +    #[test] +    fn can_parse() +    { +        let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); + +        assert_eq!( +            parser.parse().expect("Expected Ok"), +            Elements::from(vec![Element::Tagged(Tagged::new( +                &"foo", +                vec![Element::Text("Hello there".to_string())] +            ))]) +        ); + +        let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); + +        assert_eq!( +            parser.parse().expect("Expected Ok"), +            Elements::from(vec![Element::Tagged(Tagged::new( +                &"foo", +                vec![ +                    Element::Tagged(Tagged::new( +                        &"bar", +                        Elements::from(vec![Element::Text("123".to_string())]) +                    )), +                    Element::Text(" Hello".to_string()) +                ] +            ))]) +        ); + +        let mut parser = Parser::new("".as_bytes()); + +        assert_eq!( +            parser.parse().expect("Expected Ok"), +            Elements::from(Vec::new()) +        ); + +        let mut parser = Parser::new( +            "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), +        ); + +        assert_eq!( +            parser.parse().expect("Expected Ok"), +            Elements::from(vec![Element::Tagged(Tagged::new( +                &"foo", +                vec![ +                    Element::Comment("XML is awful".to_string()), +                    Element::Text("Hello there".to_string()), +                    Element::Tagged(Tagged::new( +                        &"bar", +                        vec![Element::Text("123".to_string())] +                    )), +                ] +            ))]) +        ); +    } +}  | 
