From 11c39d50411a747eedac4c6a16fedf598ae798f5 Mon Sep 17 00:00:00 2001 From: HampusM Date: Thu, 23 Feb 2023 22:35:06 +0100 Subject: feat: add project & getting function entries --- src/description.rs | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 158 ++++++++++++++++++++++++++++++++++++++ src/util.rs | 34 +++++++++ src/xml/element.rs | 186 ++++++++++++++++++++++++++++++++++++++++++++ src/xml/mod.rs | 2 + src/xml/parser.rs | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 812 insertions(+) create mode 100644 src/description.rs create mode 100644 src/lib.rs create mode 100644 src/util.rs create mode 100644 src/xml/element.rs create mode 100644 src/xml/mod.rs create mode 100644 src/xml/parser.rs (limited to 'src') diff --git a/src/description.rs b/src/description.rs new file mode 100644 index 0000000..ba37abc --- /dev/null +++ b/src/description.rs @@ -0,0 +1,211 @@ +//! Reference entry description. +use crate::util::enum_with_get_inner; +use crate::xml::element::{Elements, FromElements, Tagged}; + +/// Reference entry description. +#[derive(Debug)] +pub struct Description +{ + paragraphs: Vec<Paragraph>, +} + +impl Description +{ + /// Returns a new `ReferenceDescription`. + #[must_use] + pub fn new() -> Self + { + Self { + paragraphs: Vec::new(), + } + } + + /// Returns the reference description's paragraphs. + #[must_use] + pub fn paragraphs(&self) -> &[Paragraph] + { + &self.paragraphs + } +} + +impl Default for Description +{ + fn default() -> Self + { + Self::new() + } +} + +impl FromElements for Description +{ + type Error = Error; + + fn from_elements(elements: &Elements) -> Result<Self, Self::Error> + { + let paragraphs = elements + .get_all_tagged_elements_with_name("para") + .into_iter() + .map(|paragraph_element| { + Paragraph::from_elements(paragraph_element.child_elements()) + }) + .collect::<Result<Vec<_>, _>>()?; + + Ok(Description { paragraphs }) + } +} + +/// [`Description`] error. 
+#[derive(Debug, thiserror::Error)] +pub enum Error +{ + /// Invalid paragraph. + #[error("Invalid paragraph")] + InvalidParagraph(#[from] ParagraphError), +} + +/// Reference entry description paragraph. +#[derive(Debug)] +pub struct Paragraph +{ + parts: Vec<ParagraphPart>, +} + +impl Paragraph +{ + /// Returns the parts of the paragraph. + #[must_use] + pub fn parts(&self) -> &[ParagraphPart] + { + &self.parts + } +} + +impl FromElements for Paragraph +{ + type Error = ParagraphError; + + fn from_elements(elements: &Elements) -> Result<Self, Self::Error> + { + let parts = elements + .into_iter() + .map(|element| { + ParagraphPart::from_elements(&Elements::from([element.clone()])) + }) + .collect::<Result<Vec<_>, _>>()?; + + Ok(Self { parts }) + } +} + +/// [`Paragraph`] error. +#[derive(Debug, thiserror::Error)] +pub enum ParagraphError +{ + /// Invalid reference description part. + #[error("Invalid part")] + InvalidPart(#[from] ParagraphPartError), +} + +enum_with_get_inner! { +inner = String; + +/// Reference entry description paragraph part. +#[derive(Debug)] +pub enum ParagraphPart +{ + /// Text part. + Text(String), + + /// <constant>..</constant> part. + Constant(String), + + /// <function>..</function> part. + Function(String), + + /// <parameter>..</parameter> part. + Parameter(String), + + /// Reference entry citation part. 
+ Entry(String), +} +} + +impl FromElements for ParagraphPart +{ + type Error = ParagraphPartError; + + fn from_elements(elements: &Elements) -> Result<Self, Self::Error> + { + if let Some(tagged_element) = elements.get_first_tagged() { + return Self::from_tagged_element(tagged_element); + } + + let text = elements + .get_first_text_element() + .ok_or(Self::Error::InputIsComment)?; + + Ok(Self::Text(text.clone())) + } +} + +impl ParagraphPart +{ + fn from_tagged_element( + tagged_element: &Tagged, + ) -> Result<Self, <Self as FromElements>::Error> + { + let create: fn(String) -> Self = match tagged_element.name() { + "constant" => Self::Constant, + "function" => Self::Function, + "parameter" => Self::Parameter, + "citerefentry" => Self::Entry, + _ => { + return Err(<Self as FromElements>::Error::UnknownPart( + tagged_element.name().to_string(), + )); + } + }; + + if tagged_element.name() == "citerefentry" { + let title_element = tagged_element + .child_elements() + .get_first_tagged_with_name("refentrytitle") + .ok_or(<Self as FromElements>::Error::NoEntryTitleFound)?; + + let title = title_element + .child_elements() + .get_first_text_element() + .ok_or(<Self as FromElements>::Error::NoTextInTagged)?; + + return Ok(Self::Entry(title.clone())); + } + + let text_element = tagged_element + .child_elements() + .get_first_text_element() + .ok_or(<Self as FromElements>::Error::NoTextInTagged)?; + + Ok(create(text_element.clone())) + } +} + +/// [`ParagraphPart`] error. +#[derive(Debug, thiserror::Error)] +pub enum ParagraphPartError +{ + /// Input element is a comment. + #[error("Input element is a comment")] + InputIsComment, + + /// A input element is a unknown reference description part. + #[error("Input element with name '{0}' is a unknown reference description part")] + UnknownPart(String), + + /// No text was found in tagged input element. + #[error("No text was found in tagged input element")] + NoTextInTagged, + + /// No entry title found. 
+ #[error("No entry title found")] + NoEntryTitleFound, +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..8e9e1e7 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,158 @@ +//! Rust API for the [OpenGL reference page sources]. +//! +//! [OpenGL reference page sources]: https://github.com/KhronosGroup/OpenGL-Refpages +#![cfg_attr(doc_cfg, feature(doc_cfg))] +#![deny(clippy::all, clippy::pedantic, missing_docs)] + +use std::os::unix::prelude::OsStrExt; + +use include_dir::{include_dir, Dir}; + +use crate::description::{Description, Error as DescriptionError}; +use crate::xml::element::{Attribute, Elements, FromElements}; +use crate::xml::parser::{Error as ParserError, Parser}; + +pub mod description; + +mod util; +mod xml; + +static GL4_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/OpenGL-Refpages/gl4"); + +/// Reference entry. +#[derive(Debug)] +pub struct ReferenceEntry +{ + purpose: String, + description: Description, +} + +impl ReferenceEntry +{ + /// Returns a function reference entry. + /// + /// # Errors + /// Returns `Err` if + /// - No reference entry file was found. + /// - Parsing the reference entry file data fails. + /// - The reference entry file data is invalid. + pub fn get_function(function_name: &str) -> Result<ReferenceEntry, ReferenceEntryError> + { + let function_file = GL4_DIR + .files() + .find_map(|file| { + if file.path().extension()?.as_bytes() != b"xml" { + return None; + } + + if function_name.starts_with(file.path().file_stem()?.to_str()?) { + Some(file) + } else { + None + } + }) + .ok_or_else(|| ReferenceEntryError::NoFileFound(function_name.to_string()))?; + + let function_ref_content = function_file.contents(); + + let mut parser = Parser::new(function_ref_content); + + let root_elements = parser.parse()?; + + ReferenceEntry::from_elements(&root_elements) + } + + /// Returns the reference entry purpose. + #[must_use] + pub fn purpose(&self) -> &str + { + &self.purpose + } + + /// Returns the reference entry description. 
+ #[must_use] + pub fn description(&self) -> &Description + { + &self.description + } +} + +impl FromElements for ReferenceEntry +{ + type Error = ReferenceEntryError; + + fn from_elements(elements: &Elements) -> Result<Self, Self::Error> + { + let refentry_element = elements + .get_first_tagged_with_name("refentry") + .ok_or(ReferenceEntryError::MissingRefEntry)?; + + let refnamediv_element = refentry_element + .child_elements() + .get_first_tagged_with_name("refnamediv") + .ok_or(ReferenceEntryError::MissingRefNameDiv)?; + + let refpurpose_element = refnamediv_element + .child_elements() + .get_first_tagged_with_name("refpurpose") + .ok_or(ReferenceEntryError::MissingRefPurpose)?; + + let purpose = refpurpose_element + .child_elements() + .get_first_text_element() + .cloned() + .unwrap_or_default(); + + let description_refsect = refentry_element + .child_elements() + .get_first_tagged_with_name_and_attr( + "refsect1", + &Attribute { + key: "xml:id".to_string(), + value: b"description".to_vec(), + }, + ) + .ok_or(ReferenceEntryError::MissingDescriptionRefSect)?; + + let description = + Description::from_elements(description_refsect.child_elements())?; + + Ok(ReferenceEntry { + purpose, + description, + }) + } +} + +/// [`ReferenceEntry`] error. +#[derive(Debug, thiserror::Error)] +pub enum ReferenceEntryError +{ + /// No reference entry file was found. + #[error("No reference entry file was found for '{0}'")] + NoFileFound(String), + + /// No 'refentry' element was found. + #[error("No 'refentry' element was found")] + MissingRefEntry, + + /// No 'refnamediv' element was found. + #[error("No 'refnamediv' element was found")] + MissingRefNameDiv, + + /// No 'refpurpose' element was found. + #[error("No 'refpurpose' element was found")] + MissingRefPurpose, + + /// No 'refsect1' element was found with id 'description''. + #[error("No 'refsect1' element was found with id 'description'")] + MissingDescriptionRefSect, + + /// Invalid description. 
+ #[error("Invalid description")] + InvalidDescription(#[from] DescriptionError), + + /// Parsing failed. + #[error("Parsing failed")] + ParsingFailed(#[from] ParserError), +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..309a471 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,34 @@ +macro_rules! enum_with_get_inner { + ( + inner = $inner: ty; + $(#[$attr: meta])* + $visibility: vis enum $enum_name: ident { + $( + $(#[$variant_attr: meta])* + $variant: ident($variant_inner: ty), + )* + } + ) => { + $(#[$attr])* + $visibility enum $enum_name { + $( + $(#[$variant_attr])* + $variant($inner) + ),* + } + + impl $enum_name { + /// Returns the inner value. + #[must_use] + pub fn inner(&self) -> &$inner { + match self { + $( + $enum_name::$variant(inner) => inner + ),* + } + } + } + }; +} + +pub(crate) use enum_with_get_inner; diff --git a/src/xml/element.rs b/src/xml/element.rs new file mode 100644 index 0000000..647fe90 --- /dev/null +++ b/src/xml/element.rs @@ -0,0 +1,186 @@ +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Elements +{ + elements: Vec<Element>, +} + +impl Elements +{ + pub fn get_first_tagged(&self) -> Option<&Tagged> + { + self.elements.iter().find_map(|element| match element { + Element::Tagged(tagged_element) => Some(tagged_element), + _ => None, + }) + } + + pub fn get_first_tagged_with_name(&self, tag_name: &str) -> Option<&Tagged> + { + self.elements.iter().find_map(|element| match element { + Element::Tagged(tagged_element) if tagged_element.name == tag_name => { + Some(tagged_element) + } + _ => None, + }) + } + + pub fn get_first_tagged_with_name_and_attr( + &self, + tag_name: &str, + attribute: &Attribute, + ) -> Option<&Tagged> + { + self.elements.iter().find_map(|element| match element { + Element::Tagged(tagged_element) + if tagged_element.name == tag_name + && tagged_element + .attributes + .iter() + .any(|attr| attr == attribute) => + { + Some(tagged_element) + } + _ => None, + }) + } + + pub fn 
get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> + { + self.elements + .iter() + .filter_map(|element| match element { + Element::Tagged(tagged_element) if tagged_element.name == tag_name => { + Some(tagged_element) + } + _ => None, + }) + .collect() + } + + pub fn get_first_text_element(&self) -> Option<&String> + { + self.elements.iter().find_map(|element| match element { + Element::Text(text) => Some(text), + _ => None, + }) + } + + pub fn get_all_text_elements(&self) -> Vec<&String> + { + self.elements + .iter() + .filter_map(|element| match element { + Element::Text(text) => Some(text), + _ => None, + }) + .collect() + } + + pub fn has_tagged_element(&self, tag_name: &str) -> bool + { + self.elements.iter().any(|element| { + matches!( + element, + Element::Tagged(tagged_element) if tagged_element.name == tag_name + ) + }) + } +} + +impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements +{ + fn from(into_iter: IntoIter) -> Self + { + Self { + elements: into_iter.into_iter().collect(), + } + } +} + +impl<'elements> IntoIterator for &'elements Elements +{ + type IntoIter = Iter<'elements>; + type Item = &'elements Element; + + fn into_iter(self) -> Self::IntoIter + { + Self::IntoIter { + elements: self.elements.iter(), + } + } +} + +pub struct Iter<'elements> +{ + elements: std::slice::Iter<'elements, Element>, +} + +impl<'elements> Iterator for Iter<'elements> +{ + type Item = &'elements Element; + + fn next(&mut self) -> Option<Self::Item> + { + self.elements.next() + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Element +{ + Tagged(Tagged), + Text(String), + Comment(String), +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Tagged +{ + name: String, + child_elements: Elements, + attributes: Vec<Attribute>, +} + +impl Tagged +{ + pub fn new<Name, ChildElements, Attrs>( + name: &Name, + child_elements: ChildElements, + attributes: Attrs, + ) -> Self + where + Name: ToString, + ChildElements: Into<Elements>, + Attrs: IntoIterator<Item = Attribute>, + { + Self { + name: name.to_string(), + child_elements: child_elements.into(), + 
attributes: attributes.into_iter().collect(), + } + } + + pub fn name(&self) -> &str + { + &self.name + } + + pub fn child_elements(&self) -> &Elements + { + &self.child_elements + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Attribute +{ + pub key: String, + pub value: Vec<u8>, +} + +pub trait FromElements: Sized +{ + type Error; + + fn from_elements(elements: &Elements) -> Result<Self, Self::Error>; +} diff --git a/src/xml/mod.rs b/src/xml/mod.rs new file mode 100644 index 0000000..12368c3 --- /dev/null +++ b/src/xml/mod.rs @@ -0,0 +1,2 @@ +pub mod element; +pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs new file mode 100644 index 0000000..9cdafb1 --- /dev/null +++ b/src/xml/parser.rs @@ -0,0 +1,221 @@ +use std::io::BufRead; + +use quick_xml::events::attributes::AttrError; +use quick_xml::events::{BytesStart, BytesText, Event}; +use quick_xml::Reader; + +use crate::xml::element::{Attribute, Element, Elements, Tagged}; + +/// XML parser. +pub struct Parser<Source> +{ + reader: Reader<Source>, +} + +impl<Source: BufRead> Parser<Source> +{ + pub fn new(src: Source) -> Self + { + Self { + reader: Reader::from_reader(src), + } + } + + pub fn parse(&mut self) -> Result<Elements, Error> + { + let mut buf = Vec::new(); + + let mut elements = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + let element = match self.handle_event(event)? { + EventHandlingResult::Element(element) => element, + EventHandlingResult::Event(_) => { + continue; + } + EventHandlingResult::End => { + break; + } + }; + + elements.push(element); + } + + Ok(elements.into()) + } + + fn parse_text(text: &BytesText) -> Result<String, Error> + { + String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) + } + + fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> + { + let mut child_elements = Vec::new(); + + let mut buf = Vec::new(); + + loop { + let event = self.reader.read_event_into(&mut buf)?; + + match event { + Event::End(end) if end.name() == start.name() => { + break; + } + event => match self.handle_event(event)? 
{ + EventHandlingResult::Element(element) => { + child_elements.push(element); + } + EventHandlingResult::End => { + return Err(Error::UnexpectedEndOfFile); + } + EventHandlingResult::Event(_) => {} + }, + } + } + + let attributes = start + .attributes() + .map(|attr_result| { + let attr = attr_result?; + + Ok(Attribute { + key: String::from_utf8(attr.key.as_ref().to_vec()) + .map_err(|_| Error::TagAttributeKeyNotUTF8)?, + value: attr.value.into_owned(), + }) + }) + .collect::<Result<Vec<_>, Error>>()?; + + Ok(Element::Tagged(Tagged::new( + &String::from_utf8(start.name().as_ref().to_vec()) + .map_err(|_| Error::TagNameNotUTF8)?, + child_elements, + attributes, + ))) + } + + fn handle_event<'a>( + &'a mut self, + event: Event<'a>, + ) -> Result<EventHandlingResult, Error> + { + match event { + Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( + Self::parse_text(&text)?, + ))), + Event::Start(start) => { + Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) + } + Event::End(_) => Err(Error::UnexpectedTagEnd), + Event::Eof => Ok(EventHandlingResult::End), + Event::Comment(comment_text) => Ok(EventHandlingResult::Element( + Element::Comment(Self::parse_text(&comment_text)?), + )), + event => Ok(EventHandlingResult::Event(event)), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error +{ + #[error(transparent)] + QuickXMLFailed(#[from] quick_xml::Error), + + #[error("Text is not UTF-8")] + TextNotUTF8, + + #[error("Tag name is not UTF-8")] + TagNameNotUTF8, + + #[error("Invalid attribute")] + InvalidTagAttr(#[from] AttrError), + + #[error("Tag attribute key is not UTF-8")] + TagAttributeKeyNotUTF8, + + #[error("Unexpectedly found the end of a tag")] + UnexpectedTagEnd, + + #[error("Unexpected end of file")] + UnexpectedEndOfFile, +} + +enum EventHandlingResult<'event> +{ + Element(Element), + Event(Event<'event>), + End, +} + +#[cfg(test)] +mod tests +{ + use pretty_assertions::assert_eq; + + use super::*; + + #[test] + fn can_parse() + { + let mut parser = 
Parser::new("<foo>Hello there</foo>".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![Element::Text("Hello there".to_string())], + Vec::new() + ))]) + ); + + let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Tagged(Tagged::new( + &"bar", + Elements::from(vec![Element::Text("123".to_string())]), + Vec::new() + )), + Element::Text(" Hello".to_string()) + ], + Vec::new() + ))]) + ); + + let mut parser = Parser::new("".as_bytes()); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(Vec::new()) + ); + + let mut parser = Parser::new( + "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), + ); + + assert_eq!( + parser.parse().expect("Expected Ok"), + Elements::from(vec![Element::Tagged(Tagged::new( + &"foo", + vec![ + Element::Comment("XML is awful".to_string()), + Element::Text("Hello there".to_string()), + Element::Tagged(Tagged::new( + &"bar", + vec![Element::Text("123".to_string())], + Vec::new() + )), + ], + Vec::new() + ))]) + ); + } +} -- cgit v1.2.3-18-g5258