aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.toml3
-rw-r--r--src/attribute.rs170
-rw-r--r--src/deserializer/buffered.rs212
-rw-r--r--src/deserializer/mod.rs179
-rw-r--r--src/event.rs54
-rw-r--r--src/lib.rs54
-rw-r--r--src/tagged.rs62
7 files changed, 734 insertions, 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 02dc18c..721ab10 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,3 +5,6 @@ edition = "2021"
license = "MIT OR Apache-2.0"
[dependencies]
+quick-xml = "0.27.1"
+thiserror = "1.0.38"
+
diff --git a/src/attribute.rs b/src/attribute.rs
new file mode 100644
index 0000000..8fb4778
--- /dev/null
+++ b/src/attribute.rs
@@ -0,0 +1,170 @@
+//! Attribute.
+
+use quick_xml::events::attributes::{
+ AttrError,
+ Attribute as QuickXMLAttribute,
+ Attributes,
+};
+
+/// Represents an XML attribute.
+///
+/// Wraps quick-xml's attribute type and exposes its key and value as byte slices.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Attribute<'a>
+{
+ // The wrapped quick-xml attribute.
+ inner: QuickXMLAttribute<'a>,
+}
+
+impl<'a> Attribute<'a>
+{
+    /// Returns the attribute key as bytes.
+    #[must_use]
+    pub fn key(&self) -> &[u8]
+    {
+        let key = &self.inner.key;
+
+        key.as_ref()
+    }
+
+    /// Returns the attribute value as bytes.
+    #[must_use]
+    pub fn value(&self) -> &[u8]
+    {
+        self.inner.value.as_ref()
+    }
+}
+
+// Crate-local functions
+impl<'a> Attribute<'a>
+{
+    /// Wraps an attribute produced by quick-xml.
+    pub(crate) fn from_inner(inner: QuickXMLAttribute<'a>) -> Self
+    {
+        Attribute { inner }
+    }
+}
+
+/// Errors that can be raised when parsing [`Attribute`]s.
+///
+/// The recovery position in the examples shows the position from which parsing
+/// of the next attribute will be attempted.
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum Error
+{
+ /// Attribute key was not followed by `=`, position relative to the start of
+ /// the owning tag is provided.
+ ///
+ /// Example of input that raises this error:
+ /// ```xml
+ /// <tag key another="attribute"/>
+ /// <!--     ^~~ error position, recovery position (8) -->
+ /// ```
+ #[error("Position {0}: attribute key must be directly followed by `=` or space")]
+ ExpectedEq(usize),
+
+ /// Attribute value was not found after `=`, position relative to the start
+ /// of the owning tag is provided.
+ ///
+ /// Example of input that raises this error:
+ /// ```xml
+ /// <tag key = />
+ /// <!--       ^~~ error position, recovery position (10) -->
+ /// ```
+ ///
+ /// This error can be returned only for the last attribute in the list,
+ /// because otherwise any content after `=` will be treated as a value.
+ /// The XML
+ /// ```xml
+ /// <tag key = another-key = "value"/>
+ /// <!--                   ^ ^- recovery position (24) -->
+ /// <!--                   '~~ error position (22) -->
+ /// ```
+ ///
+ /// will be treated as `Attribute { key = b"key", value = b"another-key" }`
+ /// and either an [`Attribute`] is returned or [`Error::UnquotedValue`] is
+ /// raised, depending on the parsing mode.
+ #[error("Position {0}: `=` must be followed by an attribute value")]
+ ExpectedValue(usize),
+
+ /// Attribute value is not quoted, position relative to the start of the
+ /// owning tag is provided.
+ ///
+ /// Example of input that raises this error:
+ /// ```xml
+ /// <tag key = value />
+ /// <!--       ^    ^~~ recovery position (15) -->
+ /// <!--       '~~ error position (10) -->
+ /// ```
+ #[error("Position {0}: attribute value must be enclosed in `\"` or `'`")]
+ UnquotedValue(usize),
+
+ /// Attribute value was not finished with a matching quote, position relative
+ /// to the start of the owning tag and the expected quote are provided. That
+ /// position is always the last character in the tag content.
+ ///
+ /// Example of input that raises this error:
+ /// ```xml
+ /// <tag key = "value  />
+ /// <tag key = 'value  />
+ /// <!--               ^~~ error position, recovery position (18) -->
+ /// ```
+ ///
+ /// This error can be returned only for the last attribute in the list,
+ /// because all input was consumed during scanning for a quote.
+ #[error("Position {0}: missing closing quote `{1}` in attribute value")]
+ ExpectedQuote(usize, u8),
+
+ /// An attribute with the same name was already encountered. Two parameters
+ /// define (1) the error position relative to the start of the owning tag
+ /// for a new attribute and (2) the start position of a previously encountered
+ /// attribute with the same name.
+ ///
+ /// Example of input that raises this error:
+ /// ```xml
+ /// <tag key = 'value'  key="value2" attr3='value3' />
+ /// <!-- ^              ^            ^~~ recovery position (32) -->
+ /// <!-- |              '~~ error position (19) -->
+ /// <!-- '~~ previous position (4) -->
+ /// ```
+ #[error("Position {0}: duplicated attribute, previous declaration at position {1}")]
+ Duplicated(usize, usize),
+}
+
+impl From<AttrError> for Error
+{
+    /// Maps every quick-xml attribute error onto the variant of the same name,
+    /// carrying the positions (and quote byte) over unchanged.
+    fn from(attr_err: AttrError) -> Self
+    {
+        match attr_err {
+            AttrError::ExpectedEq(position) => Error::ExpectedEq(position),
+            AttrError::ExpectedValue(position) => Error::ExpectedValue(position),
+            AttrError::UnquotedValue(position) => Error::UnquotedValue(position),
+            AttrError::ExpectedQuote(position, quote_byte) => {
+                Error::ExpectedQuote(position, quote_byte)
+            }
+            AttrError::Duplicated(position, previous_position) => {
+                Error::Duplicated(position, previous_position)
+            }
+        }
+    }
+}
+
+/// Iterates through [`Attribute`]s.
+///
+/// Each item is either a parsed [`Attribute`] or the [`Error`] raised while
+/// parsing it.
+#[derive(Debug)]
+pub struct Iter<'a>
+{
+ // The wrapped quick-xml attribute iterator.
+ attrs: Attributes<'a>,
+}
+
+impl<'a> Iter<'a>
+{
+    /// Wraps a quick-xml attribute iterator.
+    pub(crate) fn new(attrs: Attributes<'a>) -> Self
+    {
+        Iter { attrs }
+    }
+}
+
+impl<'a> Iterator for Iter<'a>
+{
+    type Item = Result<Attribute<'a>, Error>;
+
+    /// Advances the wrapped iterator, converting both the attribute and the
+    /// error into this crate's own types.
+    fn next(&mut self) -> Option<Self::Item>
+    {
+        self.attrs.next().map(|parsed| match parsed {
+            Ok(inner) => Ok(Attribute::from_inner(inner)),
+            Err(attr_err) => Err(Error::from(attr_err)),
+        })
+    }
+}
diff --git a/src/deserializer/buffered.rs b/src/deserializer/buffered.rs
new file mode 100644
index 0000000..7a6058b
--- /dev/null
+++ b/src/deserializer/buffered.rs
@@ -0,0 +1,212 @@
+//! Buffered XML deserializer.
+use std::convert::Infallible;
+use std::io::BufRead;
+
+use quick_xml::events::Event;
+use quick_xml::Reader;
+
+use crate::deserializer::{Deserializer, Error, IgnoreEnd};
+use crate::event::EventExt;
+use crate::tagged::TagStart;
+use crate::DeserializeTagged;
+
+/// XML deserializer using a source which has an internal buffer.
+pub struct Buffered<Source: BufRead>
+{
+ // quick-xml reader pulling events from the source.
+ reader: Reader<Source>,
+ // Event that has been read but not consumed; `read_event` hands it out
+ // before reading from `reader` again.
+ leftover_event: Option<Event<'static>>,
+ // Buffer passed to the reader on every event read.
+ buf: Vec<u8>,
+}
+
+impl<Source> Buffered<Source>
+where
+    Source: BufRead,
+{
+    /// Creates a [`Buffered`] deserializer reading from `source`.
+    ///
+    /// The underlying reader is configured to trim text and to expand empty
+    /// elements.
+    pub fn new(source: Source) -> Self
+    {
+        let mut reader = Reader::from_reader(source);
+
+        reader.trim_text(true).expand_empty_elements(true);
+
+        Buffered {
+            reader,
+            leftover_event: None,
+            buf: Vec::new(),
+        }
+    }
+}
+
+impl<Source> Deserializer for Buffered<Source>
+where
+    Source: BufRead,
+{
+    /// Deserializes the element named `tag_name` through `De`'s
+    /// [`DeserializeTagged`] implementation.
+    fn de_tag<De: DeserializeTagged>(
+        &mut self,
+        tag_name: &str,
+        ignore_end: IgnoreEnd,
+    ) -> Result<De, Error<De::Error>>
+    {
+        self.de_tag_with(tag_name, ignore_end, De::deserialize)
+    }
+
+    /// Deserializes the element named `tag_name` with the `deserialize` function.
+    ///
+    /// If the next event is not the start of `tag_name`, that event is kept as a
+    /// leftover (so the next read sees it again) and
+    /// [`Error::UnexpectedEvent`] is returned. Unless `ignore_end` is
+    /// [`IgnoreEnd::Yes`], the matching end tag is read afterwards.
+    fn de_tag_with<Output, Err, DeserializeFn>(
+        &mut self,
+        tag_name: &str,
+        ignore_end: IgnoreEnd,
+        deserialize: DeserializeFn,
+    ) -> Result<Output, Error<Err>>
+    where
+        Err: std::error::Error + Send + Sync + 'static,
+        DeserializeFn: FnOnce(&TagStart, &mut Self) -> Result<Output, Err>,
+    {
+        let deserialized = match self.read_event()? {
+            Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => {
+                deserialize(&TagStart::from_inner(start), self)
+                    .map_err(Error::DeserializeFailed)?
+            }
+            event => {
+                // Describing fails on non-UTF-8 content; fall back to the debug
+                // representation instead of panicking on malformed input.
+                let found_event =
+                    event.describe().unwrap_or_else(|_| format!("{event:?}"));
+
+                self.leftover_event = Some(event);
+
+                return Err(Error::UnexpectedEvent {
+                    expected_event_name: format!("start({tag_name})"),
+                    found_event,
+                });
+            }
+        };
+
+        if let IgnoreEnd::No = ignore_end {
+            self.read_end_event(tag_name.as_bytes())
+                .map_err(Error::into_with_de_error)?;
+        }
+
+        Ok(deserialized)
+    }
+
+    /// Deserializes consecutive elements into a list.
+    ///
+    /// With `tag_name` set, only start tags with that name are accepted; with
+    /// `None`, any start tag is. Comments between items are skipped. The first
+    /// other event ends the list and is kept as a leftover.
+    fn de_tag_list<De: DeserializeTagged>(
+        &mut self,
+        tag_name: Option<&str>,
+    ) -> Result<Vec<De>, Error<De::Error>>
+    {
+        let mut deserialized_items = Vec::new();
+
+        loop {
+            let start = match self.read_event()? {
+                Event::Start(start)
+                    if tag_name.map_or(true, |expected_tag_name| {
+                        start.name().as_ref() == expected_tag_name.as_bytes()
+                    }) =>
+                {
+                    TagStart::from_inner(start)
+                }
+                Event::Comment(_) => {
+                    continue;
+                }
+                event => {
+                    self.leftover_event = Some(event);
+                    break;
+                }
+            };
+
+            let deserialized =
+                De::deserialize(&start, self).map_err(Error::DeserializeFailed)?;
+
+            self.read_end_event(start.name())
+                .map_err(Error::into_with_de_error)?;
+
+            deserialized_items.push(deserialized);
+        }
+
+        Ok(deserialized_items)
+    }
+
+    /// Reads a text event and returns its unescaped content.
+    ///
+    /// Any other event is kept as a leftover and reported as
+    /// [`Error::UnexpectedEvent`].
+    fn de_text(&mut self) -> Result<String, Error<Infallible>>
+    {
+        let text = match self.read_event::<Infallible>()? {
+            Event::Text(text) => text,
+            event => {
+                // Same fallback as for tags: never panic on non-UTF-8 content.
+                let found_event =
+                    event.describe().unwrap_or_else(|_| format!("{event:?}"));
+
+                self.leftover_event = Some(event);
+
+                return Err(Error::UnexpectedEvent {
+                    expected_event_name: "text".to_string(),
+                    found_event,
+                });
+            }
+        };
+
+        let unescaped = text
+            .unescape()
+            .map_err(|err| Error::<Infallible>::XMLError(err.into()))?;
+
+        Ok(unescaped.into_owned())
+    }
+
+    /// Discards events until the start tag of `tag_name` is read; that start
+    /// tag is kept as a leftover so the next read returns it.
+    fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), Error<Infallible>>
+    {
+        loop {
+            match self.read_event::<Infallible>()? {
+                Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => {
+                    self.leftover_event = Some(Event::Start(start));
+
+                    return Ok(());
+                }
+                _ => {}
+            }
+        }
+    }
+
+    /// Discards events until the end tag of `tag_name` is read; that end tag
+    /// is kept as a leftover so the next read returns it.
+    fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), Error<Infallible>>
+    {
+        loop {
+            match self.read_event::<Infallible>()? {
+                Event::End(end) if end.name().as_ref() == tag_name.as_bytes() => {
+                    self.leftover_event = Some(Event::End(end));
+
+                    return Ok(());
+                }
+                _ => {}
+            }
+        }
+    }
+}
+
+impl<Source> Buffered<Source>
+where
+    Source: BufRead,
+{
+    /// Reads the next event and checks that it is the end tag of `tag_name`.
+    ///
+    /// # Errors
+    /// Returns [`Error::UnexpectedEvent`] if the event is anything else. The
+    /// mismatching event is consumed; it is not kept as a leftover.
+    fn read_end_event(&mut self, tag_name: &[u8]) -> Result<(), Error<Infallible>>
+    {
+        let event = self.read_event::<Infallible>()?;
+
+        if matches!(&event, Event::End(end) if end.name().as_ref() == tag_name) {
+            return Ok(());
+        }
+
+        Err(Error::UnexpectedEvent {
+            expected_event_name: "end".to_string(),
+            // Describing fails on non-UTF-8 content; fall back to the debug
+            // representation instead of panicking on malformed input.
+            found_event: event.describe().unwrap_or_else(|_| format!("{event:?}")),
+        })
+    }
+
+    /// Returns the leftover event if there is one, otherwise reads the next
+    /// event from the reader.
+    ///
+    /// # Errors
+    /// Returns [`Error::XMLError`] if the reader fails and
+    /// [`Error::UnexpectedEndOfFile`] if the event is the end of the file.
+    fn read_event<DeError>(&mut self) -> Result<Event<'static>, Error<DeError>>
+    {
+        let event = if let Some(leftover_event) = self.leftover_event.take() {
+            leftover_event
+        } else {
+            // Events are turned into owned ones right below, so nothing borrows
+            // from the buffer between reads. Clearing it here keeps it from
+            // growing with every event that is read.
+            self.buf.clear();
+
+            self.reader
+                .read_event_into(&mut self.buf)
+                .map_err(|err| Error::<DeError>::XMLError(err.into()))?
+                .into_owned()
+        };
+
+        if matches!(event, Event::Eof) {
+            return Err(Error::UnexpectedEndOfFile);
+        }
+
+        Ok(event)
+    }
+}
diff --git a/src/deserializer/mod.rs b/src/deserializer/mod.rs
new file mode 100644
index 0000000..bd0c0e4
--- /dev/null
+++ b/src/deserializer/mod.rs
@@ -0,0 +1,179 @@
+//! Deserializer.
+use std::convert::Infallible;
+
+use crate::tagged::TagStart;
+use crate::DeserializeTagged;
+
+pub mod buffered;
+
+/// XML deserializer.
+///
+/// Implemented by types that can read tagged elements and text from an XML
+/// source. See [`buffered::Buffered`] for the implementation in this crate.
+pub trait Deserializer
+{
+ /// Deserializes a tagged element named `tag_name` into `De`.
+ ///
+ /// `ignore_end` controls whether the end tag of the element is left unread.
+ ///
+ /// # Errors
+ /// Returns `Err` if deserialization fails.
+ fn de_tag<De: DeserializeTagged>(
+ &mut self,
+ tag_name: &str,
+ ignore_end: IgnoreEnd,
+ ) -> Result<De, Error<De::Error>>;
+
+ /// Deserializes a tagged element named `tag_name` using the given function.
+ ///
+ /// `deserialize` receives the start tag and the deserializer itself.
+ ///
+ /// # Errors
+ /// Returns `Err` if deserialization fails.
+ fn de_tag_with<Output, Err, DeserializeFn>(
+ &mut self,
+ tag_name: &str,
+ ignore_end: IgnoreEnd,
+ deserialize: DeserializeFn,
+ ) -> Result<Output, Error<Err>>
+ where
+ Err: std::error::Error + Send + Sync + 'static,
+ DeserializeFn: FnOnce(&TagStart, &mut Self) -> Result<Output, Err>;
+
+ /// Deserializes a list of tagged elements.
+ ///
+ /// In the [`buffered::Buffered`] implementation, a `tag_name` of `None`
+ /// accepts elements with any name.
+ ///
+ /// # Errors
+ /// Returns `Err` if deserialization fails.
+ fn de_tag_list<De: DeserializeTagged>(
+ &mut self,
+ tag_name: Option<&str>,
+ ) -> Result<Vec<De>, Error<De::Error>>;
+
+ /// Deserializes a text element.
+ ///
+ /// # Errors
+ /// Returns `Err` if deserialization fails.
+ fn de_text(&mut self) -> Result<String, Error<Infallible>>;
+
+ /// Skips past all elements until a tagged element with the name `tag_name` is
+ /// reached.
+ ///
+ /// # Errors
+ /// Returns `Err` if unsuccessful.
+ fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), Error<Infallible>>;
+
+ /// Skips past all elements until the end of a tagged element with the name `tag_name`
+ /// is reached.
+ ///
+ /// # Errors
+ /// Returns `Err` if unsuccessful.
+ fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), Error<Infallible>>;
+}
+
+/// Whether or not to skip the end tag of a tagged element.
+///
+/// In the [`buffered::Buffered`] implementation, "skipping" means the end tag is
+/// not read after the element has been deserialized.
+///
+/// **Should be `No`**.
+#[derive(Debug, Default)]
+pub enum IgnoreEnd
+{
+ /// Skip the end tag.
+ ///
+ /// **Will cause problems in most cases and should be used very carefully**.
+ Yes,
+
+ /// Don't skip the end tag. This is the default.
+ #[default]
+ No,
+}
+
+/// [`Deserializer`] error.
+///
+/// `DeError` is the error type of the deserialization code itself; it is carried
+/// by [`Error::DeserializeFailed`].
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum Error<DeError>
+{
+ /// An XML error occurred.
+ #[error("A XML error occurred")]
+ XMLError(#[source] XMLError),
+
+ /// Failed to deserialize.
+ #[error("Failed to deserialize")]
+ DeserializeFailed(#[from] DeError),
+
+ /// An event other than the expected one was found.
+ #[error("Expected {expected_event_name} event. Found {found_event}")]
+ UnexpectedEvent
+ {
+ /// The name of the expected event.
+ expected_event_name: String,
+
+ /// A description of the found event.
+ found_event: String,
+ },
+
+ /// Unexpected end of file.
+ #[error("Unexpected end of file")]
+ UnexpectedEndOfFile,
+}
+
+impl<DeError> Error<DeError>
+{
+    /// Converts `self` into an [`Error`] whose `DeError` is [`Infallible`].
+    ///
+    /// # Panics
+    /// Will panic if `self` is the `DeserializeFailed` variant.
+    pub fn into_never_de_err(self) -> Error<Infallible>
+    {
+        match self {
+            Self::DeserializeFailed(_) => panic!("is a deserialization error"),
+            Self::XMLError(source) => Error::XMLError(source),
+            Self::UnexpectedEndOfFile => Error::UnexpectedEndOfFile,
+            Self::UnexpectedEvent {
+                expected_event_name: expected,
+                found_event: found,
+            } => Error::UnexpectedEvent {
+                expected_event_name: expected,
+                found_event: found,
+            },
+        }
+    }
+}
+
+impl Error<Infallible>
+{
+    /// Converts an error that cannot hold a deserialization error into an
+    /// [`Error`] with any `DeError`.
+    fn into_with_de_error<DeError>(self) -> Error<DeError>
+    {
+        match self {
+            // `Infallible` has no values, so this arm can never be taken.
+            Self::DeserializeFailed(never) => match never {},
+            Self::XMLError(source) => Error::XMLError(source),
+            Self::UnexpectedEndOfFile => Error::UnexpectedEndOfFile,
+            Self::UnexpectedEvent {
+                expected_event_name: expected,
+                found_event: found,
+            } => Error::UnexpectedEvent {
+                expected_event_name: expected,
+                found_event: found,
+            },
+        }
+    }
+}
+
+impl From<Error<Error<Infallible>>> for Error<Infallible>
+{
+    /// Flattens a nested error: an inner deserializer error is returned as is,
+    /// every other variant is rebuilt unchanged.
+    fn from(err: Error<Error<Infallible>>) -> Self
+    {
+        match err {
+            Error::DeserializeFailed(inner) => inner,
+            Error::XMLError(source) => Self::XMLError(source),
+            Error::UnexpectedEndOfFile => Self::UnexpectedEndOfFile,
+            Error::UnexpectedEvent {
+                expected_event_name: expected,
+                found_event: found,
+            } => Self::UnexpectedEvent {
+                expected_event_name: expected,
+                found_event: found,
+            },
+        }
+    }
+}
+
+/// XML error.
+///
+/// Transparent wrapper around [`quick_xml::Error`].
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub struct XMLError(#[from] quick_xml::Error);
diff --git a/src/event.rs b/src/event.rs
new file mode 100644
index 0000000..ae0624d
--- /dev/null
+++ b/src/event.rs
@@ -0,0 +1,54 @@
+use std::str::Utf8Error;
+
+use quick_xml::events::Event;
+
+/// Extension methods for quick-xml [`Event`]s.
+#[allow(clippy::module_name_repetitions)]
+pub trait EventExt
+{
+ /// Returns a human-readable description of the event.
+ ///
+ /// # Errors
+ /// Returns `Err` if the event content that is being described is not valid
+ /// UTF-8.
+ fn describe(&self) -> Result<String, Utf8Error>;
+}
+
+impl<'a> EventExt for Event<'a>
+{
+    /// Builds a short description naming the kind of event and, where there is
+    /// any, its name or content as UTF-8 text.
+    fn describe(&self) -> Result<String, Utf8Error>
+    {
+        /// Interprets `bytes` as UTF-8 text.
+        fn utf8(bytes: &[u8]) -> Result<&str, Utf8Error>
+        {
+            std::str::from_utf8(bytes)
+        }
+
+        let description = match self {
+            Event::Start(start) => {
+                let name = start.name();
+
+                format!("tag start with name \"{}\"", utf8(name.as_ref())?)
+            }
+            Event::End(end) => {
+                let name = end.name();
+
+                format!("tag end with name \"{}\"", utf8(name.as_ref())?)
+            }
+            Event::Empty(start) => {
+                let name = start.name();
+
+                format!("empty tag with name \"{}\"", utf8(name.as_ref())?)
+            }
+            Event::Text(text) => format!("text \"{}\"", utf8(text)?),
+            Event::Comment(comment) => format!("comment \"{}\"", utf8(comment)?),
+            Event::CData(cdata) => format!("cdata \"{}\"", utf8(cdata)?),
+            Event::Decl(_) => String::from("XML declaration"),
+            Event::PI(processing_instruction) => {
+                format!(
+                    "processing instruction \"{}\"",
+                    utf8(processing_instruction)?
+                )
+            }
+            Event::DocType(_) => String::from("doctype"),
+            Event::Eof => String::from("end of file"),
+        };
+
+        Ok(description)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 8b13789..e5086bc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1 +1,55 @@
+//! XML is awful.
+#![deny(clippy::all, clippy::pedantic, unsafe_code, missing_docs)]
+use crate::deserializer::{Deserializer, Error as DeserializerError};
+use crate::tagged::TagStart;
+pub mod attribute;
+pub mod deserializer;
+pub mod tagged;
+
+mod event;
+
+/// Trait implemented by types that want to be deserializable from tagged XML elements.
+pub trait DeserializeTagged: Sized
+{
+ /// Error type returned when deserialization fails.
+ type Error: std::error::Error + Send + Sync + 'static;
+
+ /// Deserializes into a new `Self`.
+ ///
+ /// `start` is the start tag of the element; `deserializer` gives access to
+ /// what follows it.
+ ///
+ /// # Errors
+ /// When or if an error is returned is decided by the type implementing this trait.
+ fn deserialize<TDeserializer: Deserializer>(
+ start: &TagStart,
+ deserializer: &mut TDeserializer,
+ ) -> Result<Self, Self::Error>;
+}
+
+/// Result extension.
+///
+/// Implemented for `Result<Value, DeserializerError<DeError>>`.
+pub trait ResultExt<Value, DeError>
+{
+ /// Returns `Ok(None)` if `Err` is `DeserializerError::UnexpectedEvent`.
+ ///
+ /// An `Ok` value is returned as `Ok(Some(value))` and every other error is
+ /// passed through unchanged.
+ ///
+ /// # Errors
+ /// Returns `Err` for every error other than `DeserializerError::UnexpectedEvent`.
+ fn try_event(self) -> Result<Option<Value>, DeserializerError<DeError>>;
+}
+
+impl<Value, DeError> ResultExt<Value, DeError>
+    for Result<Value, DeserializerError<DeError>>
+{
+    /// Turns an unexpected-event error into `Ok(None)`, wraps a value in `Some`
+    /// and leaves every other error as it is.
+    fn try_event(self) -> Result<Option<Value>, DeserializerError<DeError>>
+    {
+        match self {
+            Ok(value) => Ok(Some(value)),
+            Err(DeserializerError::UnexpectedEvent { .. }) => Ok(None),
+            Err(other) => Err(other),
+        }
+    }
+}
diff --git a/src/tagged.rs b/src/tagged.rs
new file mode 100644
index 0000000..19ae03b
--- /dev/null
+++ b/src/tagged.rs
@@ -0,0 +1,62 @@
+//! Tagged element.
+
+use std::borrow::Cow;
+use std::str::Utf8Error;
+
+use quick_xml::events::BytesStart;
+
+use crate::attribute::Iter as AttributeIter;
+
+/// The start tag of a tagged element.
+///
+/// The `<xyz foo="bar">` in `<xyz foo="bar">Hello</xyz>`
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TagStart<'a>
+{
+ // The wrapped quick-xml start tag.
+ inner: BytesStart<'a>,
+}
+
+impl<'a> TagStart<'a>
+{
+    /// Returns a new `TagStart` with the given name.
+    pub fn new(name: impl Into<Cow<'a, str>>) -> Self
+    {
+        Self {
+            inner: BytesStart::new(name),
+        }
+    }
+
+    /// Returns the tag name.
+    #[must_use]
+    pub fn name(&self) -> &[u8]
+    {
+        // Only the length is taken from the name handle; the bytes themselves
+        // are sliced from the front of the tag content, which this relies on
+        // starting with the name.
+        let name_length = self.inner.name().as_ref().len();
+
+        &self.inner.as_ref()[..name_length]
+    }
+
+    /// Returns the tag name in UTF-8.
+    ///
+    /// # Errors
+    /// Returns `Err` if the name is not valid UTF-8.
+    pub fn name_utf8(&self) -> Result<&str, Utf8Error>
+    {
+        std::str::from_utf8(self.name())
+    }
+
+    /// Returns an iterator over the tag attributes.
+    ///
+    /// The iterator borrows `self` only for as long as the iterator itself is
+    /// used, not for the whole lifetime of the tag content.
+    #[must_use]
+    pub fn attributes(&self) -> AttributeIter<'_>
+    {
+        AttributeIter::new(self.inner.attributes())
+    }
+}
+
+// Crate-local functions
+impl<'a> TagStart<'a>
+{
+    /// Wraps a start tag produced by quick-xml.
+    pub(crate) fn from_inner(inner: BytesStart<'a>) -> Self
+    {
+        TagStart { inner }
+    }
+}