From 1a405580b30f0b877ca48cd95ca51da9e5723667 Mon Sep 17 00:00:00 2001 From: HampusM Date: Sun, 14 May 2023 15:46:53 +0200 Subject: feat: add Attribute functions --- src/attribute.rs | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++------ src/escape.rs | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 src/escape.rs diff --git a/src/attribute.rs b/src/attribute.rs index 8fb4778..4ae2142 100644 --- a/src/attribute.rs +++ b/src/attribute.rs @@ -1,30 +1,78 @@ //! Attribute. +use std::borrow::Cow; +use std::str::Utf8Error; + use quick_xml::events::attributes::{ AttrError, Attribute as QuickXMLAttribute, Attributes, }; +use quick_xml::name::QName; +use quick_xml::Error as QuickXMLError; + +use crate::escape::EscapeError; /// Represent a XML attribute. #[derive(Debug, Clone, PartialEq)] -pub struct Attribute<'a> +pub struct Attribute<'data> { - inner: QuickXMLAttribute<'a>, + inner: QuickXMLAttribute<'data>, } -impl<'a> Attribute<'a> +impl<'data> Attribute<'data> { - /// Attribute key. + /// Returns a new `Attribute`. + pub fn new(key: &'data impl AsRef<[u8]>, value: impl Into<Cow<'data, [u8]>>) -> Self + { + Self { + inner: QuickXMLAttribute { + key: QName(key.as_ref()), + value: value.into(), + }, + } + } + + /// Returns the key. + /// + /// # Errors + /// Will return `Err` if the key is invalid UTF-8. + pub fn key(&self) -> Result<&str, Error> + { + std::str::from_utf8(self.key_bytes()).map_err(Error::KeyNotUTF8) + } + + /// Returns the key as bytes. #[must_use] - pub fn key(&self) -> &[u8] + pub fn key_bytes(&self) -> &[u8] { self.inner.key.as_ref() } - /// Attribute value. + /// Returns the value. 
+ /// + /// # Errors + /// Will return `Err` if: + /// - The value is invalid UTF-8 + /// - Unescaping the value fails + pub fn value(&self) -> Result<Cow<str>, Error> + { + self.inner.unescape_value().map_err(|err| match err { + QuickXMLError::NonDecodable(Some(utf8_error)) => { + Error::ValueNotUTF8(utf8_error) + } + QuickXMLError::EscapeError(escape_err) => { + Error::UnescapeValueFailed(EscapeError::from_quick_xml(escape_err)) + } + _ => { + unreachable!(); + } + }) + } + + /// Returns the value as bytes. They may or may not be escaped. #[must_use] - pub fn value(&self) -> &[u8] + pub fn value_bytes(&self) -> &[u8] { &self.inner.value } @@ -124,6 +172,18 @@ pub enum Error /// ``` #[error("Position {0}: duplicated attribute, previous declaration at position {1}")] Duplicated(usize, usize), + + /// Attribute key is not valid UTF-8. + #[error("Attribute key is not valid UTF-8")] + KeyNotUTF8(#[source] Utf8Error), + + /// Attribute value is not valid UTF-8. + #[error("Attribute value is not valid UTF-8")] + ValueNotUTF8(#[source] Utf8Error), + + /// Failed to unescape value. + #[error("Failed to unescape value")] + UnescapeValueFailed(#[source] EscapeError), } impl From<AttrError> for Error diff --git a/src/escape.rs b/src/escape.rs new file mode 100644 index 0000000..c58a5ae --- /dev/null +++ b/src/escape.rs @@ -0,0 +1,74 @@ +//! XML character escape things. +use std::ops::Range; + +use quick_xml::escape::EscapeError as QuickXMLEscapeError; + +/// Escape/unescape error. +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +#[allow(clippy::module_name_repetitions)] +pub enum EscapeError +{ + /// Null character entity not allowed. + #[error( + "Error while escaping character at range {0:?}: Null character entity not allowed" + )] + EntityWithNull(Range<usize>), + + /// Unrecognized escape symbol. + #[error( + "Error while escaping character at range {0:?}: Unrecognized escape symbol: {1:?}" + )] + UnrecognizedSymbol(Range<usize>, String), + + /// Missing ; after &. 
+ #[error("Error while escaping character at range {0:?}: Cannot find ';' after '&'")] + UnterminatedEntity(Range<usize>), + + /// Hexadecimal value is too long to convert to UTF-8. + #[error("Hexadecimal value is too long to convert to UTF-8")] + TooLongHexadecimal, + + /// Invalid hexadecimal character. + #[error("{0}' is not a valid hexadecimal character")] + InvalidHexadecimal(char), + + /// Decimal value is too long to convert to UTF-8. + #[error("Decimal value is too long to convert to UTF-8")] + TooLongDecimal, + + /// Invalid decimal character. + #[error("'{0}' is not a valid decimal character")] + InvalidDecimal(char), + + /// Invalid codepoint. + #[error("'{0}' is not a valid codepoint")] + InvalidCodepoint(u32), +} + +impl EscapeError +{ + pub(crate) fn from_quick_xml(err: QuickXMLEscapeError) -> Self + { + match err { + QuickXMLEscapeError::EntityWithNull(range) => Self::EntityWithNull(range), + QuickXMLEscapeError::UnrecognizedSymbol(range, symbol) => { + Self::UnrecognizedSymbol(range, symbol) + } + QuickXMLEscapeError::UnterminatedEntity(range) => { + Self::UnterminatedEntity(range) + } + QuickXMLEscapeError::TooLongHexadecimal => Self::TooLongHexadecimal, + QuickXMLEscapeError::InvalidHexadecimal(character) => { + Self::InvalidHexadecimal(character) + } + QuickXMLEscapeError::TooLongDecimal => Self::TooLongDecimal, + QuickXMLEscapeError::InvalidDecimal(character) => { + Self::InvalidDecimal(character) + } + QuickXMLEscapeError::InvalidCodepoint(codepoint) => { + Self::InvalidCodepoint(codepoint) + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 9cf3716..c07e0a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ use crate::tagged::TagStart; pub mod attribute; pub mod deserializer; +pub mod escape; pub mod tagged; mod event; -- cgit v1.2.3-18-g5258