aboutsummaryrefslogtreecommitdiff
path: root/src/deserialization
diff options
context:
space:
mode:
authorHampusM <hampus@hampusmat.com>2023-03-25 17:42:28 +0100
committerHampusM <hampus@hampusmat.com>2023-03-25 17:42:28 +0100
commitadd06dafdf874b1b419e5eef918c6b1131ab09fd (patch)
treec1d52d3ece248d96562a3d77beb44973e7720847 /src/deserialization
parentf49d77c2961be28c3cc500af185813dd5e83a367 (diff)
perf: improve XML deserialization speed
Diffstat (limited to 'src/deserialization')
-rw-r--r--src/deserialization/buffer_deserializer.rs210
-rw-r--r--src/deserialization/mod.rs124
2 files changed, 334 insertions, 0 deletions
diff --git a/src/deserialization/buffer_deserializer.rs b/src/deserialization/buffer_deserializer.rs
new file mode 100644
index 0000000..652e1ff
--- /dev/null
+++ b/src/deserialization/buffer_deserializer.rs
@@ -0,0 +1,210 @@
+use std::any::type_name;
+use std::error::Error;
+use std::io::BufRead;
+
+use quick_xml::events::{BytesStart, Event};
+use quick_xml::Reader;
+
+use crate::deserialization::{
+ Deserialize,
+ Deserializer,
+ DeserializerError,
+ IgnoreEnd,
+ WrappedDeserializeError,
+};
+
+/// Yields the next XML event for `$self`, preferring a stashed leftover event.
+///
+/// Expands to an expression evaluating to an owned [`Event`]. The expansion
+/// contains `?` and `return`, so it can only be used inside functions that
+/// return `Result<_, DeserializerError>`:
+/// - errors from the reader are propagated with `?`;
+/// - [`Event::Eof`] makes the enclosing function return
+///   `Err(DeserializerError::UnexpectedEndOfFile)`.
+macro_rules! read_event {
+    ($self: ident) => {{
+        let event = if let Some(leftover_event) = $self.leftover_event.take() {
+            leftover_event
+        } else {
+            // `read_event_into` appends to the buffer instead of overwriting it.
+            // Every event is converted to an owned one before it leaves this
+            // macro, so nothing borrows from the buffer here and it can be
+            // emptied to keep it from growing with the whole document.
+            $self.buf.clear();
+
+            $self.reader.read_event_into(&mut $self.buf)?.into_owned()
+        };
+
+        if let Event::Eof = &event {
+            return Err(DeserializerError::UnexpectedEndOfFile);
+        }
+
+        event
+    }};
+}
+
+/// [`Deserializer`] implementation that reads XML events from a `Source`
+/// through a `quick_xml` [`Reader`].
+pub struct BufferDeserializer<Source>
+{
+ /// The `quick_xml` reader that events are read from.
+ reader: Reader<Source>,
+ /// An event that was read but not consumed. When set, it is handed out by
+ /// the next read instead of a fresh event from `reader`.
+ leftover_event: Option<Event<'static>>,
+ /// Buffer passed to `reader` for every event read.
+ buf: Vec<u8>,
+}
+
+impl<Source> BufferDeserializer<Source>
+where
+    Source: BufRead,
+{
+    /// Creates a deserializer that reads XML from `source`.
+    ///
+    /// The underlying reader is configured to trim text and to expand empty
+    /// elements. No leftover event is stashed and the read buffer starts empty.
+    pub fn new(source: Source) -> Self
+    {
+        let mut xml_reader = Reader::from_reader(source);
+
+        xml_reader.trim_text(true).expand_empty_elements(true);
+
+        Self {
+            reader: xml_reader,
+            leftover_event: None,
+            buf: Vec::new(),
+        }
+    }
+}
+
+impl<Source> Deserializer for BufferDeserializer<Source>
+where
+    Source: BufRead,
+{
+    /// Deserializes the tag named `tag_name` with `De`'s [`Deserialize`]
+    /// implementation.
+    ///
+    /// Equivalent to calling [`Self::de_tag_with`] with `De::deserialize`.
+    fn de_tag<De: Deserialize>(
+        &mut self,
+        tag_name: &str,
+        ignore_end: IgnoreEnd,
+    ) -> Result<De, DeserializerError>
+    {
+        self.de_tag_with(tag_name, ignore_end, De::deserialize)
+    }
+
+    /// Deserializes the tag named `tag_name` by calling `deserialize`.
+    ///
+    /// The next event must be the start of `tag_name`. When `ignore_end` is
+    /// [`IgnoreEnd::No`], the matching end event is read after `deserialize`
+    /// has returned successfully.
+    ///
+    /// # Errors
+    /// - [`DeserializerError::UnexpectedEvent`] if the next event is not the
+    ///   start of `tag_name`. The event is stashed and handed out again by
+    ///   the next read.
+    /// - [`DeserializerError::DeserializeFailed`] if `deserialize` fails.
+    /// - Any error from reading events, including an unexpected end of file
+    ///   or a mismatching end event.
+    fn de_tag_with<Output, Err, DeserializeFn>(
+        &mut self,
+        tag_name: &str,
+        ignore_end: IgnoreEnd,
+        deserialize: DeserializeFn,
+    ) -> Result<Output, DeserializerError>
+    where
+        Err: Error + Send + Sync + 'static,
+        DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result<Output, Err>,
+    {
+        let start = match read_event!(self) {
+            Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => start,
+            event => {
+                // Events produced by `read_event!` are already owned, so a plain
+                // clone is enough to both stash the event and report it.
+                self.leftover_event = Some(event.clone());
+
+                return Err(DeserializerError::UnexpectedEvent {
+                    expected_event_name: format!("start({tag_name})"),
+                    found_event: event,
+                });
+            }
+        };
+
+        let deserialized = deserialize(&start, self).map_err(|err| {
+            DeserializerError::DeserializeFailed(
+                type_name::<Output>(),
+                WrappedDeserializeError::new(err),
+            )
+        })?;
+
+        if let IgnoreEnd::No = ignore_end {
+            self.read_end_event(tag_name)?;
+        }
+
+        Ok(deserialized)
+    }
+
+    /// Deserializes consecutive tags named `tag_name` into a `Vec`.
+    ///
+    /// Comment events between the tags are skipped. The first event that is
+    /// neither a comment nor the start of `tag_name` ends the list; it is
+    /// stashed and handed out again by the next read.
+    ///
+    /// # Errors
+    /// - [`DeserializerError::DeserializeFailed`] if deserializing an item fails.
+    /// - Any error from reading events, including an unexpected end of file
+    ///   or an item that is not followed by its end event.
+    fn de_tag_list<De: Deserialize>(
+        &mut self,
+        tag_name: &str,
+    ) -> Result<Vec<De>, DeserializerError>
+    {
+        let mut deserialized_items = Vec::new();
+
+        loop {
+            let start = match read_event!(self) {
+                Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => {
+                    start
+                }
+                Event::Comment(_) => continue,
+                event => {
+                    self.leftover_event = Some(event);
+                    break;
+                }
+            };
+
+            let deserialized = De::deserialize(&start, self).map_err(|err| {
+                DeserializerError::DeserializeFailed(
+                    type_name::<De>(),
+                    WrappedDeserializeError::new(err),
+                )
+            })?;
+
+            self.read_end_event(tag_name)?;
+
+            deserialized_items.push(deserialized);
+        }
+
+        Ok(deserialized_items)
+    }
+
+    /// Reads a text event and returns its unescaped content.
+    ///
+    /// # Errors
+    /// - [`DeserializerError::UnexpectedEvent`] if the next event is not text.
+    ///   The event is stashed and handed out again by the next read.
+    /// - Any error from reading the event or from unescaping the text.
+    fn de_text(&mut self) -> Result<String, DeserializerError>
+    {
+        let text = match read_event!(self) {
+            Event::Text(text) => text,
+            event => {
+                self.leftover_event = Some(event.clone());
+
+                return Err(DeserializerError::UnexpectedEvent {
+                    expected_event_name: "text".to_string(),
+                    found_event: event,
+                });
+            }
+        };
+
+        // `into_owned` reuses the allocation when the unescaped text is already
+        // owned, where `to_string` would always copy it.
+        Ok(text.unescape()?.into_owned())
+    }
+
+    /// Discards events until the start of the tag `tag_name` is found.
+    ///
+    /// The start event itself is stashed, so the next read returns it.
+    ///
+    /// # Errors
+    /// Any error from reading events, including an unexpected end of file.
+    fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>
+    {
+        loop {
+            let event = read_event!(self);
+
+            if matches!(
+                &event,
+                Event::Start(start) if start.name().as_ref() == tag_name.as_bytes()
+            ) {
+                self.leftover_event = Some(event);
+
+                return Ok(());
+            }
+        }
+    }
+
+    /// Discards events until the end of the tag `tag_name` is found.
+    ///
+    /// The end event itself is stashed, so the next read returns it.
+    ///
+    /// # Errors
+    /// Any error from reading events, including an unexpected end of file.
+    fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>
+    {
+        loop {
+            let event = read_event!(self);
+
+            if matches!(
+                &event,
+                Event::End(end) if end.name().as_ref() == tag_name.as_bytes()
+            ) {
+                self.leftover_event = Some(event);
+
+                return Ok(());
+            }
+        }
+    }
+}
+
+impl<Source> BufferDeserializer<Source>
+where
+    Source: BufRead,
+{
+    /// Reads the next event and checks that it is the end of the tag `tag_name`.
+    ///
+    /// # Errors
+    /// Returns [`DeserializerError::UnexpectedEvent`] when the event is anything
+    /// else; that event is consumed, not stashed as a leftover. Errors from
+    /// reading the event, including an unexpected end of file, are propagated.
+    fn read_end_event(&mut self, tag_name: &str) -> Result<(), DeserializerError>
+    {
+        match read_event!(self) {
+            Event::End(end) if end.name().as_ref() == tag_name.as_bytes() => Ok(()),
+            event => Err(DeserializerError::UnexpectedEvent {
+                // Name the expected tag, like the "start(..)" errors do, so the
+                // message says which end tag was missing.
+                expected_event_name: format!("end({tag_name})"),
+                found_event: event,
+            }),
+        }
+    }
+}
diff --git a/src/deserialization/mod.rs b/src/deserialization/mod.rs
new file mode 100644
index 0000000..fa25e4b
--- /dev/null
+++ b/src/deserialization/mod.rs
@@ -0,0 +1,124 @@
+use std::error::Error;
+use std::ops::Deref;
+
+use quick_xml::events::{BytesStart, Event};
+
+pub mod buffer_deserializer;
+
+/// A type that can be built from an XML tag using a [`Deserializer`].
+pub trait Deserialize: Sized
+{
+ /// Error returned when deserialization fails.
+ type Error: Error + Send + Sync + 'static;
+
+ /// Builds `Self` from the tag whose start event is `start`.
+ ///
+ /// `deserializer` is the deserializer that produced `start`; it is passed
+ /// mutably so the implementation can read further events from it.
+ fn deserialize<TDeserializer: Deserializer>(
+ start: &BytesStart,
+ deserializer: &mut TDeserializer,
+ ) -> Result<Self, Self::Error>;
+}
+
+/// Event-based XML deserializer interface handed to [`Deserialize`]
+/// implementations.
+///
+/// The behavior notes below describe the implementation in
+/// [`buffer_deserializer`].
+pub trait Deserializer
+{
+ /// Deserializes the tag named `tag_name` into a `De`.
+ ///
+ /// With [`IgnoreEnd::No`] the tag's end event is read as well.
+ fn de_tag<De: Deserialize>(
+ &mut self,
+ tag_name: &str,
+ ignore_end: IgnoreEnd,
+ ) -> Result<De, DeserializerError>;
+
+ /// Deserializes the tag named `tag_name` by calling `deserialize` with the
+ /// tag's start event and this deserializer.
+ ///
+ /// With [`IgnoreEnd::No`] the tag's end event is read as well.
+ fn de_tag_with<Output, Err, DeserializeFn>(
+ &mut self,
+ tag_name: &str,
+ ignore_end: IgnoreEnd,
+ deserialize: DeserializeFn,
+ ) -> Result<Output, DeserializerError>
+ where
+ Err: Error + Send + Sync + 'static,
+ DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result<Output, Err>;
+
+ /// Deserializes consecutive tags named `tag_name` into a `Vec` of `De`.
+ fn de_tag_list<De: Deserialize>(
+ &mut self,
+ tag_name: &str,
+ ) -> Result<Vec<De>, DeserializerError>;
+
+ /// Reads a text event and returns its content as a `String`.
+ fn de_text(&mut self) -> Result<String, DeserializerError>;
+
+ /// Skips events until the start of the tag named `tag_name`.
+ fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>;
+
+ /// Skips events until the end of the tag named `tag_name`.
+ fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>;
+}
+
+/// Tells [`Deserializer::de_tag`] and [`Deserializer::de_tag_with`] whether
+/// the end event of the deserialized tag should be left unread.
+pub enum IgnoreEnd
+{
+ /// Do not read the tag's end event.
+ Yes,
+ /// Read the tag's end event after the tag has been deserialized.
+ No,
+}
+
+/// Function pointer type passable to [`Deserializer::de_tag_with`].
+///
+/// The function receives the start event of the tag and the deserializer,
+/// and returns the deserialized `Output` or an `Err`.
+pub type DeserializeWithFn<Output, Err, Deserializer> =
+ fn(&BytesStart, &mut Deserializer) -> Result<Output, Err>;
+
+/// Error returned by [`Deserializer`] methods.
+#[derive(Debug, thiserror::Error)]
+pub enum DeserializerError
+{
+ /// A `quick_xml` operation failed. Created from [`quick_xml::Error`].
+ #[error("Failed to read")]
+ ReadFailed(#[from] quick_xml::Error),
+
+ /// A deserialize function failed. Holds a name for what was being
+ /// deserialized and the error it returned.
+ #[error("Failed to deserialize {0}")]
+ DeserializeFailed(&'static str, #[source] WrappedDeserializeError),
+
+ /// An event other than the expected one was found.
+ #[error("Expected {expected_event_name} event. Found {found_event:?}")]
+ UnexpectedEvent
+ {
+ /// Description of the event that was expected.
+ expected_event_name: String,
+ /// The event that was found instead.
+ found_event: Event<'static>,
+ },
+
+ /// The end of the input was reached while more events were expected.
+ #[error("Unexpected end of file")]
+ UnexpectedEndOfFile,
+}
+
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub struct WrappedDeserializeError(Box<dyn Error + Send + Sync>);
+
+impl WrappedDeserializeError
+{
+ fn new<Err: Error + Send + Sync + 'static>(err: Err) -> Self
+ {
+ Self(Box::new(err))
+ }
+}
+
+impl Deref for WrappedDeserializeError
+{
+    type Target = dyn Error;
+
+    /// Gives access to the wrapped error as a plain `dyn Error`.
+    fn deref(&self) -> &Self::Target
+    {
+        &*self.0
+    }
+}
+
+/// Extension trait for results of [`Deserializer`] operations.
+pub trait ResultExt<Value>
+{
+ /// Converts a result so that an unexpected event becomes `Ok(None)`
+ /// instead of an error. See the implementation for
+ /// `Result<Value, DeserializerError>`.
+ fn try_event(self) -> Result<Option<Value>, DeserializerError>;
+}
+
+impl<Value> ResultExt<Value> for Result<Value, DeserializerError>
+{
+    /// Turns an "unexpected event" failure into the absence of a value.
+    ///
+    /// - `Ok(value)` becomes `Ok(Some(value))`.
+    /// - `Err(DeserializerError::UnexpectedEvent { .. })` becomes `Ok(None)`.
+    /// - Every other error is returned unchanged.
+    fn try_event(self) -> Result<Option<Value>, DeserializerError>
+    {
+        match self {
+            Ok(value) => Ok(Some(value)),
+            Err(DeserializerError::UnexpectedEvent { .. }) => Ok(None),
+            Err(other) => Err(other),
+        }
+    }
+}