From add06dafdf874b1b419e5eef918c6b1131ab09fd Mon Sep 17 00:00:00 2001 From: HampusM Date: Sat, 25 Mar 2023 17:42:28 +0100 Subject: perf: improve XML deserialization speed --- src/command.rs | 559 +++++++++++++++++++++++++---- src/deserialization/buffer_deserializer.rs | 210 +++++++++++ src/deserialization/mod.rs | 124 +++++++ src/lib.rs | 70 ++-- src/xml/element.rs | 144 -------- src/xml/mod.rs | 2 - src/xml/parser.rs | 195 ---------- 7 files changed, 857 insertions(+), 447 deletions(-) create mode 100644 src/deserialization/buffer_deserializer.rs create mode 100644 src/deserialization/mod.rs delete mode 100644 src/xml/element.rs delete mode 100644 src/xml/mod.rs delete mode 100644 src/xml/parser.rs (limited to 'src') diff --git a/src/command.rs b/src/command.rs index c7ada95..2ba92ea 100644 --- a/src/command.rs +++ b/src/command.rs @@ -1,5 +1,14 @@ //! OpenGL command. -use crate::xml::element::{Elements, FromElements}; +use quick_xml::events::BytesStart; + +use crate::deserialization::{ + Deserialize, + DeserializeWithFn, + Deserializer, + DeserializerError, + IgnoreEnd, + ResultExt, +}; /// A command. #[derive(Debug, Clone, PartialEq, Eq)] @@ -38,25 +47,21 @@ impl Command } } -impl FromElements for Command +impl Deserialize for Command { type Error = Error; - fn from_elements( - elements: &crate::xml::element::Elements, + fn deserialize( + start: &BytesStart, + deserializer: &mut TDeserializer, ) -> Result { - let proto_element = elements - .get_first_tagged_element("proto") - .ok_or(Self::Error::MissingPrototype)?; + let prototype = deserializer.de_tag::("proto", IgnoreEnd::No)?; - let prototype = Prototype::from_elements(proto_element.child_elements())?; + let parameters = deserializer.de_tag_list::("param")?; - let parameters = elements - .get_all_tagged_elements_with_name("param") - .into_iter() - .map(|param_element| Parameter::from_elements(param_element.child_elements())) - .collect::, _>>()?; + deserializer + .skip_to_tag_end(std::str::from_utf8(start.name().as_ref()).unwrap())?; Ok(Self { prototype, @@ -80,6 +85,10 @@ pub enum Error /// Invalid parameter. #[error("Invalid parameter")] InvalidParameter(#[from] ParameterError), + + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), } /// A command prototype. @@ -116,23 +125,22 @@ impl Prototype } } -impl FromElements for Prototype +impl Deserialize for Prototype { type Error = PrototypeError; - fn from_elements( - elements: &crate::xml::element::Elements, + fn deserialize( + _start: &BytesStart, + deserializer: &mut TDeserializer, ) -> Result { - let name = elements - .get_first_tagged_element("name") - .ok_or(Self::Error::MissingName)? - .child_elements() - .get_first_text_element() - .cloned() - .unwrap_or_default(); + let return_type = deserialize_type::(deserializer)?; - let return_type = find_type(elements); + let name = deserializer.de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( + "name", + IgnoreEnd::No, + |_, deserializer| deserializer.de_text(), + )?; Ok(Self { name, return_type }) } @@ -145,6 +153,14 @@ pub enum PrototypeError /// No 'name' element was found. #[error("No 'name' element was found")] MissingName, + + /// No return type was found. + #[error("No return type was found")] + MissingReturnType, + + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), } /// A command parameter. @@ -181,21 +197,22 @@ impl Parameter } } -impl FromElements for Parameter +impl Deserialize for Parameter { type Error = ParameterError; - fn from_elements(elements: &Elements) -> Result + fn deserialize( + _start: &BytesStart, + deserializer: &mut TDeserializer, + ) -> Result { - let name = elements - .get_first_tagged_element("name") - .ok_or(Self::Error::MissingName)? - .child_elements() - .get_first_text_element() - .cloned() - .unwrap_or_default(); + let ty = deserialize_type::(deserializer)?; - let ty = find_type(elements); + let name = deserializer.de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( + "name", + IgnoreEnd::No, + |_, deserializer| deserializer.de_text(), + )?; Ok(Self { name, ty }) } @@ -208,46 +225,47 @@ pub enum ParameterError /// No 'name' element was found. #[error("No 'name' element was found")] MissingName, + + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), } -fn find_type(elements: &Elements) -> String +fn deserialize_type(deserializer: &mut impl Deserializer) -> Result +where + Err: From, { - let text_type_parts = elements - .get_all_text_elements() - .into_iter() - .map(|text_type_part| text_type_part.trim()) - .filter(|text_type_part| !text_type_part.is_empty()) - .collect::>(); + let type_before = deserializer.de_text().try_event()?; - let opt_ptype_text = get_ptype_text(elements); + let type_ptype = deserializer + .de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( + "ptype", + IgnoreEnd::No, + |_, deserializer| deserializer.de_text(), + ) + .try_event()?; - opt_ptype_text.map_or_else( - || join_space_strs(text_type_parts.iter()), - |ptype_text| { - let Some(first_part) = text_type_parts.first() else { - return ptype_text.clone(); - }; + let type_after = deserializer.de_text().try_event()?; - let before = if *first_part == "const" { "const " } else { "" }; + let type_before_after = [type_before.clone(), type_after.clone()] + .into_iter() + .flatten(); - let after_start_index = usize::from(*first_part == "const"); + Ok(type_ptype.map_or_else( + || join_space_strs(type_before_after), + |ptype_text| { + let before = type_before + .map(|before| format!("{before} ")) + .unwrap_or_default(); format!( - "{before}{ptype_text} {}", - text_type_parts - .get(after_start_index..) - .map(|parts| join_space_strs(parts.iter())) + "{before}{ptype_text}{}", + type_after + .map(|after| format!(" {after}")) .unwrap_or_default() ) }, - ) -} - -fn get_ptype_text(elements: &Elements) -> Option<&String> -{ - let ptype_element = elements.get_first_tagged_element("ptype")?; - - ptype_element.child_elements().get_first_text_element() + )) } fn join_space_strs(strings: Strings) -> String @@ -261,3 +279,420 @@ where .collect::>() .join(" ") } + +#[cfg(test)] +mod tests +{ + use pretty_assertions::assert_str_eq; + use quick_xml::events::Event; + use ridicule::mock; + use ridicule::predicate::{always, eq, function}; + + use super::*; + + mock! { + MockDeserializer {} + + impl Deserializer for MockDeserializer { + fn de_tag( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + ) -> Result; + + fn de_tag_with( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + deserialize: DeserializeFn, + ) -> Result + where + Err: std::error::Error + Send + Sync + 'static, + DeserializeFn: FnOnce(&BytesStart, &mut MockDeserializer) -> Result; + + fn de_tag_list( + &mut self, + tag_name: &str + ) -> Result, DeserializerError>; + + fn de_text(&mut self) -> Result; + + fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + + fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + } + } + + #[test] + fn deserialize_prototype_works_with_ptype() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("ptype")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLuint".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("glDoComplicatedThing".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let prototype = + Prototype::deserialize(&BytesStart::new("proto"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(prototype.name, "glDoComplicatedThing"); + assert_str_eq!(prototype.return_type, "GLuint"); + } + + #[test] + fn deserialize_prototype_works_with_text() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("void".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|_, _, _, _| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "start".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("glDoSomeThing".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let prototype = + Prototype::deserialize(&BytesStart::new("proto"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(prototype.name, "glDoSomeThing"); + assert_str_eq!(prototype.return_type, "void"); + } + + #[test] + fn deserialize_parameter_works_with_ptype_only() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("ptype")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLenum".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("value".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "value"); + assert_str_eq!(parameter.ty, "GLenum"); + } + + #[test] + fn deserialize_parameter_works_with_ptype_and_text_after() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("ptype")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLchar".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("*".to_string())) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("source".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "source"); + assert_str_eq!(parameter.ty, "GLchar *"); + } + + #[test] + fn deserialize_parameter_works_with_ptype_and_text_before_and_after() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("const".to_string())) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLchar".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("*".to_string())) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("name".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "name"); + assert_str_eq!(parameter.ty, "const GLchar *"); + } + + #[test] + fn deserialize_parameter_works_with_text() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("void *".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|_, _, _, _| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "start".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("pixels".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "pixels"); + assert_str_eq!(parameter.ty, "void *"); + } +} diff --git a/src/deserialization/buffer_deserializer.rs b/src/deserialization/buffer_deserializer.rs new file mode 100644 index 0000000..652e1ff --- /dev/null +++ b/src/deserialization/buffer_deserializer.rs @@ -0,0 +1,210 @@ +use std::any::type_name; +use std::error::Error; +use std::io::BufRead; + +use quick_xml::events::{BytesStart, Event}; +use quick_xml::Reader; + +use crate::deserialization::{ + Deserialize, + Deserializer, + DeserializerError, + IgnoreEnd, + WrappedDeserializeError, +}; + +macro_rules! read_event { + ($self: ident) => {{ + let event = if let Some(leftover_event) = $self.leftover_event.take() { + leftover_event + } else { + $self.reader.read_event_into(&mut $self.buf)?.into_owned() + }; + + if let Event::Eof = &event { + return Err(DeserializerError::UnexpectedEndOfFile); + } + + event + }}; +} + +pub struct BufferDeserializer +{ + reader: Reader, + leftover_event: Option>, + buf: Vec, +} + +impl BufferDeserializer +where + Source: BufRead, +{ + pub fn new(source: Source) -> Self + { + let mut reader = Reader::from_reader(source); + + reader.trim_text(true); + reader.expand_empty_elements(true); + + Self { + reader, + leftover_event: None, + buf: Vec::new(), + } + } +} + +impl Deserializer for BufferDeserializer +where + Source: BufRead, +{ + fn de_tag( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + ) -> Result + { + self.de_tag_with(tag_name, ignore_end, De::deserialize) + } + + fn de_tag_with( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + deserialize: DeserializeFn, + ) -> Result + where + Err: Error + Send + Sync + 'static, + DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result, + { + let deserialized = match read_event!(self) { + Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { + deserialize(&start, self).map_err(|err| { + DeserializerError::DeserializeFailed( + type_name::(), + WrappedDeserializeError::new(err), + ) + })? + } + event => { + self.leftover_event = Some(event.clone().into_owned()); + + return Err(DeserializerError::UnexpectedEvent { + expected_event_name: format!("start({tag_name})"), + found_event: event, + }); + } + }; + + if let IgnoreEnd::No = ignore_end { + self.read_end_event(tag_name)?; + } + + Ok(deserialized) + } + + fn de_tag_list( + &mut self, + tag_name: &str, + ) -> Result, DeserializerError> + { + let mut deserialized_items = Vec::new(); + + loop { + let start = match read_event!(self) { + Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { + start + } + Event::Comment(_) => { + continue; + } + event => { + self.leftover_event = Some(event.into_owned()); + break; + } + }; + + let deserialized = De::deserialize(&start, self).map_err(|err| { + DeserializerError::DeserializeFailed( + type_name::(), + WrappedDeserializeError::new(err), + ) + })?; + + self.read_end_event(tag_name)?; + + deserialized_items.push(deserialized); + } + + Ok(deserialized_items) + } + + fn de_text(&mut self) -> Result + { + let text = match read_event!(self) { + Event::Text(text) => Ok(text), + event => { + self.leftover_event = Some(event.clone().into_owned()); + + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: event, + }) + } + }? + .unescape()?; + + Ok(text.to_string()) + } + + fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError> + { + loop { + match read_event!(self) { + Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { + self.leftover_event = Some(Event::Start(start).into_owned()); + + break; + } + _ => {} + } + } + + Ok(()) + } + + fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError> + { + loop { + match read_event!(self) { + Event::End(end) if end.name().as_ref() == tag_name.as_bytes() => { + self.leftover_event = Some(Event::End(end).into_owned()); + + return Ok(()); + } + _ => {} + } + } + } +} + +impl BufferDeserializer +where + Source: BufRead, +{ + fn read_end_event(&mut self, tag_name: &str) -> Result<(), DeserializerError> + { + let event = read_event!(self); + + if matches!(&event, Event::End(end) if end.name().as_ref() == tag_name.as_bytes()) + { + return Ok(()); + } + + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "end".to_string(), + found_event: event.into_owned(), + }) + } +} diff --git a/src/deserialization/mod.rs b/src/deserialization/mod.rs new file mode 100644 index 0000000..fa25e4b --- /dev/null +++ b/src/deserialization/mod.rs @@ -0,0 +1,124 @@ +use std::error::Error; +use std::ops::Deref; + +use quick_xml::events::{BytesStart, Event}; + +pub mod buffer_deserializer; + +pub trait Deserialize: Sized +{ + type Error: Error + Send + Sync + 'static; + + fn deserialize( + start: &BytesStart, + deserializer: &mut TDeserializer, + ) -> Result; +} + +pub trait Deserializer +{ + fn de_tag( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + ) -> Result; + + fn de_tag_with( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + deserialize: DeserializeFn, + ) -> Result + where + Err: Error + Send + Sync + 'static, + DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result; + + fn de_tag_list( + &mut self, + tag_name: &str, + ) -> Result, DeserializerError>; + + fn de_text(&mut self) -> Result; + + fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + + fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>; +} + +pub enum IgnoreEnd +{ + Yes, + No, +} + +/// Function pointer type passable to [`Deserializer::de_tag_with`]. +pub type DeserializeWithFn = + fn(&BytesStart, &mut Deserializer) -> Result; + +#[derive(Debug, thiserror::Error)] +pub enum DeserializerError +{ + #[error("Failed to read")] + ReadFailed(#[from] quick_xml::Error), + + #[error("Failed to deserialize {0}")] + DeserializeFailed(&'static str, #[source] WrappedDeserializeError), + + #[error("Expected {expected_event_name} event. Found {found_event:?}")] + UnexpectedEvent + { + expected_event_name: String, + found_event: Event<'static>, + }, + + #[error("Unexpected end of file")] + UnexpectedEndOfFile, +} + +#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct WrappedDeserializeError(Box); + +impl WrappedDeserializeError +{ + fn new(err: Err) -> Self + { + Self(Box::new(err)) + } +} + +impl Deref for WrappedDeserializeError +{ + type Target = dyn Error; + + fn deref(&self) -> &Self::Target + { + self.0.as_ref() + } +} + +pub trait ResultExt +{ + fn try_event(self) -> Result, DeserializerError>; +} + +impl ResultExt for Result +{ + fn try_event(self) -> Result, DeserializerError> + { + self.map_or_else( + |err| { + if let DeserializerError::UnexpectedEvent { + expected_event_name: _, + found_event: _, + } = err + { + return Ok(None); + } + + Err(err) + }, + |value| Ok(Some(value)), + ) + } +} diff --git a/src/lib.rs b/src/lib.rs index c305ae3..f3bbc54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,20 +24,24 @@ use std::fs::File; use std::io::Read; +use quick_xml::events::BytesStart; + use crate::command::{Command, Error as CommandError}; -use crate::xml::element::{Element, Elements, FromElements}; -use crate::xml::parser::{Error as ParserError, Parser}; +use crate::deserialization::buffer_deserializer::BufferDeserializer; +use crate::deserialization::{Deserialize, Deserializer, DeserializerError, IgnoreEnd}; pub mod command; -mod xml; +mod deserialization; +/// XML. #[cfg(feature = "include-xml")] const GL_REGISTRY_XML: &[u8] = include_bytes!("../OpenGL-Registry/xml/gl.xml"); const REGISTRY_TAG_NAME: &str = "registry"; /// Representation of the OpenGL registry. +#[derive(Debug, PartialEq, Eq)] pub struct Registry { commands: Vec, @@ -62,15 +66,12 @@ impl Registry /// Returns `Err` if parsing fails in any way. pub fn retrieve_from_bytes(xml_bytes: &[u8]) -> Result { - let mut parser = Parser::new(xml_bytes); - - let elements = parser.parse().map_err(ParsingError)?; + let mut deserializer = BufferDeserializer::new(xml_bytes); - let registry_element = elements - .get_first_tagged_element(REGISTRY_TAG_NAME) - .ok_or(RegistryError::MissingRegistryElement)?; + deserializer.skip_to_tag_start(REGISTRY_TAG_NAME)?; - let registry = Registry::from_elements(registry_element.child_elements())?; + let registry = + deserializer.de_tag::(REGISTRY_TAG_NAME, IgnoreEnd::Yes)?; Ok(registry) } @@ -110,35 +111,21 @@ impl Registry } } -impl FromElements for Registry +impl Deserialize for Registry { type Error = RegistryError; - fn from_elements(elements: &Elements) -> Result + fn deserialize( + _start: &BytesStart, + deserializer: &mut TDeserializer, + ) -> Result { - let commands_element = elements - .get_first_tagged_element("commands") - .ok_or(Self::Error::MissingCommandsElement)?; - - let command_elements = - commands_element - .child_elements() - .into_iter() - .filter_map(|element| match element { - Element::Tagged(tagged_element) - if tagged_element.name() == "command" => - { - Some(tagged_element) - } - _ => None, - }); - - let commands = command_elements - .into_iter() - .map(|command_element| { - Command::from_elements(command_element.child_elements()) - }) - .collect::, _>>()?; + deserializer.skip_to_tag_start("commands")?; + + let commands = + deserializer.de_tag_with("commands", IgnoreEnd::No, |_, deserializer| { + deserializer.de_tag_list::("command") + })?; Ok(Self { commands }) } @@ -160,16 +147,11 @@ pub enum RegistryError #[error("Invalid command")] InvalidCommand(#[from] CommandError), - /// Parsing failed. - #[error("Parsing failed")] - ParsingFailed(#[from] ParsingError), - /// I/O failed. #[error("I/O failed")] IOFailed(#[from] std::io::Error), -} -/// Parsing error. -#[derive(Debug, thiserror::Error)] -#[error(transparent)] -pub struct ParsingError(#[from] ParserError); + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), +} diff --git a/src/xml/element.rs b/src/xml/element.rs deleted file mode 100644 index f469480..0000000 --- a/src/xml/element.rs +++ /dev/null @@ -1,144 +0,0 @@ -#[derive(Debug, PartialEq, Eq)] -pub struct Elements -{ - elements: Vec, -} - -impl Elements -{ - pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged> - { - self.elements.iter().find_map(|element| match element { - Element::Tagged(tagged_element) if tagged_element.name == tag_name => { - Some(tagged_element) - } - _ => None, - }) - } - - pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> - { - self.elements - .iter() - .filter_map(|element| match element { - Element::Tagged(tagged_element) if tagged_element.name == tag_name => { - Some(tagged_element) - } - _ => None, - }) - .collect() - } - - pub fn get_first_text_element(&self) -> Option<&String> - { - self.elements.iter().find_map(|element| match element { - Element::Text(text) => Some(text), - _ => None, - }) - } - - pub fn get_all_text_elements(&self) -> Vec<&String> - { - self.elements - .iter() - .filter_map(|element| match element { - Element::Text(text) => Some(text), - _ => None, - }) - .collect() - } - - pub fn has_tagged_element(&self, tag_name: &str) -> bool - { - self.elements.iter().any(|element| { - matches!( - element, - Element::Tagged(tagged_element) if tagged_element.name == tag_name - ) - }) - } -} - -impl> From for Elements -{ - fn from(into_iter: IntoIter) -> Self - { - Self { - elements: into_iter.into_iter().collect(), - } - } -} - -impl<'elements> IntoIterator for &'elements Elements -{ - type IntoIter = Iter<'elements>; - type Item = &'elements Element; - - fn into_iter(self) -> Self::IntoIter - { - Self::IntoIter { - elements: self.elements.iter(), - } - } -} - -pub struct Iter<'elements> -{ - elements: std::slice::Iter<'elements, Element>, -} - -impl<'elements> Iterator for Iter<'elements> -{ - type Item = &'elements Element; - - fn next(&mut self) -> Option - { - self.elements.next() - } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Element -{ - Tagged(Tagged), - Text(String), - Comment(String), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Tagged -{ - name: String, - child_elements: Elements, -} - -impl Tagged -{ - pub fn new(name: &Name, child_elements: ChildElements) -> Self - where - Name: ToString, - ChildElements: Into, - { - Self { - name: name.to_string(), - child_elements: child_elements.into(), - } - } - - pub fn name(&self) -> &str - { - &self.name - } - - pub fn child_elements(&self) -> &Elements - { - &self.child_elements - } -} - -pub trait FromElements: Sized -{ - type Error; - - fn from_elements(elements: &Elements) -> Result; -} diff --git a/src/xml/mod.rs b/src/xml/mod.rs deleted file mode 100644 index 12368c3..0000000 --- a/src/xml/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod element; -pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs deleted file mode 100644 index d152a6e..0000000 --- a/src/xml/parser.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::io::BufRead; - -use quick_xml::events::{BytesStart, BytesText, Event}; -use quick_xml::Reader; - -use crate::xml::element::{Element, Elements, Tagged}; - -/// XML parser. -pub struct Parser -{ - reader: Reader, -} - -impl Parser -{ - pub fn new(src: Source) -> Self - { - Self { - reader: Reader::from_reader(src), - } - } - - pub fn parse(&mut self) -> Result - { - let mut buf = Vec::new(); - - let mut elements = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - let element = match self.handle_event(event)? { - EventHandlingResult::Element(element) => element, - EventHandlingResult::Event(_) => { - continue; - } - EventHandlingResult::End => { - break; - } - }; - - elements.push(element); - } - - Ok(elements.into()) - } - - fn parse_text(text: &BytesText) -> Result - { - String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) - } - - fn parse_tagged(&mut self, start: &BytesStart) -> Result - { - let mut child_elements = Vec::new(); - - let mut buf = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - match event { - Event::End(end) if end.name() == start.name() => { - break; - } - event => match self.handle_event(event)? { - EventHandlingResult::Element(element) => { - child_elements.push(element); - } - EventHandlingResult::End => { - return Err(Error::UnexpectedEndOfFile); - } - EventHandlingResult::Event(_) => {} - }, - } - } - - Ok(Element::Tagged(Tagged::new( - &String::from_utf8(start.name().as_ref().to_vec()) - .map_err(|_| Error::TagNameNotUTF8)?, - child_elements, - ))) - } - - fn handle_event<'a>( - &'a mut self, - event: Event<'a>, - ) -> Result - { - match event { - Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( - Self::parse_text(&text)?, - ))), - Event::Start(start) => { - Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) - } - Event::End(_) => Err(Error::UnexpectedTagEnd), - Event::Eof => Ok(EventHandlingResult::End), - Event::Comment(comment_text) => Ok(EventHandlingResult::Element( - Element::Comment(Self::parse_text(&comment_text)?), - )), - event => Ok(EventHandlingResult::Event(event)), - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error -{ - #[error(transparent)] - QuickXMLFailed(#[from] quick_xml::Error), - - #[error("Text is not UTF-8")] - TextNotUTF8, - - #[error("Tag name is not UTF-8")] - TagNameNotUTF8, - - #[error("Unexpectedly found the end of a tag")] - UnexpectedTagEnd, - - #[error("Unexpected end of file")] - UnexpectedEndOfFile, -} - -enum EventHandlingResult<'event> -{ - Element(Element), - Event(Event<'event>), - End, -} - -#[cfg(test)] -mod tests -{ - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn can_parse() - { - let mut parser = Parser::new("Hello there".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![Element::Text("Hello there".to_string())] - ))]) - ); - - let mut parser = Parser::new("123 Hello".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Tagged(Tagged::new( - &"bar", - Elements::from(vec![Element::Text("123".to_string())]) - )), - Element::Text(" Hello".to_string()) - ] - ))]) - ); - - let mut parser = Parser::new("".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(Vec::new()) - ); - - let mut parser = Parser::new( - "Hello there123".as_bytes(), - ); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Comment("XML is awful".to_string()), - Element::Text("Hello there".to_string()), - Element::Tagged(Tagged::new( - &"bar", - vec![Element::Text("123".to_string())] - )), - ] - ))]) - ); - } -} -- cgit v1.2.3-18-g5258