diff options
author | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 |
---|---|---|
committer | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 |
commit | add06dafdf874b1b419e5eef918c6b1131ab09fd (patch) | |
tree | c1d52d3ece248d96562a3d77beb44973e7720847 | |
parent | f49d77c2961be28c3cc500af185813dd5e83a367 (diff) |
perf: improve XML deserialization speed
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | benches/registry.rs | 2 | ||||
-rw-r--r-- | src/command.rs | 559 | ||||
-rw-r--r-- | src/deserialization/buffer_deserializer.rs | 210 | ||||
-rw-r--r-- | src/deserialization/mod.rs | 124 | ||||
-rw-r--r-- | src/lib.rs | 70 | ||||
-rw-r--r-- | src/xml/element.rs | 144 | ||||
-rw-r--r-- | src/xml/mod.rs | 2 | ||||
-rw-r--r-- | src/xml/parser.rs | 195 |
9 files changed, 859 insertions, 448 deletions
@@ -30,6 +30,7 @@ thiserror = "1.0.38" [dev-dependencies] pretty_assertions = "1.3.0" criterion = "0.4.0" +ridicule = "0.2.0" [[bench]] name = "registry" diff --git a/benches/registry.rs b/benches/registry.rs index 9bfe9c4..27dc65d 100644 --- a/benches/registry.rs +++ b/benches/registry.rs @@ -4,7 +4,7 @@ use opengl_registry::Registry; fn bench_registry(crit: &mut Criterion) { crit.bench_function("retrieve registry from included XML file", |bencher| { - bencher.iter(Registry::retrieve) + bencher.iter(|| Registry::retrieve().unwrap()) }); } diff --git a/src/command.rs b/src/command.rs index c7ada95..2ba92ea 100644 --- a/src/command.rs +++ b/src/command.rs @@ -1,5 +1,14 @@ //! OpenGL command. -use crate::xml::element::{Elements, FromElements}; +use quick_xml::events::BytesStart; + +use crate::deserialization::{ + Deserialize, + DeserializeWithFn, + Deserializer, + DeserializerError, + IgnoreEnd, + ResultExt, +}; /// A command. #[derive(Debug, Clone, PartialEq, Eq)] @@ -38,25 +47,21 @@ impl Command } } -impl FromElements for Command +impl Deserialize for Command { type Error = Error; - fn from_elements( - elements: &crate::xml::element::Elements, + fn deserialize<TDeserializer: Deserializer>( + start: &BytesStart, + deserializer: &mut TDeserializer, ) -> Result<Self, Self::Error> { - let proto_element = elements - .get_first_tagged_element("proto") - .ok_or(Self::Error::MissingPrototype)?; + let prototype = deserializer.de_tag::<Prototype>("proto", IgnoreEnd::No)?; - let prototype = Prototype::from_elements(proto_element.child_elements())?; + let parameters = deserializer.de_tag_list::<Parameter>("param")?; - let parameters = elements - .get_all_tagged_elements_with_name("param") - .into_iter() - .map(|param_element| Parameter::from_elements(param_element.child_elements())) - .collect::<Result<Vec<_>, _>>()?; + deserializer + .skip_to_tag_end(std::str::from_utf8(start.name().as_ref()).unwrap())?; Ok(Self { prototype, @@ -80,6 +85,10 @@ pub enum Error /// Invalid parameter. #[error("Invalid parameter")] InvalidParameter(#[from] ParameterError), + + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), } /// A command prototype. @@ -116,23 +125,22 @@ impl Prototype } } -impl FromElements for Prototype +impl Deserialize for Prototype { type Error = PrototypeError; - fn from_elements( - elements: &crate::xml::element::Elements, + fn deserialize<TDeserializer: Deserializer>( + _start: &BytesStart, + deserializer: &mut TDeserializer, ) -> Result<Self, Self::Error> { - let name = elements - .get_first_tagged_element("name") - .ok_or(Self::Error::MissingName)? - .child_elements() - .get_first_text_element() - .cloned() - .unwrap_or_default(); + let return_type = deserialize_type::<PrototypeError>(deserializer)?; - let return_type = find_type(elements); + let name = deserializer.de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( + "name", + IgnoreEnd::No, + |_, deserializer| deserializer.de_text(), + )?; Ok(Self { name, return_type }) } @@ -145,6 +153,14 @@ pub enum PrototypeError /// No 'name' element was found. #[error("No 'name' element was found")] MissingName, + + /// No return type was found. + #[error("No return type was found")] + MissingReturnType, + + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), } /// A command parameter. @@ -181,21 +197,22 @@ impl Parameter } } -impl FromElements for Parameter +impl Deserialize for Parameter { type Error = ParameterError; - fn from_elements(elements: &Elements) -> Result<Self, Self::Error> + fn deserialize<TDeserializer: Deserializer>( + _start: &BytesStart, + deserializer: &mut TDeserializer, + ) -> Result<Self, Self::Error> { - let name = elements - .get_first_tagged_element("name") - .ok_or(Self::Error::MissingName)? - .child_elements() - .get_first_text_element() - .cloned() - .unwrap_or_default(); + let ty = deserialize_type::<ParameterError>(deserializer)?; - let ty = find_type(elements); + let name = deserializer.de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( + "name", + IgnoreEnd::No, + |_, deserializer| deserializer.de_text(), + )?; Ok(Self { name, ty }) } @@ -208,46 +225,47 @@ pub enum ParameterError /// No 'name' element was found. #[error("No 'name' element was found")] MissingName, + + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), } -fn find_type(elements: &Elements) -> String +fn deserialize_type<Err>(deserializer: &mut impl Deserializer) -> Result<String, Err> +where + Err: From<DeserializerError>, { - let text_type_parts = elements - .get_all_text_elements() - .into_iter() - .map(|text_type_part| text_type_part.trim()) - .filter(|text_type_part| !text_type_part.is_empty()) - .collect::<Vec<_>>(); + let type_before = deserializer.de_text().try_event()?; - let opt_ptype_text = get_ptype_text(elements); + let type_ptype = deserializer + .de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( + "ptype", + IgnoreEnd::No, + |_, deserializer| deserializer.de_text(), + ) + .try_event()?; - opt_ptype_text.map_or_else( - || join_space_strs(text_type_parts.iter()), - |ptype_text| { - let Some(first_part) = text_type_parts.first() else { - return ptype_text.clone(); - }; + let type_after = deserializer.de_text().try_event()?; - let before = if *first_part == "const" { "const " } else { "" }; + let type_before_after = [type_before.clone(), type_after.clone()] + .into_iter() + .flatten(); - let after_start_index = usize::from(*first_part == "const"); + Ok(type_ptype.map_or_else( + || join_space_strs(type_before_after), + |ptype_text| { + let before = type_before + .map(|before| format!("{before} ")) + .unwrap_or_default(); format!( - "{before}{ptype_text} {}", - text_type_parts - .get(after_start_index..) - .map(|parts| join_space_strs(parts.iter())) + "{before}{ptype_text}{}", + type_after + .map(|after| format!(" {after}")) .unwrap_or_default() ) }, - ) -} - -fn get_ptype_text(elements: &Elements) -> Option<&String> -{ - let ptype_element = elements.get_first_tagged_element("ptype")?; - - ptype_element.child_elements().get_first_text_element() + )) } fn join_space_strs<Strings, StrItem>(strings: Strings) -> String @@ -261,3 +279,420 @@ where .collect::<Vec<_>>() .join(" ") } + +#[cfg(test)] +mod tests +{ + use pretty_assertions::assert_str_eq; + use quick_xml::events::Event; + use ridicule::mock; + use ridicule::predicate::{always, eq, function}; + + use super::*; + + mock! { + MockDeserializer {} + + impl Deserializer for MockDeserializer { + fn de_tag<De: Deserialize>( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + ) -> Result<De, DeserializerError>; + + fn de_tag_with<Output, Err, DeserializeFn>( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + deserialize: DeserializeFn, + ) -> Result<Output, DeserializerError> + where + Err: std::error::Error + Send + Sync + 'static, + DeserializeFn: FnOnce(&BytesStart, &mut MockDeserializer) -> Result<Output, Err>; + + fn de_tag_list<De: Deserialize>( + &mut self, + tag_name: &str + ) -> Result<Vec<De>, DeserializerError>; + + fn de_text(&mut self) -> Result<String, DeserializerError>; + + fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + + fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + } + } + + #[test] + fn deserialize_prototype_works_with_ptype() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("ptype")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLuint".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("glDoComplicatedThing".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let prototype = + Prototype::deserialize(&BytesStart::new("proto"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(prototype.name, "glDoComplicatedThing"); + assert_str_eq!(prototype.return_type, "GLuint"); + } + + #[test] + fn deserialize_prototype_works_with_text() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("void".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|_, _, _, _| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "start".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("glDoSomeThing".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let prototype = + Prototype::deserialize(&BytesStart::new("proto"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(prototype.name, "glDoSomeThing"); + assert_str_eq!(prototype.return_type, "void"); + } + + #[test] + fn deserialize_parameter_works_with_ptype_only() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("ptype")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLenum".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("value".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "value"); + assert_str_eq!(parameter.ty, "GLenum"); + } + + #[test] + fn deserialize_parameter_works_with_ptype_and_text_after() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("ptype")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLchar".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("*".to_string())) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("source".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "source"); + assert_str_eq!(parameter.ty, "GLchar *"); + } + + #[test] + fn deserialize_parameter_works_with_ptype_and_text_before_and_after() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("const".to_string())) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("GLchar".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("*".to_string())) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("name".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "name"); + assert_str_eq!(parameter.ty, "const GLchar *"); + } + + #[test] + fn deserialize_parameter_works_with_text() + { + let mut mock_deserializer = MockDeserializer::new(); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("void *".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("ptype"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|_, _, _, _| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "start".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| { + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: Event::Start(BytesStart::new("name")), + }) + }) + .times(1); + + mock_deserializer + .expect_de_text() + .returning(|_| Ok("pixels".to_string())) + .times(1); + + mock_deserializer + .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() + .with( + eq("name"), + function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), + always(), + ) + .returning(|deserializer, tag_name, _, func| { + func(&BytesStart::new(tag_name), deserializer) + }) + .times(1); + + let parameter = + Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) + .expect("Expected Ok"); + + assert_str_eq!(parameter.name, "pixels"); + assert_str_eq!(parameter.ty, "void *"); + } +} diff --git a/src/deserialization/buffer_deserializer.rs b/src/deserialization/buffer_deserializer.rs new file mode 100644 index 0000000..652e1ff --- /dev/null +++ b/src/deserialization/buffer_deserializer.rs @@ -0,0 +1,210 @@ +use std::any::type_name; +use std::error::Error; +use std::io::BufRead; + +use quick_xml::events::{BytesStart, Event}; +use quick_xml::Reader; + +use crate::deserialization::{ + Deserialize, + Deserializer, + DeserializerError, + IgnoreEnd, + WrappedDeserializeError, +}; + +macro_rules! read_event { + ($self: ident) => {{ + let event = if let Some(leftover_event) = $self.leftover_event.take() { + leftover_event + } else { + $self.reader.read_event_into(&mut $self.buf)?.into_owned() + }; + + if let Event::Eof = &event { + return Err(DeserializerError::UnexpectedEndOfFile); + } + + event + }}; +} + +pub struct BufferDeserializer<Source> +{ + reader: Reader<Source>, + leftover_event: Option<Event<'static>>, + buf: Vec<u8>, +} + +impl<Source> BufferDeserializer<Source> +where + Source: BufRead, +{ + pub fn new(source: Source) -> Self + { + let mut reader = Reader::from_reader(source); + + reader.trim_text(true); + reader.expand_empty_elements(true); + + Self { + reader, + leftover_event: None, + buf: Vec::new(), + } + } +} + +impl<Source> Deserializer for BufferDeserializer<Source> +where + Source: BufRead, +{ + fn de_tag<De: Deserialize>( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + ) -> Result<De, DeserializerError> + { + self.de_tag_with(tag_name, ignore_end, De::deserialize) + } + + fn de_tag_with<Output, Err, DeserializeFn>( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + deserialize: DeserializeFn, + ) -> Result<Output, DeserializerError> + where + Err: Error + Send + Sync + 'static, + DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result<Output, Err>, + { + let deserialized = match read_event!(self) { + Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { + deserialize(&start, self).map_err(|err| { + DeserializerError::DeserializeFailed( + type_name::<Output>(), + WrappedDeserializeError::new(err), + ) + })? + } + event => { + self.leftover_event = Some(event.clone().into_owned()); + + return Err(DeserializerError::UnexpectedEvent { + expected_event_name: format!("start({tag_name})"), + found_event: event, + }); + } + }; + + if let IgnoreEnd::No = ignore_end { + self.read_end_event(tag_name)?; + } + + Ok(deserialized) + } + + fn de_tag_list<De: Deserialize>( + &mut self, + tag_name: &str, + ) -> Result<Vec<De>, DeserializerError> + { + let mut deserialized_items = Vec::new(); + + loop { + let start = match read_event!(self) { + Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { + start + } + Event::Comment(_) => { + continue; + } + event => { + self.leftover_event = Some(event.into_owned()); + break; + } + }; + + let deserialized = De::deserialize(&start, self).map_err(|err| { + DeserializerError::DeserializeFailed( + type_name::<De>(), + WrappedDeserializeError::new(err), + ) + })?; + + self.read_end_event(tag_name)?; + + deserialized_items.push(deserialized); + } + + Ok(deserialized_items) + } + + fn de_text(&mut self) -> Result<String, DeserializerError> + { + let text = match read_event!(self) { + Event::Text(text) => Ok(text), + event => { + self.leftover_event = Some(event.clone().into_owned()); + + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "text".to_string(), + found_event: event, + }) + } + }? + .unescape()?; + + Ok(text.to_string()) + } + + fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError> + { + loop { + match read_event!(self) { + Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { + self.leftover_event = Some(Event::Start(start).into_owned()); + + break; + } + _ => {} + } + } + + Ok(()) + } + + fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError> + { + loop { + match read_event!(self) { + Event::End(end) if end.name().as_ref() == tag_name.as_bytes() => { + self.leftover_event = Some(Event::End(end).into_owned()); + + return Ok(()); + } + _ => {} + } + } + } +} + +impl<Source> BufferDeserializer<Source> +where + Source: BufRead, +{ + fn read_end_event(&mut self, tag_name: &str) -> Result<(), DeserializerError> + { + let event = read_event!(self); + + if matches!(&event, Event::End(end) if end.name().as_ref() == tag_name.as_bytes()) + { + return Ok(()); + } + + Err(DeserializerError::UnexpectedEvent { + expected_event_name: "end".to_string(), + found_event: event.into_owned(), + }) + } +} diff --git a/src/deserialization/mod.rs b/src/deserialization/mod.rs new file mode 100644 index 0000000..fa25e4b --- /dev/null +++ b/src/deserialization/mod.rs @@ -0,0 +1,124 @@ +use std::error::Error; +use std::ops::Deref; + +use quick_xml::events::{BytesStart, Event}; + +pub mod buffer_deserializer; + +pub trait Deserialize: Sized +{ + type Error: Error + Send + Sync + 'static; + + fn deserialize<TDeserializer: Deserializer>( + start: &BytesStart, + deserializer: &mut TDeserializer, + ) -> Result<Self, Self::Error>; +} + +pub trait Deserializer +{ + fn de_tag<De: Deserialize>( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + ) -> Result<De, DeserializerError>; + + fn de_tag_with<Output, Err, DeserializeFn>( + &mut self, + tag_name: &str, + ignore_end: IgnoreEnd, + deserialize: DeserializeFn, + ) -> Result<Output, DeserializerError> + where + Err: Error + Send + Sync + 'static, + DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result<Output, Err>; + + fn de_tag_list<De: Deserialize>( + &mut self, + tag_name: &str, + ) -> Result<Vec<De>, DeserializerError>; + + fn de_text(&mut self) -> Result<String, DeserializerError>; + + fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + + fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>; +} + +pub enum IgnoreEnd +{ + Yes, + No, +} + +/// Function pointer type passable to [`Deserializer::de_tag_with`]. +pub type DeserializeWithFn<Output, Err, Deserializer> = + fn(&BytesStart, &mut Deserializer) -> Result<Output, Err>; + +#[derive(Debug, thiserror::Error)] +pub enum DeserializerError +{ + #[error("Failed to read")] + ReadFailed(#[from] quick_xml::Error), + + #[error("Failed to deserialize {0}")] + DeserializeFailed(&'static str, #[source] WrappedDeserializeError), + + #[error("Expected {expected_event_name} event. Found {found_event:?}")] + UnexpectedEvent + { + expected_event_name: String, + found_event: Event<'static>, + }, + + #[error("Unexpected end of file")] + UnexpectedEndOfFile, +} + +#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct WrappedDeserializeError(Box<dyn Error + Send + Sync>); + +impl WrappedDeserializeError +{ + fn new<Err: Error + Send + Sync + 'static>(err: Err) -> Self + { + Self(Box::new(err)) + } +} + +impl Deref for WrappedDeserializeError +{ + type Target = dyn Error; + + fn deref(&self) -> &Self::Target + { + self.0.as_ref() + } +} + +pub trait ResultExt<Value> +{ + fn try_event(self) -> Result<Option<Value>, DeserializerError>; +} + +impl<Value> ResultExt<Value> for Result<Value, DeserializerError> +{ + fn try_event(self) -> Result<Option<Value>, DeserializerError> + { + self.map_or_else( + |err| { + if let DeserializerError::UnexpectedEvent { + expected_event_name: _, + found_event: _, + } = err + { + return Ok(None); + } + + Err(err) + }, + |value| Ok(Some(value)), + ) + } +} @@ -24,20 +24,24 @@ use std::fs::File; use std::io::Read; +use quick_xml::events::BytesStart; + use crate::command::{Command, Error as CommandError}; -use crate::xml::element::{Element, Elements, FromElements}; -use crate::xml::parser::{Error as ParserError, Parser}; +use crate::deserialization::buffer_deserializer::BufferDeserializer; +use crate::deserialization::{Deserialize, Deserializer, DeserializerError, IgnoreEnd}; pub mod command; -mod xml; +mod deserialization; +/// XML. #[cfg(feature = "include-xml")] const GL_REGISTRY_XML: &[u8] = include_bytes!("../OpenGL-Registry/xml/gl.xml"); const REGISTRY_TAG_NAME: &str = "registry"; /// Representation of the OpenGL registry. +#[derive(Debug, PartialEq, Eq)] pub struct Registry { commands: Vec<Command>, @@ -62,15 +66,12 @@ impl Registry /// Returns `Err` if parsing fails in any way. pub fn retrieve_from_bytes(xml_bytes: &[u8]) -> Result<Registry, RegistryError> { - let mut parser = Parser::new(xml_bytes); - - let elements = parser.parse().map_err(ParsingError)?; + let mut deserializer = BufferDeserializer::new(xml_bytes); - let registry_element = elements - .get_first_tagged_element(REGISTRY_TAG_NAME) - .ok_or(RegistryError::MissingRegistryElement)?; + deserializer.skip_to_tag_start(REGISTRY_TAG_NAME)?; - let registry = Registry::from_elements(registry_element.child_elements())?; + let registry = + deserializer.de_tag::<Registry>(REGISTRY_TAG_NAME, IgnoreEnd::Yes)?; Ok(registry) } @@ -110,35 +111,21 @@ impl Registry } } -impl FromElements for Registry +impl Deserialize for Registry { type Error = RegistryError; - fn from_elements(elements: &Elements) -> Result<Self, Self::Error> + fn deserialize<TDeserializer: Deserializer>( + _start: &BytesStart, + deserializer: &mut TDeserializer, + ) -> Result<Self, Self::Error> { - let commands_element = elements - .get_first_tagged_element("commands") - .ok_or(Self::Error::MissingCommandsElement)?; - - let command_elements = - commands_element - .child_elements() - .into_iter() - .filter_map(|element| match element { - Element::Tagged(tagged_element) - if tagged_element.name() == "command" => - { - Some(tagged_element) - } - _ => None, - }); - - let commands = command_elements - .into_iter() - .map(|command_element| { - Command::from_elements(command_element.child_elements()) - }) - .collect::<Result<Vec<_>, _>>()?; + deserializer.skip_to_tag_start("commands")?; + + let commands = + deserializer.de_tag_with("commands", IgnoreEnd::No, |_, deserializer| { + deserializer.de_tag_list::<Command>("command") + })?; Ok(Self { commands }) } @@ -160,16 +147,11 @@ pub enum RegistryError #[error("Invalid command")] InvalidCommand(#[from] CommandError), - /// Parsing failed. - #[error("Parsing failed")] - ParsingFailed(#[from] ParsingError), - /// I/O failed. #[error("I/O failed")] IOFailed(#[from] std::io::Error), -} -/// Parsing error. -#[derive(Debug, thiserror::Error)] -#[error(transparent)] -pub struct ParsingError(#[from] ParserError); + /// Deserialization failed. + #[error("Deserialization failed")] + DeserializationFailed(#[from] DeserializerError), +} diff --git a/src/xml/element.rs b/src/xml/element.rs deleted file mode 100644 index f469480..0000000 --- a/src/xml/element.rs +++ /dev/null @@ -1,144 +0,0 @@ -#[derive(Debug, PartialEq, Eq)] -pub struct Elements -{ - elements: Vec<Element>, -} - -impl Elements -{ - pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged> - { - self.elements.iter().find_map(|element| match element { - Element::Tagged(tagged_element) if tagged_element.name == tag_name => { - Some(tagged_element) - } - _ => None, - }) - } - - pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> - { - self.elements - .iter() - .filter_map(|element| match element { - Element::Tagged(tagged_element) if tagged_element.name == tag_name => { - Some(tagged_element) - } - _ => None, - }) - .collect() - } - - pub fn get_first_text_element(&self) -> Option<&String> - { - self.elements.iter().find_map(|element| match element { - Element::Text(text) => Some(text), - _ => None, - }) - } - - pub fn get_all_text_elements(&self) -> Vec<&String> - { - self.elements - .iter() - .filter_map(|element| match element { - Element::Text(text) => Some(text), - _ => None, - }) - .collect() - } - - pub fn has_tagged_element(&self, tag_name: &str) -> bool - { - self.elements.iter().any(|element| { - matches!( - element, - Element::Tagged(tagged_element) if tagged_element.name == tag_name - ) - }) - } -} - -impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements -{ - fn from(into_iter: IntoIter) -> Self - { - Self { - elements: into_iter.into_iter().collect(), - } - } -} - -impl<'elements> IntoIterator for &'elements Elements -{ - type IntoIter = Iter<'elements>; - type Item = &'elements Element; - - fn into_iter(self) -> Self::IntoIter - { - Self::IntoIter { - elements: self.elements.iter(), - } - } -} - -pub struct Iter<'elements> -{ - elements: std::slice::Iter<'elements, Element>, -} - -impl<'elements> Iterator for Iter<'elements> -{ - type Item = &'elements Element; - - fn next(&mut self) -> Option<Self::Item> - { - self.elements.next() - } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Element -{ - Tagged(Tagged), - Text(String), - Comment(String), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Tagged -{ - name: String, - child_elements: Elements, -} - -impl Tagged -{ - pub fn new<Name, ChildElements>(name: &Name, child_elements: ChildElements) -> Self - where - Name: ToString, - ChildElements: Into<Elements>, - { - Self { - name: name.to_string(), - child_elements: child_elements.into(), - } - } - - pub fn name(&self) -> &str - { - &self.name - } - - pub fn child_elements(&self) -> &Elements - { - &self.child_elements - } -} - -pub trait FromElements: Sized -{ - type Error; - - fn from_elements(elements: &Elements) -> Result<Self, Self::Error>; -} diff --git a/src/xml/mod.rs b/src/xml/mod.rs deleted file mode 100644 index 12368c3..0000000 --- a/src/xml/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod element; -pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs deleted file mode 100644 index d152a6e..0000000 --- a/src/xml/parser.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::io::BufRead; - -use quick_xml::events::{BytesStart, BytesText, Event}; -use quick_xml::Reader; - -use crate::xml::element::{Element, Elements, Tagged}; - -/// XML parser. -pub struct Parser<Source> -{ - reader: Reader<Source>, -} - -impl<Source: BufRead> Parser<Source> -{ - pub fn new(src: Source) -> Self - { - Self { - reader: Reader::from_reader(src), - } - } - - pub fn parse(&mut self) -> Result<Elements, Error> - { - let mut buf = Vec::new(); - - let mut elements = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - let element = match self.handle_event(event)? { - EventHandlingResult::Element(element) => element, - EventHandlingResult::Event(_) => { - continue; - } - EventHandlingResult::End => { - break; - } - }; - - elements.push(element); - } - - Ok(elements.into()) - } - - fn parse_text(text: &BytesText) -> Result<String, Error> - { - String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) - } - - fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> - { - let mut child_elements = Vec::new(); - - let mut buf = Vec::new(); - - loop { - let event = self.reader.read_event_into(&mut buf)?; - - match event { - Event::End(end) if end.name() == start.name() => { - break; - } - event => match self.handle_event(event)? { - EventHandlingResult::Element(element) => { - child_elements.push(element); - } - EventHandlingResult::End => { - return Err(Error::UnexpectedEndOfFile); - } - EventHandlingResult::Event(_) => {} - }, - } - } - - Ok(Element::Tagged(Tagged::new( - &String::from_utf8(start.name().as_ref().to_vec()) - .map_err(|_| Error::TagNameNotUTF8)?, - child_elements, - ))) - } - - fn handle_event<'a>( - &'a mut self, - event: Event<'a>, - ) -> Result<EventHandlingResult, Error> - { - match event { - Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( - Self::parse_text(&text)?, - ))), - Event::Start(start) => { - Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) - } - Event::End(_) => Err(Error::UnexpectedTagEnd), - Event::Eof => Ok(EventHandlingResult::End), - Event::Comment(comment_text) => Ok(EventHandlingResult::Element( - Element::Comment(Self::parse_text(&comment_text)?), - )), - event => Ok(EventHandlingResult::Event(event)), - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error -{ - #[error(transparent)] - QuickXMLFailed(#[from] quick_xml::Error), - - #[error("Text is not UTF-8")] - TextNotUTF8, - - #[error("Tag name is not UTF-8")] - TagNameNotUTF8, - - #[error("Unexpectedly found the end of a tag")] - UnexpectedTagEnd, - - #[error("Unexpected end of file")] - UnexpectedEndOfFile, -} - -enum EventHandlingResult<'event> -{ - Element(Element), - Event(Event<'event>), - End, -} - -#[cfg(test)] -mod tests -{ - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn can_parse() - { - let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![Element::Text("Hello there".to_string())] - ))]) - ); - - let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Tagged(Tagged::new( - &"bar", - Elements::from(vec![Element::Text("123".to_string())]) - )), - Element::Text(" Hello".to_string()) - ] - ))]) - ); - - let mut parser = Parser::new("".as_bytes()); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(Vec::new()) - ); - - let mut parser = Parser::new( - "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), - ); - - assert_eq!( - parser.parse().expect("Expected Ok"), - Elements::from(vec![Element::Tagged(Tagged::new( - &"foo", - vec![ - Element::Comment("XML is awful".to_string()), - Element::Text("Hello there".to_string()), - Element::Tagged(Tagged::new( - &"bar", - vec![Element::Text("123".to_string())] - )), - ] - ))]) - ); - } -} |