diff options
| author | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 | 
|---|---|---|
| committer | HampusM <hampus@hampusmat.com> | 2023-03-25 17:42:28 +0100 | 
| commit | add06dafdf874b1b419e5eef918c6b1131ab09fd (patch) | |
| tree | c1d52d3ece248d96562a3d77beb44973e7720847 /src | |
| parent | f49d77c2961be28c3cc500af185813dd5e83a367 (diff) | |
perf: improve XML deserialization speed
Diffstat (limited to 'src')
| -rw-r--r-- | src/command.rs | 559 | ||||
| -rw-r--r-- | src/deserialization/buffer_deserializer.rs | 210 | ||||
| -rw-r--r-- | src/deserialization/mod.rs | 124 | ||||
| -rw-r--r-- | src/lib.rs | 70 | ||||
| -rw-r--r-- | src/xml/element.rs | 144 | ||||
| -rw-r--r-- | src/xml/mod.rs | 2 | ||||
| -rw-r--r-- | src/xml/parser.rs | 195 | 
7 files changed, 857 insertions, 447 deletions
diff --git a/src/command.rs b/src/command.rs index c7ada95..2ba92ea 100644 --- a/src/command.rs +++ b/src/command.rs @@ -1,5 +1,14 @@  //! OpenGL command. -use crate::xml::element::{Elements, FromElements}; +use quick_xml::events::BytesStart; + +use crate::deserialization::{ +    Deserialize, +    DeserializeWithFn, +    Deserializer, +    DeserializerError, +    IgnoreEnd, +    ResultExt, +};  /// A command.  #[derive(Debug, Clone, PartialEq, Eq)] @@ -38,25 +47,21 @@ impl Command      }  } -impl FromElements for Command +impl Deserialize for Command  {      type Error = Error; -    fn from_elements( -        elements: &crate::xml::element::Elements, +    fn deserialize<TDeserializer: Deserializer>( +        start: &BytesStart, +        deserializer: &mut TDeserializer,      ) -> Result<Self, Self::Error>      { -        let proto_element = elements -            .get_first_tagged_element("proto") -            .ok_or(Self::Error::MissingPrototype)?; +        let prototype = deserializer.de_tag::<Prototype>("proto", IgnoreEnd::No)?; -        let prototype = Prototype::from_elements(proto_element.child_elements())?; +        let parameters = deserializer.de_tag_list::<Parameter>("param")?; -        let parameters = elements -            .get_all_tagged_elements_with_name("param") -            .into_iter() -            .map(|param_element| Parameter::from_elements(param_element.child_elements())) -            .collect::<Result<Vec<_>, _>>()?; +        deserializer +            .skip_to_tag_end(std::str::from_utf8(start.name().as_ref()).unwrap())?;          Ok(Self {              prototype, @@ -80,6 +85,10 @@ pub enum Error      /// Invalid parameter.      #[error("Invalid parameter")]      InvalidParameter(#[from] ParameterError), + +    /// Deserialization failed. +    #[error("Deserialization failed")] +    DeserializationFailed(#[from] DeserializerError),  }  /// A command prototype. @@ -116,23 +125,22 @@ impl Prototype      }  } -impl FromElements for Prototype +impl Deserialize for Prototype  {      type Error = PrototypeError; -    fn from_elements( -        elements: &crate::xml::element::Elements, +    fn deserialize<TDeserializer: Deserializer>( +        _start: &BytesStart, +        deserializer: &mut TDeserializer,      ) -> Result<Self, Self::Error>      { -        let name = elements -            .get_first_tagged_element("name") -            .ok_or(Self::Error::MissingName)? -            .child_elements() -            .get_first_text_element() -            .cloned() -            .unwrap_or_default(); +        let return_type = deserialize_type::<PrototypeError>(deserializer)?; -        let return_type = find_type(elements); +        let name = deserializer.de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( +            "name", +            IgnoreEnd::No, +            |_, deserializer| deserializer.de_text(), +        )?;          Ok(Self { name, return_type })      } @@ -145,6 +153,14 @@ pub enum PrototypeError      /// No 'name' element was found.      #[error("No 'name' element was found")]      MissingName, + +    /// No return type was found. +    #[error("No return type was found")] +    MissingReturnType, + +    /// Deserialization failed. +    #[error("Deserialization failed")] +    DeserializationFailed(#[from] DeserializerError),  }  /// A command parameter. @@ -181,21 +197,22 @@ impl Parameter      }  } -impl FromElements for Parameter +impl Deserialize for Parameter  {      type Error = ParameterError; -    fn from_elements(elements: &Elements) -> Result<Self, Self::Error> +    fn deserialize<TDeserializer: Deserializer>( +        _start: &BytesStart, +        deserializer: &mut TDeserializer, +    ) -> Result<Self, Self::Error>      { -        let name = elements -            .get_first_tagged_element("name") -            .ok_or(Self::Error::MissingName)? -            .child_elements() -            .get_first_text_element() -            .cloned() -            .unwrap_or_default(); +        let ty = deserialize_type::<ParameterError>(deserializer)?; -        let ty = find_type(elements); +        let name = deserializer.de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( +            "name", +            IgnoreEnd::No, +            |_, deserializer| deserializer.de_text(), +        )?;          Ok(Self { name, ty })      } @@ -208,46 +225,47 @@ pub enum ParameterError      /// No 'name' element was found.      #[error("No 'name' element was found")]      MissingName, + +    /// Deserialization failed. +    #[error("Deserialization failed")] +    DeserializationFailed(#[from] DeserializerError),  } -fn find_type(elements: &Elements) -> String +fn deserialize_type<Err>(deserializer: &mut impl Deserializer) -> Result<String, Err> +where +    Err: From<DeserializerError>,  { -    let text_type_parts = elements -        .get_all_text_elements() -        .into_iter() -        .map(|text_type_part| text_type_part.trim()) -        .filter(|text_type_part| !text_type_part.is_empty()) -        .collect::<Vec<_>>(); +    let type_before = deserializer.de_text().try_event()?; -    let opt_ptype_text = get_ptype_text(elements); +    let type_ptype = deserializer +        .de_tag_with::<_, _, DeserializeWithFn<_, _, _>>( +            "ptype", +            IgnoreEnd::No, +            |_, deserializer| deserializer.de_text(), +        ) +        .try_event()?; -    opt_ptype_text.map_or_else( -        || join_space_strs(text_type_parts.iter()), -        |ptype_text| { -            let Some(first_part) = text_type_parts.first() else { -                return ptype_text.clone(); -            }; +    let type_after = deserializer.de_text().try_event()?; -            let before = if *first_part == "const" { "const " } else { "" }; +    let type_before_after = [type_before.clone(), type_after.clone()] +        .into_iter() +        .flatten(); -            let after_start_index = usize::from(*first_part == "const"); +    Ok(type_ptype.map_or_else( +        || join_space_strs(type_before_after), +        |ptype_text| { +            let before = type_before +                .map(|before| format!("{before} ")) +                .unwrap_or_default();              format!( -                "{before}{ptype_text} {}", -                text_type_parts -                    .get(after_start_index..) -                    .map(|parts| join_space_strs(parts.iter())) +                "{before}{ptype_text}{}", +                type_after +                    .map(|after| format!(" {after}"))                      .unwrap_or_default()              )          }, -    ) -} - -fn get_ptype_text(elements: &Elements) -> Option<&String> -{ -    let ptype_element = elements.get_first_tagged_element("ptype")?; - -    ptype_element.child_elements().get_first_text_element() +    ))  }  fn join_space_strs<Strings, StrItem>(strings: Strings) -> String @@ -261,3 +279,420 @@ where          .collect::<Vec<_>>()          .join(" ")  } + +#[cfg(test)] +mod tests +{ +    use pretty_assertions::assert_str_eq; +    use quick_xml::events::Event; +    use ridicule::mock; +    use ridicule::predicate::{always, eq, function}; + +    use super::*; + +    mock! { +        MockDeserializer {} + +        impl Deserializer for MockDeserializer { +            fn de_tag<De: Deserialize>( +                &mut self, +                tag_name: &str, +                ignore_end: IgnoreEnd, +            ) -> Result<De, DeserializerError>; + +            fn de_tag_with<Output, Err, DeserializeFn>( +                &mut self, +                tag_name: &str, +                ignore_end: IgnoreEnd, +                deserialize: DeserializeFn, +            ) -> Result<Output, DeserializerError> +            where +                Err: std::error::Error + Send + Sync + 'static, +                DeserializeFn: FnOnce(&BytesStart, &mut MockDeserializer) -> Result<Output, Err>; + +            fn de_tag_list<De: Deserialize>( +                &mut self, +                tag_name: &str +            ) -> Result<Vec<De>, DeserializerError>; + +            fn de_text(&mut self) -> Result<String, DeserializerError>; + +            fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + +            fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>; +        } +    } + +    #[test] +    fn deserialize_prototype_works_with_ptype() +    { +        let mut mock_deserializer = MockDeserializer::new(); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: Event::Start(BytesStart::new("ptype")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("GLuint".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("ptype"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: Event::Start(BytesStart::new("name")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("glDoComplicatedThing".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("name"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        let prototype = +            Prototype::deserialize(&BytesStart::new("proto"), &mut mock_deserializer) +                .expect("Expected Ok"); + +        assert_str_eq!(prototype.name, "glDoComplicatedThing"); +        assert_str_eq!(prototype.return_type, "GLuint"); +    } + +    #[test] +    fn deserialize_prototype_works_with_text() +    { +        let mut mock_deserializer = MockDeserializer::new(); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("void".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("ptype"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|_, _, _, _| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "start".to_string(), +                    found_event: Event::Start(BytesStart::new("name")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: Event::Start(BytesStart::new("name")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("glDoSomeThing".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("name"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        let prototype = +            Prototype::deserialize(&BytesStart::new("proto"), &mut mock_deserializer) +                .expect("Expected Ok"); + +        assert_str_eq!(prototype.name, "glDoSomeThing"); +        assert_str_eq!(prototype.return_type, "void"); +    } + +    #[test] +    fn deserialize_parameter_works_with_ptype_only() +    { +        let mut mock_deserializer = MockDeserializer::new(); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: Event::Start(BytesStart::new("ptype")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("GLenum".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("ptype"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: Event::Start(BytesStart::new("name")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("value".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("name"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        let parameter = +            Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) +                .expect("Expected Ok"); + +        assert_str_eq!(parameter.name, "value"); +        assert_str_eq!(parameter.ty, "GLenum"); +    } + +    #[test] +    fn deserialize_parameter_works_with_ptype_and_text_after() +    { +        let mut mock_deserializer = MockDeserializer::new(); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: Event::Start(BytesStart::new("ptype")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("GLchar".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("ptype"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("*".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("source".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("name"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        let parameter = +            Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) +                .expect("Expected Ok"); + +        assert_str_eq!(parameter.name, "source"); +        assert_str_eq!(parameter.ty, "GLchar *"); +    } + +    #[test] +    fn deserialize_parameter_works_with_ptype_and_text_before_and_after() +    { +        let mut mock_deserializer = MockDeserializer::new(); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("const".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("GLchar".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("ptype"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("*".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("name".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("name"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        let parameter = +            Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) +                .expect("Expected Ok"); + +        assert_str_eq!(parameter.name, "name"); +        assert_str_eq!(parameter.ty, "const GLchar *"); +    } + +    #[test] +    fn deserialize_parameter_works_with_text() +    { +        let mut mock_deserializer = MockDeserializer::new(); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("void *".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("ptype"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|_, _, _, _| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "start".to_string(), +                    found_event: Event::Start(BytesStart::new("name")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| { +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: Event::Start(BytesStart::new("name")), +                }) +            }) +            .times(1); + +        mock_deserializer +            .expect_de_text() +            .returning(|_| Ok("pixels".to_string())) +            .times(1); + +        mock_deserializer +            .expect_de_tag_with::<String, DeserializerError, DeserializeWithFn<_, _, _>>() +            .with( +                eq("name"), +                function(|ignore_end| matches!(ignore_end, IgnoreEnd::No)), +                always(), +            ) +            .returning(|deserializer, tag_name, _, func| { +                func(&BytesStart::new(tag_name), deserializer) +            }) +            .times(1); + +        let parameter = +            Parameter::deserialize(&BytesStart::new("param"), &mut mock_deserializer) +                .expect("Expected Ok"); + +        assert_str_eq!(parameter.name, "pixels"); +        assert_str_eq!(parameter.ty, "void *"); +    } +} diff --git a/src/deserialization/buffer_deserializer.rs b/src/deserialization/buffer_deserializer.rs new file mode 100644 index 0000000..652e1ff --- /dev/null +++ b/src/deserialization/buffer_deserializer.rs @@ -0,0 +1,210 @@ +use std::any::type_name; +use std::error::Error; +use std::io::BufRead; + +use quick_xml::events::{BytesStart, Event}; +use quick_xml::Reader; + +use crate::deserialization::{ +    Deserialize, +    Deserializer, +    DeserializerError, +    IgnoreEnd, +    WrappedDeserializeError, +}; + +macro_rules! read_event { +    ($self: ident) => {{ +        let event = if let Some(leftover_event) = $self.leftover_event.take() { +            leftover_event +        } else { +            $self.reader.read_event_into(&mut $self.buf)?.into_owned() +        }; + +        if let Event::Eof = &event { +            return Err(DeserializerError::UnexpectedEndOfFile); +        } + +        event +    }}; +} + +pub struct BufferDeserializer<Source> +{ +    reader: Reader<Source>, +    leftover_event: Option<Event<'static>>, +    buf: Vec<u8>, +} + +impl<Source> BufferDeserializer<Source> +where +    Source: BufRead, +{ +    pub fn new(source: Source) -> Self +    { +        let mut reader = Reader::from_reader(source); + +        reader.trim_text(true); +        reader.expand_empty_elements(true); + +        Self { +            reader, +            leftover_event: None, +            buf: Vec::new(), +        } +    } +} + +impl<Source> Deserializer for BufferDeserializer<Source> +where +    Source: BufRead, +{ +    fn de_tag<De: Deserialize>( +        &mut self, +        tag_name: &str, +        ignore_end: IgnoreEnd, +    ) -> Result<De, DeserializerError> +    { +        self.de_tag_with(tag_name, ignore_end, De::deserialize) +    } + +    fn de_tag_with<Output, Err, DeserializeFn>( +        &mut self, +        tag_name: &str, +        ignore_end: IgnoreEnd, +        deserialize: DeserializeFn, +    ) -> Result<Output, DeserializerError> +    where +        Err: Error + Send + Sync + 'static, +        DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result<Output, Err>, +    { +        let deserialized = match read_event!(self) { +            Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { +                deserialize(&start, self).map_err(|err| { +                    DeserializerError::DeserializeFailed( +                        type_name::<Output>(), +                        WrappedDeserializeError::new(err), +                    ) +                })? +            } +            event => { +                self.leftover_event = Some(event.clone().into_owned()); + +                return Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: format!("start({tag_name})"), +                    found_event: event, +                }); +            } +        }; + +        if let IgnoreEnd::No = ignore_end { +            self.read_end_event(tag_name)?; +        } + +        Ok(deserialized) +    } + +    fn de_tag_list<De: Deserialize>( +        &mut self, +        tag_name: &str, +    ) -> Result<Vec<De>, DeserializerError> +    { +        let mut deserialized_items = Vec::new(); + +        loop { +            let start = match read_event!(self) { +                Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { +                    start +                } +                Event::Comment(_) => { +                    continue; +                } +                event => { +                    self.leftover_event = Some(event.into_owned()); +                    break; +                } +            }; + +            let deserialized = De::deserialize(&start, self).map_err(|err| { +                DeserializerError::DeserializeFailed( +                    type_name::<De>(), +                    WrappedDeserializeError::new(err), +                ) +            })?; + +            self.read_end_event(tag_name)?; + +            deserialized_items.push(deserialized); +        } + +        Ok(deserialized_items) +    } + +    fn de_text(&mut self) -> Result<String, DeserializerError> +    { +        let text = match read_event!(self) { +            Event::Text(text) => Ok(text), +            event => { +                self.leftover_event = Some(event.clone().into_owned()); + +                Err(DeserializerError::UnexpectedEvent { +                    expected_event_name: "text".to_string(), +                    found_event: event, +                }) +            } +        }? +        .unescape()?; + +        Ok(text.to_string()) +    } + +    fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError> +    { +        loop { +            match read_event!(self) { +                Event::Start(start) if start.name().as_ref() == tag_name.as_bytes() => { +                    self.leftover_event = Some(Event::Start(start).into_owned()); + +                    break; +                } +                _ => {} +            } +        } + +        Ok(()) +    } + +    fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError> +    { +        loop { +            match read_event!(self) { +                Event::End(end) if end.name().as_ref() == tag_name.as_bytes() => { +                    self.leftover_event = Some(Event::End(end).into_owned()); + +                    return Ok(()); +                } +                _ => {} +            } +        } +    } +} + +impl<Source> BufferDeserializer<Source> +where +    Source: BufRead, +{ +    fn read_end_event(&mut self, tag_name: &str) -> Result<(), DeserializerError> +    { +        let event = read_event!(self); + +        if matches!(&event, Event::End(end) if end.name().as_ref() == tag_name.as_bytes()) +        { +            return Ok(()); +        } + +        Err(DeserializerError::UnexpectedEvent { +            expected_event_name: "end".to_string(), +            found_event: event.into_owned(), +        }) +    } +} diff --git a/src/deserialization/mod.rs b/src/deserialization/mod.rs new file mode 100644 index 0000000..fa25e4b --- /dev/null +++ b/src/deserialization/mod.rs @@ -0,0 +1,124 @@ +use std::error::Error; +use std::ops::Deref; + +use quick_xml::events::{BytesStart, Event}; + +pub mod buffer_deserializer; + +pub trait Deserialize: Sized +{ +    type Error: Error + Send + Sync + 'static; + +    fn deserialize<TDeserializer: Deserializer>( +        start: &BytesStart, +        deserializer: &mut TDeserializer, +    ) -> Result<Self, Self::Error>; +} + +pub trait Deserializer +{ +    fn de_tag<De: Deserialize>( +        &mut self, +        tag_name: &str, +        ignore_end: IgnoreEnd, +    ) -> Result<De, DeserializerError>; + +    fn de_tag_with<Output, Err, DeserializeFn>( +        &mut self, +        tag_name: &str, +        ignore_end: IgnoreEnd, +        deserialize: DeserializeFn, +    ) -> Result<Output, DeserializerError> +    where +        Err: Error + Send + Sync + 'static, +        DeserializeFn: FnOnce(&BytesStart, &mut Self) -> Result<Output, Err>; + +    fn de_tag_list<De: Deserialize>( +        &mut self, +        tag_name: &str, +    ) -> Result<Vec<De>, DeserializerError>; + +    fn de_text(&mut self) -> Result<String, DeserializerError>; + +    fn skip_to_tag_start(&mut self, tag_name: &str) -> Result<(), DeserializerError>; + +    fn skip_to_tag_end(&mut self, tag_name: &str) -> Result<(), DeserializerError>; +} + +pub enum IgnoreEnd +{ +    Yes, +    No, +} + +/// Function pointer type passable to [`Deserializer::de_tag_with`]. +pub type DeserializeWithFn<Output, Err, Deserializer> = +    fn(&BytesStart, &mut Deserializer) -> Result<Output, Err>; + +#[derive(Debug, thiserror::Error)] +pub enum DeserializerError +{ +    #[error("Failed to read")] +    ReadFailed(#[from] quick_xml::Error), + +    #[error("Failed to deserialize {0}")] +    DeserializeFailed(&'static str, #[source] WrappedDeserializeError), + +    #[error("Expected {expected_event_name} event. Found {found_event:?}")] +    UnexpectedEvent +    { +        expected_event_name: String, +        found_event: Event<'static>, +    }, + +    #[error("Unexpected end of file")] +    UnexpectedEndOfFile, +} + +#[derive(Debug, thiserror::Error)] +#[error(transparent)] +pub struct WrappedDeserializeError(Box<dyn Error + Send + Sync>); + +impl WrappedDeserializeError +{ +    fn new<Err: Error + Send + Sync + 'static>(err: Err) -> Self +    { +        Self(Box::new(err)) +    } +} + +impl Deref for WrappedDeserializeError +{ +    type Target = dyn Error; + +    fn deref(&self) -> &Self::Target +    { +        self.0.as_ref() +    } +} + +pub trait ResultExt<Value> +{ +    fn try_event(self) -> Result<Option<Value>, DeserializerError>; +} + +impl<Value> ResultExt<Value> for Result<Value, DeserializerError> +{ +    fn try_event(self) -> Result<Option<Value>, DeserializerError> +    { +        self.map_or_else( +            |err| { +                if let DeserializerError::UnexpectedEvent { +                    expected_event_name: _, +                    found_event: _, +                } = err +                { +                    return Ok(None); +                } + +                Err(err) +            }, +            |value| Ok(Some(value)), +        ) +    } +} @@ -24,20 +24,24 @@  use std::fs::File;  use std::io::Read; +use quick_xml::events::BytesStart; +  use crate::command::{Command, Error as CommandError}; -use crate::xml::element::{Element, Elements, FromElements}; -use crate::xml::parser::{Error as ParserError, Parser}; +use crate::deserialization::buffer_deserializer::BufferDeserializer; +use crate::deserialization::{Deserialize, Deserializer, DeserializerError, IgnoreEnd};  pub mod command; -mod xml; +mod deserialization; +/// XML.  #[cfg(feature = "include-xml")]  const GL_REGISTRY_XML: &[u8] = include_bytes!("../OpenGL-Registry/xml/gl.xml");  const REGISTRY_TAG_NAME: &str = "registry";  /// Representation of the OpenGL registry. +#[derive(Debug, PartialEq, Eq)]  pub struct Registry  {      commands: Vec<Command>, @@ -62,15 +66,12 @@ impl Registry      /// Returns `Err` if parsing fails in any way.      pub fn retrieve_from_bytes(xml_bytes: &[u8]) -> Result<Registry, RegistryError>      { -        let mut parser = Parser::new(xml_bytes); - -        let elements = parser.parse().map_err(ParsingError)?; +        let mut deserializer = BufferDeserializer::new(xml_bytes); -        let registry_element = elements -            .get_first_tagged_element(REGISTRY_TAG_NAME) -            .ok_or(RegistryError::MissingRegistryElement)?; +        deserializer.skip_to_tag_start(REGISTRY_TAG_NAME)?; -        let registry = Registry::from_elements(registry_element.child_elements())?; +        let registry = +            deserializer.de_tag::<Registry>(REGISTRY_TAG_NAME, IgnoreEnd::Yes)?;          Ok(registry)      } @@ -110,35 +111,21 @@ impl Registry      }  } -impl FromElements for Registry +impl Deserialize for Registry  {      type Error = RegistryError; -    fn from_elements(elements: &Elements) -> Result<Self, Self::Error> +    fn deserialize<TDeserializer: Deserializer>( +        _start: &BytesStart, +        deserializer: &mut TDeserializer, +    ) -> Result<Self, Self::Error>      { -        let commands_element = elements -            .get_first_tagged_element("commands") -            .ok_or(Self::Error::MissingCommandsElement)?; - -        let command_elements = -            commands_element -                .child_elements() -                .into_iter() -                .filter_map(|element| match element { -                    Element::Tagged(tagged_element) -                        if tagged_element.name() == "command" => -                    { -                        Some(tagged_element) -                    } -                    _ => None, -                }); - -        let commands = command_elements -            .into_iter() -            .map(|command_element| { -                Command::from_elements(command_element.child_elements()) -            }) -            .collect::<Result<Vec<_>, _>>()?; +        deserializer.skip_to_tag_start("commands")?; + +        let commands = +            deserializer.de_tag_with("commands", IgnoreEnd::No, |_, deserializer| { +                deserializer.de_tag_list::<Command>("command") +            })?;          Ok(Self { commands })      } @@ -160,16 +147,11 @@ pub enum RegistryError      #[error("Invalid command")]      InvalidCommand(#[from] CommandError), -    /// Parsing failed. -    #[error("Parsing failed")] -    ParsingFailed(#[from] ParsingError), -      /// I/O failed.      #[error("I/O failed")]      IOFailed(#[from] std::io::Error), -} -/// Parsing error. -#[derive(Debug, thiserror::Error)] -#[error(transparent)] -pub struct ParsingError(#[from] ParserError); +    /// Deserialization failed. +    #[error("Deserialization failed")] +    DeserializationFailed(#[from] DeserializerError), +} diff --git a/src/xml/element.rs b/src/xml/element.rs deleted file mode 100644 index f469480..0000000 --- a/src/xml/element.rs +++ /dev/null @@ -1,144 +0,0 @@ -#[derive(Debug, PartialEq, Eq)] -pub struct Elements -{ -    elements: Vec<Element>, -} - -impl Elements -{ -    pub fn get_first_tagged_element(&self, tag_name: &str) -> Option<&Tagged> -    { -        self.elements.iter().find_map(|element| match element { -            Element::Tagged(tagged_element) if tagged_element.name == tag_name => { -                Some(tagged_element) -            } -            _ => None, -        }) -    } - -    pub fn get_all_tagged_elements_with_name(&self, tag_name: &str) -> Vec<&Tagged> -    { -        self.elements -            .iter() -            .filter_map(|element| match element { -                Element::Tagged(tagged_element) if tagged_element.name == tag_name => { -                    Some(tagged_element) -                } -                _ => None, -            }) -            .collect() -    } - -    pub fn get_first_text_element(&self) -> Option<&String> -    { -        self.elements.iter().find_map(|element| match element { -            Element::Text(text) => Some(text), -            _ => None, -        }) -    } - -    pub fn get_all_text_elements(&self) -> Vec<&String> -    { -        self.elements -            .iter() -            .filter_map(|element| match element { -                Element::Text(text) => Some(text), -                _ => None, -            }) -            .collect() -    } - -    pub fn has_tagged_element(&self, tag_name: &str) -> bool -    { -        self.elements.iter().any(|element| { -            matches!( -                element, -                Element::Tagged(tagged_element) if tagged_element.name == tag_name -            ) -        }) -    } -} - -impl<IntoIter: IntoIterator<Item = Element>> From<IntoIter> for Elements -{ -    fn from(into_iter: IntoIter) -> Self -    { -        Self { -            elements: into_iter.into_iter().collect(), -        } -    } -} - -impl<'elements> IntoIterator for &'elements Elements -{ -    type IntoIter = Iter<'elements>; -    type Item = &'elements Element; - -    fn into_iter(self) -> Self::IntoIter -    { -        Self::IntoIter { -            elements: self.elements.iter(), -        } -    } -} - -pub struct Iter<'elements> -{ -    elements: std::slice::Iter<'elements, Element>, -} - -impl<'elements> Iterator for Iter<'elements> -{ -    type Item = &'elements Element; - -    fn next(&mut self) -> Option<Self::Item> -    { -        self.elements.next() -    } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Element -{ -    Tagged(Tagged), -    Text(String), -    Comment(String), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Tagged -{ -    name: String, -    child_elements: Elements, -} - -impl Tagged -{ -    pub fn new<Name, ChildElements>(name: &Name, child_elements: ChildElements) -> Self -    where -        Name: ToString, -        ChildElements: Into<Elements>, -    { -        Self { -            name: name.to_string(), -            child_elements: child_elements.into(), -        } -    } - -    pub fn name(&self) -> &str -    { -        &self.name -    } - -    pub fn child_elements(&self) -> &Elements -    { -        &self.child_elements -    } -} - -pub trait FromElements: Sized -{ -    type Error; - -    fn from_elements(elements: &Elements) -> Result<Self, Self::Error>; -} diff --git a/src/xml/mod.rs b/src/xml/mod.rs deleted file mode 100644 index 12368c3..0000000 --- a/src/xml/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod element; -pub mod parser; diff --git a/src/xml/parser.rs b/src/xml/parser.rs deleted file mode 100644 index d152a6e..0000000 --- a/src/xml/parser.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::io::BufRead; - -use quick_xml::events::{BytesStart, BytesText, Event}; -use quick_xml::Reader; - -use crate::xml::element::{Element, Elements, Tagged}; - -/// XML parser. -pub struct Parser<Source> -{ -    reader: Reader<Source>, -} - -impl<Source: BufRead> Parser<Source> -{ -    pub fn new(src: Source) -> Self -    { -        Self { -            reader: Reader::from_reader(src), -        } -    } - -    pub fn parse(&mut self) -> Result<Elements, Error> -    { -        let mut buf = Vec::new(); - -        let mut elements = Vec::new(); - -        loop { -            let event = self.reader.read_event_into(&mut buf)?; - -            let element = match self.handle_event(event)? { -                EventHandlingResult::Element(element) => element, -                EventHandlingResult::Event(_) => { -                    continue; -                } -                EventHandlingResult::End => { -                    break; -                } -            }; - -            elements.push(element); -        } - -        Ok(elements.into()) -    } - -    fn parse_text(text: &BytesText) -> Result<String, Error> -    { -        String::from_utf8(text.to_vec()).map_err(|_| Error::TextNotUTF8) -    } - -    fn parse_tagged(&mut self, start: &BytesStart) -> Result<Element, Error> -    { -        let mut child_elements = Vec::new(); - -        let mut buf = Vec::new(); - -        loop { -            let event = self.reader.read_event_into(&mut buf)?; - -            match event { -                Event::End(end) if end.name() == start.name() => { -                    break; -                } -                event => match self.handle_event(event)? { -                    EventHandlingResult::Element(element) => { -                        child_elements.push(element); -                    } -                    EventHandlingResult::End => { -                        return Err(Error::UnexpectedEndOfFile); -                    } -                    EventHandlingResult::Event(_) => {} -                }, -            } -        } - -        Ok(Element::Tagged(Tagged::new( -            &String::from_utf8(start.name().as_ref().to_vec()) -                .map_err(|_| Error::TagNameNotUTF8)?, -            child_elements, -        ))) -    } - -    fn handle_event<'a>( -        &'a mut self, -        event: Event<'a>, -    ) -> Result<EventHandlingResult, Error> -    { -        match event { -            Event::Text(text) => Ok(EventHandlingResult::Element(Element::Text( -                Self::parse_text(&text)?, -            ))), -            Event::Start(start) => { -                Ok(EventHandlingResult::Element(self.parse_tagged(&start)?)) -            } -            Event::End(_) => Err(Error::UnexpectedTagEnd), -            Event::Eof => Ok(EventHandlingResult::End), -            Event::Comment(comment_text) => Ok(EventHandlingResult::Element( -                Element::Comment(Self::parse_text(&comment_text)?), -            )), -            event => Ok(EventHandlingResult::Event(event)), -        } -    } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error -{ -    #[error(transparent)] -    QuickXMLFailed(#[from] quick_xml::Error), - -    #[error("Text is not UTF-8")] -    TextNotUTF8, - -    #[error("Tag name is not UTF-8")] -    TagNameNotUTF8, - -    #[error("Unexpectedly found the end of a tag")] -    UnexpectedTagEnd, - -    #[error("Unexpected end of file")] -    UnexpectedEndOfFile, -} - -enum EventHandlingResult<'event> -{ -    Element(Element), -    Event(Event<'event>), -    End, -} - -#[cfg(test)] -mod tests -{ -    use pretty_assertions::assert_eq; - -    use super::*; - -    #[test] -    fn can_parse() -    { -        let mut parser = Parser::new("<foo>Hello there</foo>".as_bytes()); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(vec![Element::Tagged(Tagged::new( -                &"foo", -                vec![Element::Text("Hello there".to_string())] -            ))]) -        ); - -        let mut parser = Parser::new("<foo><bar>123</bar> Hello</foo>".as_bytes()); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(vec![Element::Tagged(Tagged::new( -                &"foo", -                vec![ -                    Element::Tagged(Tagged::new( -                        &"bar", -                        Elements::from(vec![Element::Text("123".to_string())]) -                    )), -                    Element::Text(" Hello".to_string()) -                ] -            ))]) -        ); - -        let mut parser = Parser::new("".as_bytes()); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(Vec::new()) -        ); - -        let mut parser = Parser::new( -            "<foo><!--XML is awful-->Hello there<bar>123</bar></foo>".as_bytes(), -        ); - -        assert_eq!( -            parser.parse().expect("Expected Ok"), -            Elements::from(vec![Element::Tagged(Tagged::new( -                &"foo", -                vec![ -                    Element::Comment("XML is awful".to_string()), -                    Element::Text("Hello there".to_string()), -                    Element::Tagged(Tagged::new( -                        &"bar", -                        vec![Element::Text("123".to_string())] -                    )), -                ] -            ))]) -        ); -    } -}  | 
