From 2003e77ae3336052b792d44d2cd9ba3afd4b98c1 Mon Sep 17 00:00:00 2001 From: lurchi Date: Mon, 29 Aug 2016 01:00:31 +0200 Subject: [PATCH] improve parser: incomplete data is handled internally --- rust/src/parser.rs | 254 +++++++++++++++++++------------------- rust/src/parser_types.rs | 1 + rust/tests/test_parser.rs | 68 ++++++++-- 3 files changed, 189 insertions(+), 134 deletions(-) diff --git a/rust/src/parser.rs b/rust/src/parser.rs index 9d9620c..064a374 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -7,14 +7,14 @@ use std::os::raw::c_char; extern "C" { fn psyc_parse_state_init(state: *mut PsycParseState, flags: u8); fn psyc_parse_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); - fn psyc_parse_list_state_init(state: *mut PsycParseState); - fn psyc_parse_list_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); - fn psyc_parse_dict_state_init(state: *mut PsycParseState); - fn psyc_parse_dict_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); - fn psyc_parse_index_state_init(state: *mut PsycParseState); - fn psyc_parse_index_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); - fn psyc_parse_update_state_init(state: *mut PsycParseState); - fn psyc_parse_update_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); + fn psyc_parse_list_state_init(state: *mut PsycParseListState); + fn psyc_parse_list_buffer_set(state: *mut PsycParseListState, buffer: *const c_char, length: usize); + fn psyc_parse_dict_state_init(state: *mut PsycParseDictState); + fn psyc_parse_dict_buffer_set(state: *mut PsycParseDictState, buffer: *const c_char, length: usize); + fn psyc_parse_index_state_init(state: *mut PsycParseIndexState); + fn psyc_parse_index_buffer_set(state: *mut PsycParseIndexState, buffer: *const c_char, length: usize); + fn psyc_parse_update_state_init(state: *mut PsycParseUpdateState); + fn psyc_parse_update_buffer_set(state: *mut PsycParseUpdateState, buffer: *const c_char, length: usize); fn psyc_parse_content_length(state: *mut PsycParseState) -> usize; fn psyc_parse_content_length_found(state: *mut PsycParseState) -> bool; fn psyc_parse_value_length(state: *mut PsycParseState) -> usize; @@ -60,61 +60,66 @@ extern "C" { fn psyc_parse_keyword(data: *const c_char, len: usize) -> usize; } -pub struct PsycParser { - state: PsycParseState +pub struct PsycParser<'a> { + state: PsycParseState, + operator: char, + name: Option<&'a [u8]>, + buffer: Option<&'a [u8]>, + cursor: usize } -pub struct PsycListParser { - state: PsycParseListState -} - -pub struct PsycDictParser { - state: PsycParseDictState -} - -pub struct PsycIndexParser { - state: PsycParseIndexState -} - -pub struct PsycUpdateParser { - state: PsycParseUpdateState -} +//pub struct PsycListParser<'a> { +// state: PsycParseListState, +// parsed_list: Vec>, +// buffer: &'a [u8] +//} +// +//pub struct PsycDictParser<'a> { +// state: PsycParseDictState, +// parsed_dict: Vec<(Vec, Vec)>, +// buffer: &'a [u8] +//} +// +//// TODO: What data structures does the index parser need? +//pub struct PsycIndexParser { +// state: PsycParseIndexState +//} +// +//// TODO: what data structures does the update parser need? +//pub struct PsycUpdateParser { +// state: PsycParseUpdateState +//} #[derive(Debug, PartialEq)] -pub enum PsycParserResult { +pub enum PsycParserResult<'a> { StateSync, StateReset, - ParsingComplete, + Complete, + InsufficientData, RoutingModifier { operator: char, - name: String, - value: Vec + name: &'a [u8], + value: &'a [u8] }, EntityModifier { operator: char, - name: String, - value: Vec - }, - IncompleteEntityModifier { - operator: char, - name: String, - value: Vec, - cursor: usize + name: &'a [u8], + value: &'a [u8] }, Body { - name: String, - value: Vec - }, - IncompleteBody { - name: String, - value: Vec, - cursor: usize - }, - InsufficientData { - cursor: usize + name: &'a [u8], + value: &'a [u8] }, } +//#[derive(Debug, PartialEq)] +//pub enum PsycDictParserResult { +// InsufficientData, +// Dict { +// data: Vec<(&'a [u8], &'a[u8])> +// } +//} + #[repr(C)] #[derive(Debug, PartialEq)] pub enum PsycParserError { @@ -130,7 +135,7 @@ pub enum PsycParserError { GenericError = PsycParseRC::PSYC_PARSE_ERROR as _, } -impl PsycParser { +impl<'a> PsycParser<'a> { /// Create a PsycParser pub fn new() -> Self { let mut state: PsycParseState; @@ -139,96 +144,41 @@ impl PsycParser { let state_ptr = &mut state as *mut PsycParseState; psyc_parse_state_init(state_ptr, PsycParseFlag::PSYC_PARSE_ALL as u8) } - PsycParser{state: state} + PsycParser{ + state: state, + operator: '\0', + name: None, + buffer: None, + cursor: 0 + } } /// Set a buffer of raw bytes for parsing - pub fn set_buffer(&mut self, buffer: &[u8]) { + pub fn set_buffer(&mut self, buffer: &'a [u8]) { + self.buffer = Some(buffer); let state_ptr = &mut self.state as *mut PsycParseState; - let buffer_ptr = &buffer[0] as *const u8 as *const c_char; + let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char; unsafe { - psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len()) + psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len() - self.cursor) } } /// Parse the buffer previously set by set_buffer. Call repeatedly until the - /// result is PsycParserResult::ParsingComplete or a PsycParserError. - pub fn parse(&mut self) -> Result { + /// result is PsycParserResult::Complete or a PsycParserError. + pub fn parse(&mut self) + -> Result { let state_ptr = &mut self.state as *mut PsycParseState; - let mut operator: char; let mut name: PsycString; let mut value: PsycString; unsafe { - operator = mem::uninitialized(); name = mem::uninitialized(); value = mem::uninitialized(); - let operator_ptr = &mut operator as *mut char as *mut c_char; + let operator_ptr = &mut self.operator as *mut char as *mut c_char; let name_ptr = &mut name as *mut PsycString; let value_ptr = &mut value as *mut PsycString; let parse_result = psyc_parse(state_ptr, operator_ptr, name_ptr, value_ptr); + self.cursor = self.cursor + psyc_parse_cursor(state_ptr); match parse_result { - PsycParseRC::PSYC_PARSE_INSUFFICIENT => { - let result = - PsycParserResult::InsufficientData { - cursor: psyc_parse_cursor(state_ptr) - }; - Ok(result) - }, - - PsycParseRC::PSYC_PARSE_ROUTING => { - let result = - PsycParserResult::RoutingModifier { - operator: operator, - name: Self::cstring_to_string(name.data, name.length), - value: Self::cstring_to_bytes(value.data, value.length) - }; - Ok(result) - }, - - PsycParseRC::PSYC_PARSE_ENTITY | - PsycParseRC::PSYC_PARSE_ENTITY_END => { - let result = - PsycParserResult::EntityModifier { - operator: operator, - name: Self::cstring_to_string(name.data, name.length), - value: Self::cstring_to_bytes(value.data, value.length) - }; - Ok(result) - }, - - PsycParseRC::PSYC_PARSE_ENTITY_START | - PsycParseRC::PSYC_PARSE_ENTITY_CONT => { - let result = - PsycParserResult::IncompleteEntityModifier { - operator: operator, - name: Self::cstring_to_string(name.data, name.length), - value: Self::cstring_to_bytes(value.data, value.length), - cursor: psyc_parse_cursor(state_ptr) - }; - Ok(result) - }, - - PsycParseRC::PSYC_PARSE_BODY | - PsycParseRC::PSYC_PARSE_BODY_END => { - let result = - PsycParserResult::Body { - name: Self::cstring_to_string(name.data, name.length), - value: Self::cstring_to_bytes(value.data, value.length) - }; - Ok(result) - }, - - PsycParseRC::PSYC_PARSE_BODY_START | - PsycParseRC::PSYC_PARSE_BODY_CONT => { - let result = - PsycParserResult::IncompleteBody { - name: Self::cstring_to_string(name.data, name.length), - value: Self::cstring_to_bytes(value.data, value.length), - cursor: psyc_parse_cursor(state_ptr) - }; - Ok(result) - }, - PsycParseRC::PSYC_PARSE_STATE_RESYNC => Ok(PsycParserResult::StateSync), @@ -236,19 +186,69 @@ impl PsycParser { Ok(PsycParserResult::StateReset), PsycParseRC::PSYC_PARSE_COMPLETE => - Ok(PsycParserResult::ParsingComplete), + Ok(PsycParserResult::Complete), + + PsycParseRC::PSYC_PARSE_ROUTING => { + let result = PsycParserResult::RoutingModifier { + operator: self.operator, + name: Self::cstring_to_slice(name.data, name.length), + value: Self::cstring_to_slice(value.data, value.length) + }; + Ok(result) + }, + + PsycParseRC::PSYC_PARSE_ENTITY => { + let result = PsycParserResult::EntityModifier { + operator: self.operator, + name: Self::cstring_to_slice(name.data, name.length), + value: Self::cstring_to_slice(value.data, value.length) + }; + Ok(result) + }, + + PsycParseRC::PSYC_PARSE_BODY => { + let result = PsycParserResult::Body { + name: Self::cstring_to_slice(name.data, name.length), + value: Self::cstring_to_slice(value.data, value.length) + }; + Ok(result) + }, + + PsycParseRC::PSYC_PARSE_ENTITY_START | + PsycParseRC::PSYC_PARSE_BODY_START => { + self.name = Some(Self::cstring_to_slice(name.data, name.length)); + Ok(PsycParserResult::InsufficientData) + }, + + PsycParseRC::PSYC_PARSE_ENTITY_END => { + let result = PsycParserResult::EntityModifier { + operator: self.operator, + name: self.name.unwrap(), + value: Self::cstring_to_slice(value.data, value.length) + }; + Ok(result) + }, + + PsycParseRC::PSYC_PARSE_BODY_END => { + let result = PsycParserResult::Body { + name: self.name.unwrap(), + value: Self::cstring_to_slice(value.data, value.length) + }; + Ok(result) + }, + + PsycParseRC::PSYC_PARSE_INSUFFICIENT | + PsycParseRC::PSYC_PARSE_ENTITY_CONT | + PsycParseRC::PSYC_PARSE_BODY_CONT => { + Ok(PsycParserResult::InsufficientData) + }, _error => Err(mem::transmute(_error)), } } } - unsafe fn cstring_to_string(cstring: *const c_char, length: usize) -> String { - let vec = Self::cstring_to_bytes(cstring, length); - String::from_utf8(vec).unwrap() - } - - unsafe fn cstring_to_bytes(cstring: *const c_char, length: usize) -> Vec { - slice::from_raw_parts(cstring as *const u8, length).to_vec() + unsafe fn cstring_to_slice(cstring: *const c_char, length: usize) -> &'a [u8] { + slice::from_raw_parts(cstring as *const u8, length) } } diff --git a/rust/src/parser_types.rs b/rust/src/parser_types.rs index 3881cf6..5e63915 100644 --- a/rust/src/parser_types.rs +++ b/rust/src/parser_types.rs @@ -79,6 +79,7 @@ pub enum PsycParseFlag { PSYC_PARSE_START_AT_CONTENT = 2, } +#[derive(Debug)] #[repr(C)] pub enum PsycParseRC { /// Error, no length is set for a modifier which is longer than PSYC_MODIFIER_SIZE_THRESHOLD. diff --git a/rust/tests/test_parser.rs b/rust/tests/test_parser.rs index 4b11bb4..cd5dfba 100644 --- a/rust/tests/test_parser.rs +++ b/rust/tests/test_parser.rs @@ -1,5 +1,4 @@ extern crate psyc; - use psyc::parser::*; #[test] @@ -9,18 +8,73 @@ fn test_parse() { let expected1 = PsycParserResult::RoutingModifier{ operator: ':', - name: "_target".to_string(), - value: "psyc://ve.symlynx.com/@blog".to_string().into_bytes() + name: &test_data[1 .. 8], + value: &test_data[9 .. 36], }; let expected2 = PsycParserResult::StateSync; let mut parser = PsycParser::new(); + parser.set_buffer(&test_data); - let result1 = parser.parse(); - let result2 = parser.parse(); + { + let result1 = parser.parse(); + assert_eq!(result1, Ok(expected1)); + } - assert_eq!(result1, Ok(expected1)); - assert_eq!(result2, Ok(expected2)); + { + let result2 = parser.parse(); + assert_eq!(result2, Ok(expected2)); + } +} + +#[test] +fn test_insufficient() { + let test_data = ":_target\tpsyc://ve.symlynx.com/@blog\n\n:_nick\tlurchi\n|\n".to_string().into_bytes(); + + let expected1 = PsycParserResult::InsufficientData; + let expected2 =PsycParserResult::RoutingModifier { + operator: ':', + name: &test_data[1 .. 8], + value: &test_data[9 .. 36] + }; + let expected3 = PsycParserResult::InsufficientData; + let expected4 = PsycParserResult::EntityModifier{ + operator: ':', + name: &test_data[39 .. 44], + value: &test_data[45 .. 51], + }; + let expected5 = PsycParserResult::Complete; + + let mut parser = PsycParser::new(); + + { + parser.set_buffer(&test_data[.. 1]); + let result1 = parser.parse(); + assert_eq!(result1, Ok(expected1)); + } + + { + parser.set_buffer(&test_data[.. 46]); + let result2 = parser.parse(); + assert_eq!(result2, Ok(expected2)); + } + + { + parser.set_buffer(&test_data[.. 49]); + let result3 = parser.parse(); + assert_eq!(result3, Ok(expected3)); + } + + { + parser.set_buffer(&test_data); + let result4 = parser.parse(); + assert_eq!(result4, Ok(expected4)); + } + + { + let result5 = parser.parse(); + assert_eq!(result5, Ok(expected5)); + } }