diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 2972e4d..a70e024 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,5 +1,6 @@ #![allow(dead_code)] mod types; +mod util; pub mod parser_types; pub mod packet_types; pub mod parser; diff --git a/rust/src/parser.rs b/rust/src/parser.rs index 6c06d69..faf15f1 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -1,7 +1,7 @@ use types::*; use parser_types::*; +use util; use std::mem; -use std::slice; use std::os::raw::c_char; extern "C" { @@ -19,10 +19,10 @@ extern "C" { fn psyc_parse_content_length_found(state: *mut PsycParseState) -> bool; fn psyc_parse_value_length(state: *mut PsycParseState) -> usize; fn psyc_parse_value_length_found(state: *mut PsycParseState) -> bool; - fn psyc_parse_cursor(state: *mut PsycParseState) -> usize; - fn psyc_parse_buffer_length(state: *mut PsycParseState) -> usize; - fn psyc_parse_remaining_length(state: *mut PsycParseState) -> usize; - fn psyc_parse_remaining_buffer(state: *mut PsycParseState) -> *const c_char; + fn psyc_parse_cursor(state: *const PsycParseState) -> usize; + fn psyc_parse_buffer_length(state: *const PsycParseState) -> usize; + fn psyc_parse_remaining_length(state: *const PsycParseState) -> usize; + fn psyc_parse_remaining_buffer(state: *const PsycParseState) -> *const c_char; fn psyc_parse(state: *mut PsycParseState, oper: *mut c_char, name: *mut PsycString, @@ -60,12 +60,8 @@ extern "C" { fn psyc_parse_keyword(data: *const c_char, len: usize) -> usize; } -pub struct PsycParser<'a> { - state: PsycParseState, - operator: char, - name: Option<&'a [u8]>, - buffer: Option<&'a [u8]>, - cursor: usize +pub struct PsycParser { + state: PsycParseState } //pub struct PsycListParser<'a> { @@ -74,11 +70,13 @@ pub struct PsycParser<'a> { // buffer: &'a [u8] //} // -//pub struct PsycDictParser<'a> { -// state: PsycParseDictState, -// parsed_dict: Vec<(Vec, Vec)>, -// buffer: &'a [u8] -//} +pub struct PsycDictParser<'a> { + state: PsycParseDictState, + parsed_key: 
Option<&'a [u8]>, + parsed_dict: Vec<(&'a [u8], &'a [u8])>, + buffer: Option<&'a [u8]>, + cursor: usize +} // //// TODO: What data structures does the index parser need? //pub struct PsycIndexParser { @@ -106,19 +104,64 @@ pub enum PsycParserResult<'a> { name: &'a [u8], value: &'a [u8] }, + EntityModifierStart { + operator: char, + name: &'a [u8], + value_part: &'a [u8] + }, + EntityModifierCont { + value_part: &'a [u8] + }, + EntityModifierEnd, Body { name: &'a [u8], value: &'a [u8] }, + BodyStart { + name: &'a [u8], + value_part: &'a [u8] + }, + BodyCont { + value_part: &'a [u8] + }, + BodyEnd } //#[derive(Debug, PartialEq)] -//pub enum PsycDictParserResult { +//pub enum PsycListParserResult { +// Complete, // InsufficientData, -// Dict { -// data: Vec<(&'a [u8], &'a[u8])> +// ListElement { +// value: &[u8] +// }, +// ListElementStart { +// value_part: &[u8] +// }, +// ListElementCont { +// value_part: &[u8] +// }, +// ListElementEnd { +// value_part: &[u8] // } //} +// +//#[derive(Debug, PartialEq)] +//pub enum PsycDictParserResult<'a> { +// Complete, +// InsufficientData, +// DictEntry { +// key: &'a [u8], +// value: &'a [u8] +// }, +// DictEntryStart { +// key: &'a [u8], +// value_part: &'a [u8] +// }, +// DictEntryCont { +// value_part: &'a [u8] +// }, +// DictEntryEnd +//} #[repr(C)] #[derive(Debug, PartialEq)] @@ -135,7 +178,32 @@ pub enum PsycParserError { GenericError = PsycParseRC::PSYC_PARSE_ERROR as _, } -impl<'a> PsycParser<'a> { +#[repr(C)] +#[derive(Debug, PartialEq)] +pub enum PsycListParserError { + NoElementLength = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_NO_LEN as _, + InvalidElementLength = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_LENGTH as _, + InvalidElementType = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_TYPE as _, + InvalidElementStart = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_START as _, + InvalidType = PsycParseListRC::PSYC_PARSE_LIST_ERROR_TYPE as _, + GenericError = PsycParseListRC::PSYC_PARSE_LIST_ERROR as _, +} + 
+#[repr(C)] +#[derive(Debug, PartialEq)] +pub enum PsycDictParserError { + InvalidValue = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE as _, + InvalidValueLength = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_LENGTH as _, + InvalidValueType = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_TYPE as _, + InvalidValueStart = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_START as _, + InvalidKey = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY as _, + InvalidKeyLength = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY_LENGTH as _, + InvalidKeyStart = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY_START as _, + InvalidKeyType = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_TYPE as _, + GenericError = PsycParseDictRC::PSYC_PARSE_DICT_ERROR as _, +} + +impl PsycParser { /// Create a PsycParser pub fn new() -> Self { let mut state: PsycParseState; @@ -144,40 +212,26 @@ impl<'a> PsycParser<'a> { let state_ptr = &mut state as *mut PsycParseState; psyc_parse_state_init(state_ptr, PsycParseFlag::PSYC_PARSE_ALL as u8) } - PsycParser{ + PsycParser { state: state, - operator: '\0', - name: None, - buffer: None, - cursor: 0 - } - } - - /// Set a buffer of raw bytes for parsing - pub fn set_buffer(&mut self, buffer: &'a [u8]) { - self.buffer = Some(buffer); - let state_ptr = &mut self.state as *mut PsycParseState; - let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char; - unsafe { - psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len() - self.cursor) } } /// Parse the buffer previously set by set_buffer. Call repeatedly until the /// result is PsycParserResult::Complete or a PsycParserError. 
- pub fn parse(&mut self) - -> Result, PsycParserError> { + pub fn parse<'a>(&mut self) + -> Result, PsycParserError> { let state_ptr = &mut self.state as *mut PsycParseState; + let mut operator = '\0'; let mut name: PsycString; let mut value: PsycString; unsafe { name = mem::uninitialized(); value = mem::uninitialized(); - let operator_ptr = &mut self.operator as *mut char as *mut c_char; + let operator_ptr = &mut operator as *mut char as *mut c_char; let name_ptr = &mut name as *mut PsycString; let value_ptr = &mut value as *mut PsycString; let parse_result = psyc_parse(state_ptr, operator_ptr, name_ptr, value_ptr); - self.cursor = self.cursor + psyc_parse_cursor(state_ptr); match parse_result { PsycParseRC::PSYC_PARSE_STATE_RESYNC => Ok(PsycParserResult::StateSync), @@ -188,67 +242,203 @@ impl<'a> PsycParser<'a> { PsycParseRC::PSYC_PARSE_COMPLETE => Ok(PsycParserResult::Complete), + PsycParseRC::PSYC_PARSE_INSUFFICIENT => + Ok(PsycParserResult::InsufficientData), + PsycParseRC::PSYC_PARSE_ROUTING => { let result = PsycParserResult::RoutingModifier { - operator: self.operator, - name: Self::cstring_to_slice(name.data, name.length), - value: Self::cstring_to_slice(value.data, value.length) + operator: operator, + name: util::cstring_to_slice(name.data, name.length), + value: util::cstring_to_slice(value.data, value.length) }; Ok(result) }, PsycParseRC::PSYC_PARSE_ENTITY => { let result = PsycParserResult::EntityModifier { - operator: self.operator, - name: Self::cstring_to_slice(name.data, name.length), - value: Self::cstring_to_slice(value.data, value.length) + operator: operator, + name: util::cstring_to_slice(name.data, name.length), + value: util::cstring_to_slice(value.data, value.length) }; Ok(result) }, + PsycParseRC::PSYC_PARSE_ENTITY_START => { + let result = PsycParserResult::EntityModifierStart { + operator: operator, + name: util::cstring_to_slice(name.data, name.length), + value_part: util::cstring_to_slice(value.data, value.length) + }; + Ok(result) 
+ }, + + PsycParseRC::PSYC_PARSE_ENTITY_CONT => { + let result = PsycParserResult::EntityModifierCont { + value_part: util::cstring_to_slice(value.data, value.length) + }; + Ok(result) + }, + + PsycParseRC::PSYC_PARSE_ENTITY_END => + Ok(PsycParserResult::EntityModifierEnd), + PsycParseRC::PSYC_PARSE_BODY => { let result = PsycParserResult::Body { - name: Self::cstring_to_slice(name.data, name.length), - value: Self::cstring_to_slice(value.data, value.length) + name: util::cstring_to_slice(name.data, name.length), + value: util::cstring_to_slice(value.data, value.length) }; Ok(result) }, - PsycParseRC::PSYC_PARSE_ENTITY_START | PsycParseRC::PSYC_PARSE_BODY_START => { - self.name = Some(Self::cstring_to_slice(name.data, name.length)); - Ok(PsycParserResult::InsufficientData) - }, - - PsycParseRC::PSYC_PARSE_ENTITY_END => { - let result = PsycParserResult::EntityModifier { - operator: self.operator, - name: self.name.unwrap(), - value: Self::cstring_to_slice(value.data, value.length) + let result = PsycParserResult::BodyStart { + name: util::cstring_to_slice(name.data, name.length), + value_part: util::cstring_to_slice(value.data, value.length) }; Ok(result) }, - - PsycParseRC::PSYC_PARSE_BODY_END => { - let result = PsycParserResult::Body { - name: self.name.unwrap(), - value: Self::cstring_to_slice(value.data, value.length) - }; - Ok(result) - }, - - PsycParseRC::PSYC_PARSE_INSUFFICIENT | - PsycParseRC::PSYC_PARSE_ENTITY_CONT | PsycParseRC::PSYC_PARSE_BODY_CONT => { - Ok(PsycParserResult::InsufficientData) + let result = PsycParserResult::BodyCont { + value_part: util::cstring_to_slice(value.data, value.length) + }; + Ok(result) }, + PsycParseRC::PSYC_PARSE_BODY_END => + Ok(PsycParserResult::BodyEnd), + _error => Err(mem::transmute(_error)), } } } +} - unsafe fn cstring_to_slice(cstring: *const c_char, length: usize) -> &'a [u8] { - slice::from_raw_parts(cstring as *const u8, length) +impl Parser for PsycParser { + fn set_buffer(&mut self, buffer: &[u8]) { + let 
state_ptr = &mut self.state as *mut PsycParseState; + let buffer_ptr = buffer.as_ptr() as *const c_char; + unsafe { + psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len()) + } + } + + fn unparsed_position(&self) -> usize { + unsafe { + psyc_parse_cursor(&self.state as *const PsycParseState) + } + } + + fn unparsed_length(&self) -> usize { + unsafe { + psyc_parse_remaining_length(&self.state as *const PsycParseState) + } } } + +//impl<'a> PsycDictParser<'a> { +// /// Create a PsycDictParser +// pub fn new() -> Self { +// let mut state: PsycParseDictState; +// unsafe { +// state = mem::uninitialized(); +// let state_ptr = &mut state as *mut PsycParseDictState; +// psyc_parse_dict_state_init(state_ptr) +// } +// PsycDictParser { +// state: state, +// parsed_key: None, +// parsed_dict: vec![], +// buffer: None, +// cursor: 0 +// } +// } +// +// /// Set a buffer of raw bytes for parsing +// pub fn set_buffer(&mut self, buffer: &'a [u8]) { +// self.buffer = Some(buffer); +// let state_ptr = &mut self.state as *mut PsycParseDictState; +// let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char; +// unsafe { +// psyc_parse_dict_buffer_set(state_ptr, buffer_ptr, buffer.len()) +// } +// } +// +// /// Parse the buffer previously set by set_buffer. 
Call repeatedly until the +// /// result is PsycDictParserResult::Complete or a PsycDictParserError +// pub fn parse(&mut self) +// -> Result, PsycDictParserError> { +// let state_ptr = &mut self.state as *mut PsycParseDictState; +// let mut list_type: PsycString; +// let mut element: PsycString; +// unsafe { +// list_type = mem::uninitialized(); +// element = mem::uninitialized(); +// let list_type_ptr = &mut list_type as *mut PsycString; +// let element_ptr = &mut element as *mut PsycString; +// loop { +// let parse_result = psyc_parse_dict(state_ptr, list_type_ptr, element_ptr); +// self.cursor = self.cursor + self.state.cursor; +// println!("parse_result: {:?}", parse_result); +// match parse_result { +// PsycParseDictRC::PSYC_PARSE_DICT_KEY_END | +// PsycParseDictRC::PSYC_PARSE_DICT_KEY => { +// let key = util::cstring_to_slice(element.data, element.length); +// self.parsed_key = Some(key) +// }, +// +// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_END | +// PsycParseDictRC::PSYC_PARSE_DICT_VALUE => { +// let value = util::cstring_to_slice(element.data, element.length); +// self.parsed_dict.push((self.parsed_key.unwrap(), value)) +// }, +// +// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_LAST => { +// let value = util::cstring_to_slice(element.data, element.length); +// self.parsed_dict.push((self.parsed_key.unwrap(), value)); +// let complete = PsycDictParserResult::Complete { +// dict: self.parsed_dict.clone() +// }; +// return Ok(complete); +// }, +// +// PsycParseDictRC::PSYC_PARSE_DICT_INSUFFICIENT | +// PsycParseDictRC::PSYC_PARSE_DICT_KEY_START | +// PsycParseDictRC::PSYC_PARSE_DICT_KEY_CONT | +// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_START | +// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_CONT | +// PsycParseDictRC::PSYC_PARSE_DICT_END /* FIXME: correct? 
/// Buffer management interface for a parser that works on caller-supplied
/// byte buffers.
pub trait Parser {
    /// Set a buffer of raw bytes for parsing.
    fn set_buffer(&mut self, buffer: &[u8]);

    /// Copies the remaining unparsed bytes to the beginning of the given buffer.
    ///
    /// Returns the number of unparsed bytes, i.e. the length of the prefix of
    /// `buffer` that holds them afterwards. Must be called when `parse()`
    /// returned `InsufficientData` as result.
    ///
    /// The unparsed range may overlap its destination (when less has been
    /// consumed than is left over), so the bytes are moved front to back
    /// instead of being copied between two disjoint halves of the buffer.
    ///
    /// # Panics
    ///
    /// Panics if the unparsed position is non-zero and the unparsed range
    /// `unparsed_position() .. unparsed_position() + unparsed_length()` does
    /// not lie within `buffer`.
    fn copy_unparsed_into_buffer(&self, buffer: &mut [u8]) -> usize {
        let unparsed_pos = self.unparsed_position();
        let unparsed_len = self.unparsed_length();
        // Nothing was consumed: the unparsed data already starts at offset 0.
        if unparsed_pos != 0 {
            let unparsed_end = unparsed_pos
                .checked_add(unparsed_len)
                .expect("unparsed range overflows usize");
            assert!(
                unparsed_end <= buffer.len(),
                "unparsed range {}..{} exceeds buffer of length {}",
                unparsed_pos,
                unparsed_end,
                buffer.len()
            );
            // The destination index is always below the source index, so a
            // forward copy never overwrites a byte that still has to be read.
            for offset in 0..unparsed_len {
                buffer[offset] = buffer[unparsed_pos + offset];
            }
        }
        unparsed_len
    }

    /// Offset into the current buffer at which the unparsed data starts.
    fn unparsed_position(&self) -> usize;

    /// Number of bytes in the current buffer that have not been parsed yet.
    fn unparsed_length(&self) -> usize;
}
use std::slice;
use std::os::raw::c_char;

/// Views `length` bytes starting at `cstring` as a byte slice.
///
/// A null pointer or a zero `length` yields an empty slice instead of being
/// passed to `slice::from_raw_parts`, which requires a non-null pointer even
/// for zero-length slices.
///
/// # Safety
///
/// If `cstring` is non-null and `length` is non-zero, `cstring` must point to
/// at least `length` readable bytes that stay valid and are not mutated for
/// the whole lifetime `'a`. The lifetime is chosen by the caller and is not
/// checked against the real owner of the memory.
pub unsafe fn cstring_to_slice<'a>(cstring: *const c_char, length: usize) -> &'a [u8] {
    if cstring.is_null() || length == 0 {
        return &[];
    }
    slice::from_raw_parts(cstring as *const u8, length)
}
51], - }, - PsycParserResult::Complete]; - + PsycParserResult::RoutingModifier { + operator: ':', + name: "_target".as_bytes(), + value: "psyc://ve.symlynx.com/@blog".as_bytes() + }, + PsycParserResult::InsufficientData, + PsycParserResult::EntityModifier{ + operator: ':', + name: "_nick".as_bytes(), + value: "lurchi".as_bytes(), + }, + PsycParserResult::Complete]; + let mut parser = PsycParser::new(); - parser.set_buffer(&test_data[.. 1]); + parser.set_buffer(&test_data1[.. 1]); assert_eq!(parser.parse().unwrap(), expected[0]); - - parser.set_buffer(&test_data[.. 46]); + + let unparsed_length = parser.copy_unparsed_into_buffer(&mut test_data1); + assert_eq!(unparsed_length, 1); + parser.set_buffer(&test_data1[.. 44]); assert_eq!(parser.parse().unwrap(), expected[1]); - - parser.set_buffer(&test_data[.. 49]); + assert_eq!(parser.parse().unwrap(), expected[2]); - parser.set_buffer(&test_data); + let unparsed_length = parser.copy_unparsed_into_buffer(&mut test_data1); + test_data1.resize(unparsed_length, 0); + test_data1.append(&mut test_data2); + parser.set_buffer(&test_data1); assert_eq!(parser.parse().unwrap(), expected[3]); assert_eq!(parser.parse().unwrap(), expected[4]);