change PsycParser API to make it possible to parse large packets and process them partially; prepare PsycDictParser

This commit is contained in:
lurchi 2016-08-31 19:14:41 +02:00
parent ec261f3e20
commit d12047ee18
5 changed files with 300 additions and 97 deletions

View File

@ -1,5 +1,6 @@
#![allow(dead_code)]
mod types;
mod util;
pub mod parser_types;
pub mod packet_types;
pub mod parser;

View File

@ -1,7 +1,7 @@
use types::*;
use parser_types::*;
use util;
use std::mem;
use std::slice;
use std::os::raw::c_char;
extern "C" {
@ -19,10 +19,10 @@ extern "C" {
fn psyc_parse_content_length_found(state: *mut PsycParseState) -> bool;
fn psyc_parse_value_length(state: *mut PsycParseState) -> usize;
fn psyc_parse_value_length_found(state: *mut PsycParseState) -> bool;
fn psyc_parse_cursor(state: *mut PsycParseState) -> usize;
fn psyc_parse_buffer_length(state: *mut PsycParseState) -> usize;
fn psyc_parse_remaining_length(state: *mut PsycParseState) -> usize;
fn psyc_parse_remaining_buffer(state: *mut PsycParseState) -> *const c_char;
fn psyc_parse_cursor(state: *const PsycParseState) -> usize;
fn psyc_parse_buffer_length(state: *const PsycParseState) -> usize;
fn psyc_parse_remaining_length(state: *const PsycParseState) -> usize;
fn psyc_parse_remaining_buffer(state: *const PsycParseState) -> *const c_char;
fn psyc_parse(state: *mut PsycParseState,
oper: *mut c_char,
name: *mut PsycString,
@ -60,12 +60,8 @@ extern "C" {
fn psyc_parse_keyword(data: *const c_char, len: usize) -> usize;
}
pub struct PsycParser<'a> {
state: PsycParseState,
operator: char,
name: Option<&'a [u8]>,
buffer: Option<&'a [u8]>,
cursor: usize
pub struct PsycParser {
state: PsycParseState
}
//pub struct PsycListParser<'a> {
@ -74,11 +70,13 @@ pub struct PsycParser<'a> {
// buffer: &'a [u8]
//}
//
//pub struct PsycDictParser<'a> {
// state: PsycParseDictState,
// parsed_dict: Vec<(Vec<u8>, Vec<u8>)>,
// buffer: &'a [u8]
//}
/// Parser state for PSYC dictionaries that borrows its input for `'a`.
///
/// NOTE(review): no active `impl` exists for this type in this file yet; the
/// field notes below follow the commented-out draft implementation and should
/// be confirmed once the parser is actually wired up.
pub struct PsycDictParser<'a> {
// State handed to the C dictionary parser functions.
state: PsycParseDictState,
// Presumably the most recently parsed key, waiting for its value — confirm.
parsed_key: Option<&'a [u8]>,
// Presumably the (key, value) pairs collected so far — confirm.
parsed_dict: Vec<(&'a [u8], &'a [u8])>,
// Presumably the buffer most recently handed to the parser, if any — confirm.
buffer: Option<&'a [u8]>,
// NOTE(review): looks like an offset into `buffer`; verify how it is advanced.
cursor: usize
}
//
//// TODO: What data structures does the index parser need?
//pub struct PsycIndexParser {
@ -106,19 +104,64 @@ pub enum PsycParserResult<'a> {
name: &'a [u8],
value: &'a [u8]
},
EntityModifierStart {
operator: char,
name: &'a [u8],
value_part: &'a [u8]
},
EntityModifierCont {
value_part: &'a [u8]
},
EntityModifierEnd,
Body {
name: &'a [u8],
value: &'a [u8]
},
BodyStart {
name: &'a [u8],
value_part: &'a [u8]
},
BodyCont {
value_part: &'a [u8]
},
BodyEnd
}
//#[derive(Debug, PartialEq)]
//pub enum PsycDictParserResult {
//pub enum PsycListParserResult {
// Complete,
// InsufficientData,
// Dict {
// data: Vec<(&'a [u8], &'a[u8])>
// ListElement {
// value: &[u8]
// },
// ListElementStart {
// value_part: &[u8]
// },
// ListElementCont {
// value_part: &[u8]
// },
// ListElementEnd {
// value_part: &[u8]
// }
//}
//
//#[derive(Debug, PartialEq)]
//pub enum PsycDictParserResult<'a> {
// Complete,
// InsufficientData,
// DictEntry {
// key: &'a [u8],
// value: &'a [u8]
// },
// DictEntryStart {
// key: &'a [u8],
// value_part: &'a [u8]
// },
// DictEntryCont {
// value_part: &'a [u8]
// },
// DictEntryEnd
//}
#[repr(C)]
#[derive(Debug, PartialEq)]
@ -135,7 +178,32 @@ pub enum PsycParserError {
GenericError = PsycParseRC::PSYC_PARSE_ERROR as _,
}
impl<'a> PsycParser<'a> {
/// Errors of the PSYC list parser.
///
/// Every variant takes its discriminant from the corresponding
/// `PsycParseListRC::PSYC_PARSE_LIST_ERROR*` member, so the numeric values of
/// this enum and of the C return code agree for the error cases.
#[repr(C)]
#[derive(Debug, PartialEq)]
pub enum PsycListParserError {
/// Discriminant of `PSYC_PARSE_LIST_ERROR_ELEM_NO_LEN`.
NoElementLength = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_NO_LEN as _,
/// Discriminant of `PSYC_PARSE_LIST_ERROR_ELEM_LENGTH`.
InvalidElementLength = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_LENGTH as _,
/// Discriminant of `PSYC_PARSE_LIST_ERROR_ELEM_TYPE`.
InvalidElementType = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_TYPE as _,
/// Discriminant of `PSYC_PARSE_LIST_ERROR_ELEM_START`.
InvalidElementStart = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_START as _,
/// Discriminant of `PSYC_PARSE_LIST_ERROR_TYPE`.
InvalidType = PsycParseListRC::PSYC_PARSE_LIST_ERROR_TYPE as _,
/// Discriminant of `PSYC_PARSE_LIST_ERROR`, the unspecific error code.
GenericError = PsycParseListRC::PSYC_PARSE_LIST_ERROR as _,
}
/// Errors of the PSYC dictionary parser.
///
/// Every variant takes its discriminant from the corresponding
/// `PsycParseDictRC::PSYC_PARSE_DICT_ERROR*` member, so the numeric values of
/// this enum and of the C return code agree for the error cases.
#[repr(C)]
#[derive(Debug, PartialEq)]
pub enum PsycDictParserError {
/// Discriminant of `PSYC_PARSE_DICT_ERROR_VALUE`.
InvalidValue = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR_VALUE_LENGTH`.
InvalidValueLength = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_LENGTH as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR_VALUE_TYPE`.
InvalidValueType = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_TYPE as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR_VALUE_START`.
InvalidValueStart = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_START as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR_KEY`.
InvalidKey = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR_KEY_LENGTH`.
InvalidKeyLength = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY_LENGTH as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR_KEY_START`.
InvalidKeyStart = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY_START as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR_TYPE`.
// NOTE(review): the variant is called `InvalidKeyType` but maps to the
// generic `..._ERROR_TYPE` code (the list error enum names the analogous
// variant `InvalidType`) — confirm which name is intended.
InvalidKeyType = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_TYPE as _,
/// Discriminant of `PSYC_PARSE_DICT_ERROR`, the unspecific error code.
GenericError = PsycParseDictRC::PSYC_PARSE_DICT_ERROR as _,
}
impl PsycParser {
/// Create a PsycParser
pub fn new() -> Self {
let mut state: PsycParseState;
@ -144,40 +212,26 @@ impl<'a> PsycParser<'a> {
let state_ptr = &mut state as *mut PsycParseState;
psyc_parse_state_init(state_ptr, PsycParseFlag::PSYC_PARSE_ALL as u8)
}
PsycParser{
PsycParser {
state: state,
operator: '\0',
name: None,
buffer: None,
cursor: 0
}
}
/// Set a buffer of raw bytes for parsing
pub fn set_buffer(&mut self, buffer: &'a [u8]) {
self.buffer = Some(buffer);
let state_ptr = &mut self.state as *mut PsycParseState;
let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char;
unsafe {
psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len() - self.cursor)
}
}
/// Parse the buffer previously set by set_buffer. Call repeatedly until the
/// result is PsycParserResult::Complete or a PsycParserError.
pub fn parse(&mut self)
-> Result<PsycParserResult<'a>, PsycParserError> {
pub fn parse<'a>(&mut self)
-> Result<PsycParserResult<'a>, PsycParserError> {
let state_ptr = &mut self.state as *mut PsycParseState;
let mut operator = '\0';
let mut name: PsycString;
let mut value: PsycString;
unsafe {
name = mem::uninitialized();
value = mem::uninitialized();
let operator_ptr = &mut self.operator as *mut char as *mut c_char;
let operator_ptr = &mut operator as *mut char as *mut c_char;
let name_ptr = &mut name as *mut PsycString;
let value_ptr = &mut value as *mut PsycString;
let parse_result = psyc_parse(state_ptr, operator_ptr, name_ptr, value_ptr);
self.cursor = self.cursor + psyc_parse_cursor(state_ptr);
match parse_result {
PsycParseRC::PSYC_PARSE_STATE_RESYNC =>
Ok(PsycParserResult::StateSync),
@ -188,67 +242,203 @@ impl<'a> PsycParser<'a> {
PsycParseRC::PSYC_PARSE_COMPLETE =>
Ok(PsycParserResult::Complete),
PsycParseRC::PSYC_PARSE_INSUFFICIENT =>
Ok(PsycParserResult::InsufficientData),
PsycParseRC::PSYC_PARSE_ROUTING => {
let result = PsycParserResult::RoutingModifier {
operator: self.operator,
name: Self::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length)
operator: operator,
name: util::cstring_to_slice(name.data, name.length),
value: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY => {
let result = PsycParserResult::EntityModifier {
operator: self.operator,
name: Self::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length)
operator: operator,
name: util::cstring_to_slice(name.data, name.length),
value: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_START => {
let result = PsycParserResult::EntityModifierStart {
operator: operator,
name: util::cstring_to_slice(name.data, name.length),
value_part: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_CONT => {
let result = PsycParserResult::EntityModifierCont {
value_part: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_END =>
Ok(PsycParserResult::EntityModifierEnd),
PsycParseRC::PSYC_PARSE_BODY => {
let result = PsycParserResult::Body {
name: Self::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length)
name: util::cstring_to_slice(name.data, name.length),
value: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_START |
PsycParseRC::PSYC_PARSE_BODY_START => {
self.name = Some(Self::cstring_to_slice(name.data, name.length));
Ok(PsycParserResult::InsufficientData)
},
PsycParseRC::PSYC_PARSE_ENTITY_END => {
let result = PsycParserResult::EntityModifier {
operator: self.operator,
name: self.name.unwrap(),
value: Self::cstring_to_slice(value.data, value.length)
let result = PsycParserResult::BodyStart {
name: util::cstring_to_slice(name.data, name.length),
value_part: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_BODY_END => {
let result = PsycParserResult::Body {
name: self.name.unwrap(),
value: Self::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_INSUFFICIENT |
PsycParseRC::PSYC_PARSE_ENTITY_CONT |
PsycParseRC::PSYC_PARSE_BODY_CONT => {
Ok(PsycParserResult::InsufficientData)
let result = PsycParserResult::BodyCont {
value_part: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_BODY_END =>
Ok(PsycParserResult::BodyEnd),
_error => Err(mem::transmute(_error)),
}
}
}
}
unsafe fn cstring_to_slice(cstring: *const c_char, length: usize) -> &'a [u8] {
slice::from_raw_parts(cstring as *const u8, length)
/// `Parser` implementation that forwards to the C `psyc_parse_*` functions.
impl Parser for PsycParser {
    /// Hands `buffer` to the C parse state as the data to parse next.
    ///
    /// NOTE(review): only the raw pointer and the length are passed on, and the
    /// borrow of `buffer` ends when this call returns — presumably the caller
    /// has to keep the buffer alive and unmodified while parsing; confirm.
    fn set_buffer(&mut self, buffer: &[u8]) {
        let data = buffer.as_ptr() as *const c_char;
        let length = buffer.len();
        let state: *mut PsycParseState = &mut self.state;
        // SAFETY: `state` points at the parse state owned by `self`; `data`
        // and `length` describe the live slice `buffer`.
        unsafe {
            psyc_parse_buffer_set(state, data, length)
        }
    }

    /// Returns what `psyc_parse_cursor` reports for this parser's state.
    fn unparsed_position(&self) -> usize {
        let state: *const PsycParseState = &self.state;
        // SAFETY: `state` points at the parse state owned by `self`.
        unsafe {
            psyc_parse_cursor(state)
        }
    }

    /// Returns what `psyc_parse_remaining_length` reports for this parser's
    /// state.
    fn unparsed_length(&self) -> usize {
        let state: *const PsycParseState = &self.state;
        // SAFETY: `state` points at the parse state owned by `self`.
        unsafe {
            psyc_parse_remaining_length(state)
        }
    }
}
//impl<'a> PsycDictParser<'a> {
// /// Create a PsycDictParser
// pub fn new() -> Self {
// let mut state: PsycParseDictState;
// unsafe {
// state = mem::uninitialized();
// let state_ptr = &mut state as *mut PsycParseDictState;
// psyc_parse_dict_state_init(state_ptr)
// }
// PsycDictParser {
// state: state,
// parsed_key: None,
// parsed_dict: vec![],
// buffer: None,
// cursor: 0
// }
// }
//
// /// Set a buffer of raw bytes for parsing
// pub fn set_buffer(&mut self, buffer: &'a [u8]) {
// self.buffer = Some(buffer);
// let state_ptr = &mut self.state as *mut PsycParseDictState;
// let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char;
// unsafe {
// psyc_parse_dict_buffer_set(state_ptr, buffer_ptr, buffer.len())
// }
// }
//
// /// Parse the buffer previously set by set_buffer. Call repeatedly until the
// /// result is PsycDictParserResult::Complete or a PsycDictParserError
// pub fn parse(&mut self)
// -> Result<PsycDictParserResult<'a>, PsycDictParserError> {
// let state_ptr = &mut self.state as *mut PsycParseDictState;
// let mut list_type: PsycString;
// let mut element: PsycString;
// unsafe {
// list_type = mem::uninitialized();
// element = mem::uninitialized();
// let list_type_ptr = &mut list_type as *mut PsycString;
// let element_ptr = &mut element as *mut PsycString;
// loop {
// let parse_result = psyc_parse_dict(state_ptr, list_type_ptr, element_ptr);
// self.cursor = self.cursor + self.state.cursor;
// println!("parse_result: {:?}", parse_result);
// match parse_result {
// PsycParseDictRC::PSYC_PARSE_DICT_KEY_END |
// PsycParseDictRC::PSYC_PARSE_DICT_KEY => {
// let key = util::cstring_to_slice(element.data, element.length);
// self.parsed_key = Some(key)
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_END |
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE => {
// let value = util::cstring_to_slice(element.data, element.length);
// self.parsed_dict.push((self.parsed_key.unwrap(), value))
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_LAST => {
// let value = util::cstring_to_slice(element.data, element.length);
// self.parsed_dict.push((self.parsed_key.unwrap(), value));
// let complete = PsycDictParserResult::Complete {
// dict: self.parsed_dict.clone()
// };
// return Ok(complete);
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_INSUFFICIENT |
// PsycParseDictRC::PSYC_PARSE_DICT_KEY_START |
// PsycParseDictRC::PSYC_PARSE_DICT_KEY_CONT |
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_START |
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_CONT |
// PsycParseDictRC::PSYC_PARSE_DICT_END /* FIXME: correct? */=> {
// return Ok(PsycDictParserResult::InsufficientData);
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_TYPE => (),
//
// _error => {
// return Err(mem::transmute(_error));
// },
// }
// }
// }
// }
//}
/// Common interface of the PSYC parsers that operate on a caller-supplied
/// byte buffer.
pub trait Parser {
    /// Set a buffer of raw bytes for parsing.
    fn set_buffer(&mut self, buffer: &[u8]);

    /// Moves the remaining unparsed bytes to the beginning of `buffer` and
    /// returns how many bytes are unparsed.
    ///
    /// Must be called when `parse()` returned `InsufficientData`. `buffer` is
    /// expected to be the storage that was handed to `set_buffer`, so that
    /// `unparsed_position()` is a valid offset into it. After the call the
    /// caller can append new data behind the returned length and call
    /// `set_buffer` again.
    ///
    /// The unparsed tail may be longer than the already consumed prefix, in
    /// which case source and destination overlap; this is handled correctly.
    /// If the unparsed position is `0` the data already sits at the front and
    /// `buffer` is left untouched.
    ///
    /// # Panics
    ///
    /// Panics if `unparsed_position() + unparsed_length()` exceeds
    /// `buffer.len()` while the unparsed position is non-zero.
    fn copy_unparsed_into_buffer(&self, buffer: &mut [u8]) -> usize {
        let unparsed_pos = self.unparsed_position();
        let unparsed_len = self.unparsed_length();
        if unparsed_pos != 0 {
            // `copy_within` has memmove semantics, so it stays correct when
            // the tail overlaps the front. The previous `split_at_mut` based
            // copy computed `unparsed_pos - unparsed_len`, which underflowed
            // whenever the tail was longer than the consumed prefix.
            buffer.copy_within(unparsed_pos..unparsed_pos + unparsed_len, 0);
        }
        unparsed_len
    }

    /// Offset of the first unparsed byte within the current buffer.
    fn unparsed_position(&self) -> usize;

    /// Number of bytes of the current buffer that have not been parsed yet.
    fn unparsed_length(&self) -> usize;
}

View File

@ -26,7 +26,7 @@ pub struct PsycParseState {
#[repr(C)]
pub struct PsycParseListState {
buffer: PsycString,
cursor: usize,
pub cursor: usize,
startc: usize,
list_type: PsycString,
elemlen: usize,
@ -38,7 +38,7 @@ pub struct PsycParseListState {
#[repr(C)]
pub struct PsycParseDictState {
buffer: PsycString,
cursor: usize,
pub cursor: usize,
startc: usize,
elemlen: usize,
elem_parsed: usize,
@ -49,7 +49,7 @@ pub struct PsycParseDictState {
#[repr(C)]
pub struct PsycParseIndexState {
buffer: PsycString,
cursor: usize,
pub cursor: usize,
startc: usize,
elemlen: usize,
elem_parsed: usize,
@ -60,7 +60,7 @@ pub struct PsycParseIndexState {
#[repr(C)]
pub struct PsycParseUpdateState {
buffer: PsycString,
cursor: usize,
pub cursor: usize,
startc: usize,
elemlen: usize,
elem_parsed: usize,
@ -181,6 +181,7 @@ pub enum PsycParseListRC {
PSYC_PARSE_LIST_END = 8,
}
#[derive(Debug)]
#[repr(C)]
pub enum PsycParseDictRC {
PSYC_PARSE_DICT_ERROR_VALUE = -9,

6
rust/src/util.rs Normal file
View File

@ -0,0 +1,6 @@
use std::slice;
use std::os::raw::c_char;
/// Reinterprets a C character buffer of `length` bytes as a byte slice.
///
/// A null `cstring` yields an empty slice instead of being passed to
/// `slice::from_raw_parts`, which requires a non-null pointer even for
/// zero-length slices.
///
/// # Safety
///
/// If `cstring` is non-null it must point to at least `length` readable bytes
/// that stay valid and unmodified for the whole lifetime `'a`. The lifetime is
/// chosen by the caller and is not checked by the compiler.
pub unsafe fn cstring_to_slice<'a>(cstring: *const c_char, length: usize) -> &'a [u8] {
    if cstring.is_null() {
        return &[];
    }
    slice::from_raw_parts(cstring as *const u8, length)
}

View File

@ -7,8 +7,8 @@ fn test_parse() {
let expected = vec![PsycParserResult::RoutingModifier{
operator: ':',
name: &test_data[1 .. 8],
value: &test_data[9 .. 36],
name: "_target".as_bytes(),
value: "psyc://ve.symlynx.com/@blog".as_bytes(),
},
PsycParserResult::StateSync];
@ -23,34 +23,39 @@ fn test_parse() {
#[test]
fn test_insufficient() {
let test_data = ":_target\tpsyc://ve.symlynx.com/@blog\n\n:_nick\tlurchi\n|\n".to_string().into_bytes();
let mut test_data1 = ":_target\tpsyc://ve.symlynx.com/@blog\n\n:_nick".to_string().into_bytes();
let mut test_data2 = "\tlurchi\n|\n".to_string().into_bytes();
let expected = vec![PsycParserResult::InsufficientData,
PsycParserResult::RoutingModifier {
operator: ':',
name: &test_data[1 .. 8],
value: &test_data[9 .. 36]
},
PsycParserResult::InsufficientData,
PsycParserResult::EntityModifier{
operator: ':',
name: &test_data[39 .. 44],
value: &test_data[45 .. 51],
},
PsycParserResult::Complete];
PsycParserResult::RoutingModifier {
operator: ':',
name: "_target".as_bytes(),
value: "psyc://ve.symlynx.com/@blog".as_bytes()
},
PsycParserResult::InsufficientData,
PsycParserResult::EntityModifier{
operator: ':',
name: "_nick".as_bytes(),
value: "lurchi".as_bytes(),
},
PsycParserResult::Complete];
let mut parser = PsycParser::new();
parser.set_buffer(&test_data[.. 1]);
parser.set_buffer(&test_data1[.. 1]);
assert_eq!(parser.parse().unwrap(), expected[0]);
parser.set_buffer(&test_data[.. 46]);
let unparsed_length = parser.copy_unparsed_into_buffer(&mut test_data1);
assert_eq!(unparsed_length, 1);
parser.set_buffer(&test_data1[.. 44]);
assert_eq!(parser.parse().unwrap(), expected[1]);
parser.set_buffer(&test_data[.. 49]);
assert_eq!(parser.parse().unwrap(), expected[2]);
parser.set_buffer(&test_data);
let unparsed_length = parser.copy_unparsed_into_buffer(&mut test_data1);
test_data1.resize(unparsed_length, 0);
test_data1.append(&mut test_data2);
parser.set_buffer(&test_data1);
assert_eq!(parser.parse().unwrap(), expected[3]);
assert_eq!(parser.parse().unwrap(), expected[4]);