1
0
Fork 0
mirror of git://git.psyc.eu/libpsyc synced 2024-08-15 03:19:02 +00:00

change PsycParser API to make it possible to parse large packets and process them partially; prepare PsycDictParser

This commit is contained in:
lurchi 2016-08-31 19:14:41 +02:00
parent ec261f3e20
commit d12047ee18
5 changed files with 300 additions and 97 deletions

View file

@ -1,5 +1,6 @@
#![allow(dead_code)] #![allow(dead_code)]
mod types; mod types;
mod util;
pub mod parser_types; pub mod parser_types;
pub mod packet_types; pub mod packet_types;
pub mod parser; pub mod parser;

View file

@ -1,7 +1,7 @@
use types::*; use types::*;
use parser_types::*; use parser_types::*;
use util;
use std::mem; use std::mem;
use std::slice;
use std::os::raw::c_char; use std::os::raw::c_char;
extern "C" { extern "C" {
@ -19,10 +19,10 @@ extern "C" {
fn psyc_parse_content_length_found(state: *mut PsycParseState) -> bool; fn psyc_parse_content_length_found(state: *mut PsycParseState) -> bool;
fn psyc_parse_value_length(state: *mut PsycParseState) -> usize; fn psyc_parse_value_length(state: *mut PsycParseState) -> usize;
fn psyc_parse_value_length_found(state: *mut PsycParseState) -> bool; fn psyc_parse_value_length_found(state: *mut PsycParseState) -> bool;
fn psyc_parse_cursor(state: *mut PsycParseState) -> usize; fn psyc_parse_cursor(state: *const PsycParseState) -> usize;
fn psyc_parse_buffer_length(state: *mut PsycParseState) -> usize; fn psyc_parse_buffer_length(state: *const PsycParseState) -> usize;
fn psyc_parse_remaining_length(state: *mut PsycParseState) -> usize; fn psyc_parse_remaining_length(state: *const PsycParseState) -> usize;
fn psyc_parse_remaining_buffer(state: *mut PsycParseState) -> *const c_char; fn psyc_parse_remaining_buffer(state: *const PsycParseState) -> *const c_char;
fn psyc_parse(state: *mut PsycParseState, fn psyc_parse(state: *mut PsycParseState,
oper: *mut c_char, oper: *mut c_char,
name: *mut PsycString, name: *mut PsycString,
@ -60,12 +60,8 @@ extern "C" {
fn psyc_parse_keyword(data: *const c_char, len: usize) -> usize; fn psyc_parse_keyword(data: *const c_char, len: usize) -> usize;
} }
pub struct PsycParser<'a> { pub struct PsycParser {
state: PsycParseState, state: PsycParseState
operator: char,
name: Option<&'a [u8]>,
buffer: Option<&'a [u8]>,
cursor: usize
} }
//pub struct PsycListParser<'a> { //pub struct PsycListParser<'a> {
@ -74,11 +70,13 @@ pub struct PsycParser<'a> {
// buffer: &'a [u8] // buffer: &'a [u8]
//} //}
// //
//pub struct PsycDictParser<'a> { pub struct PsycDictParser<'a> {
// state: PsycParseDictState, state: PsycParseDictState,
// parsed_dict: Vec<(Vec<u8>, Vec<u8>)>, parsed_key: Option<&'a [u8]>,
// buffer: &'a [u8] parsed_dict: Vec<(&'a [u8], &'a [u8])>,
//} buffer: Option<&'a [u8]>,
cursor: usize
}
// //
//// TODO: What data structures does the index parser need? //// TODO: What data structures does the index parser need?
//pub struct PsycIndexParser { //pub struct PsycIndexParser {
@ -106,19 +104,64 @@ pub enum PsycParserResult<'a> {
name: &'a [u8], name: &'a [u8],
value: &'a [u8] value: &'a [u8]
}, },
EntityModifierStart {
operator: char,
name: &'a [u8],
value_part: &'a [u8]
},
EntityModifierCont {
value_part: &'a [u8]
},
EntityModifierEnd,
Body { Body {
name: &'a [u8], name: &'a [u8],
value: &'a [u8] value: &'a [u8]
}, },
BodyStart {
name: &'a [u8],
value_part: &'a [u8]
},
BodyCont {
value_part: &'a [u8]
},
BodyEnd
} }
//#[derive(Debug, PartialEq)] //#[derive(Debug, PartialEq)]
//pub enum PsycDictParserResult { //pub enum PsycListParserResult {
// Complete,
// InsufficientData, // InsufficientData,
// Dict { // ListElement {
// data: Vec<(&'a [u8], &'a[u8])> // value: &[u8]
// },
// ListElementStart {
// value_part: &[u8]
// },
// ListElementCont {
// value_part: &[u8]
// },
// ListElementEnd {
// value_part: &[u8]
// } // }
//} //}
//
//#[derive(Debug, PartialEq)]
//pub enum PsycDictParserResult<'a> {
// Complete,
// InsufficientData,
// DictEntry {
// key: &'a [u8],
// value: &'a [u8]
// },
// DictEntryStart {
// key: &'a [u8],
// value_part: &'a [u8]
// },
// DictEntryCont {
// value_part: &'a [u8]
// },
// DictEntryEnd
//}
#[repr(C)] #[repr(C)]
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -135,7 +178,32 @@ pub enum PsycParserError {
GenericError = PsycParseRC::PSYC_PARSE_ERROR as _, GenericError = PsycParseRC::PSYC_PARSE_ERROR as _,
} }
impl<'a> PsycParser<'a> { #[repr(C)]
/// Error conditions of the list parser.
///
/// Every variant takes its discriminant from the `PsycParseListRC` error
/// constant named on its right-hand side.
// NOTE(review): presumably the matching discriminants exist so that a raw
// return code can be converted into this enum the same way
// `PsycParser::parse` transmutes its error codes — confirm once the list
// parser is implemented.
#[derive(Debug, PartialEq)]
pub enum PsycListParserError {
/// Mirrors `PSYC_PARSE_LIST_ERROR_ELEM_NO_LEN`.
NoElementLength = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_NO_LEN as _,
/// Mirrors `PSYC_PARSE_LIST_ERROR_ELEM_LENGTH`.
InvalidElementLength = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_LENGTH as _,
/// Mirrors `PSYC_PARSE_LIST_ERROR_ELEM_TYPE`.
InvalidElementType = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_TYPE as _,
/// Mirrors `PSYC_PARSE_LIST_ERROR_ELEM_START`.
InvalidElementStart = PsycParseListRC::PSYC_PARSE_LIST_ERROR_ELEM_START as _,
/// Mirrors `PSYC_PARSE_LIST_ERROR_TYPE`.
InvalidType = PsycParseListRC::PSYC_PARSE_LIST_ERROR_TYPE as _,
/// Mirrors `PSYC_PARSE_LIST_ERROR`.
GenericError = PsycParseListRC::PSYC_PARSE_LIST_ERROR as _,
}
/// Error conditions of the dict parser.
///
/// Every variant takes its discriminant from the `PsycParseDictRC` error
/// constant named on its right-hand side.
// NOTE(review): presumably the matching discriminants exist so that a raw
// return code can be converted into this enum the same way
// `PsycParser::parse` transmutes its error codes — confirm once the dict
// parser is implemented.
#[repr(C)]
#[derive(Debug, PartialEq)]
pub enum PsycDictParserError {
/// Mirrors `PSYC_PARSE_DICT_ERROR_VALUE`.
InvalidValue = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR_VALUE_LENGTH`.
InvalidValueLength = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_LENGTH as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR_VALUE_TYPE`.
InvalidValueType = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_TYPE as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR_VALUE_START`.
InvalidValueStart = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_VALUE_START as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR_KEY`.
InvalidKey = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR_KEY_LENGTH`.
InvalidKeyLength = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY_LENGTH as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR_KEY_START`.
InvalidKeyStart = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_KEY_START as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR_TYPE`.
// NOTE(review): the C constant carries no `KEY` in its name and the list
// parser calls the analogous variant `InvalidType`; verify whether
// `InvalidKeyType` is the intended name here.
InvalidKeyType = PsycParseDictRC::PSYC_PARSE_DICT_ERROR_TYPE as _,
/// Mirrors `PSYC_PARSE_DICT_ERROR`.
GenericError = PsycParseDictRC::PSYC_PARSE_DICT_ERROR as _,
}
impl PsycParser {
/// Create a PsycParser /// Create a PsycParser
pub fn new() -> Self { pub fn new() -> Self {
let mut state: PsycParseState; let mut state: PsycParseState;
@ -146,38 +214,24 @@ impl<'a> PsycParser<'a> {
} }
PsycParser { PsycParser {
state: state, state: state,
operator: '\0',
name: None,
buffer: None,
cursor: 0
}
}
/// Set a buffer of raw bytes for parsing
pub fn set_buffer(&mut self, buffer: &'a [u8]) {
self.buffer = Some(buffer);
let state_ptr = &mut self.state as *mut PsycParseState;
let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char;
unsafe {
psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len() - self.cursor)
} }
} }
/// Parse the buffer previously set by set_buffer. Call repeatedly until the /// Parse the buffer previously set by set_buffer. Call repeatedly until the
/// result is PsycParserResult::Complete or a PsycParserError. /// result is PsycParserResult::Complete or a PsycParserError.
pub fn parse(&mut self) pub fn parse<'a>(&mut self)
-> Result<PsycParserResult<'a>, PsycParserError> { -> Result<PsycParserResult<'a>, PsycParserError> {
let state_ptr = &mut self.state as *mut PsycParseState; let state_ptr = &mut self.state as *mut PsycParseState;
let mut operator = '\0';
let mut name: PsycString; let mut name: PsycString;
let mut value: PsycString; let mut value: PsycString;
unsafe { unsafe {
name = mem::uninitialized(); name = mem::uninitialized();
value = mem::uninitialized(); value = mem::uninitialized();
let operator_ptr = &mut self.operator as *mut char as *mut c_char; let operator_ptr = &mut operator as *mut char as *mut c_char;
let name_ptr = &mut name as *mut PsycString; let name_ptr = &mut name as *mut PsycString;
let value_ptr = &mut value as *mut PsycString; let value_ptr = &mut value as *mut PsycString;
let parse_result = psyc_parse(state_ptr, operator_ptr, name_ptr, value_ptr); let parse_result = psyc_parse(state_ptr, operator_ptr, name_ptr, value_ptr);
self.cursor = self.cursor + psyc_parse_cursor(state_ptr);
match parse_result { match parse_result {
PsycParseRC::PSYC_PARSE_STATE_RESYNC => PsycParseRC::PSYC_PARSE_STATE_RESYNC =>
Ok(PsycParserResult::StateSync), Ok(PsycParserResult::StateSync),
@ -188,67 +242,203 @@ impl<'a> PsycParser<'a> {
PsycParseRC::PSYC_PARSE_COMPLETE => PsycParseRC::PSYC_PARSE_COMPLETE =>
Ok(PsycParserResult::Complete), Ok(PsycParserResult::Complete),
PsycParseRC::PSYC_PARSE_INSUFFICIENT =>
Ok(PsycParserResult::InsufficientData),
PsycParseRC::PSYC_PARSE_ROUTING => { PsycParseRC::PSYC_PARSE_ROUTING => {
let result = PsycParserResult::RoutingModifier { let result = PsycParserResult::RoutingModifier {
operator: self.operator, operator: operator,
name: Self::cstring_to_slice(name.data, name.length), name: util::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length) value: util::cstring_to_slice(value.data, value.length)
}; };
Ok(result) Ok(result)
}, },
PsycParseRC::PSYC_PARSE_ENTITY => { PsycParseRC::PSYC_PARSE_ENTITY => {
let result = PsycParserResult::EntityModifier { let result = PsycParserResult::EntityModifier {
operator: self.operator, operator: operator,
name: Self::cstring_to_slice(name.data, name.length), name: util::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length) value: util::cstring_to_slice(value.data, value.length)
}; };
Ok(result) Ok(result)
}, },
PsycParseRC::PSYC_PARSE_ENTITY_START => {
let result = PsycParserResult::EntityModifierStart {
operator: operator,
name: util::cstring_to_slice(name.data, name.length),
value_part: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_CONT => {
let result = PsycParserResult::EntityModifierCont {
value_part: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_END =>
Ok(PsycParserResult::EntityModifierEnd),
PsycParseRC::PSYC_PARSE_BODY => { PsycParseRC::PSYC_PARSE_BODY => {
let result = PsycParserResult::Body { let result = PsycParserResult::Body {
name: Self::cstring_to_slice(name.data, name.length), name: util::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length) value: util::cstring_to_slice(value.data, value.length)
}; };
Ok(result) Ok(result)
}, },
PsycParseRC::PSYC_PARSE_ENTITY_START |
PsycParseRC::PSYC_PARSE_BODY_START => { PsycParseRC::PSYC_PARSE_BODY_START => {
self.name = Some(Self::cstring_to_slice(name.data, name.length)); let result = PsycParserResult::BodyStart {
Ok(PsycParserResult::InsufficientData) name: util::cstring_to_slice(name.data, name.length),
}, value_part: util::cstring_to_slice(value.data, value.length)
PsycParseRC::PSYC_PARSE_ENTITY_END => {
let result = PsycParserResult::EntityModifier {
operator: self.operator,
name: self.name.unwrap(),
value: Self::cstring_to_slice(value.data, value.length)
}; };
Ok(result) Ok(result)
}, },
PsycParseRC::PSYC_PARSE_BODY_END => {
let result = PsycParserResult::Body {
name: self.name.unwrap(),
value: Self::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_INSUFFICIENT |
PsycParseRC::PSYC_PARSE_ENTITY_CONT |
PsycParseRC::PSYC_PARSE_BODY_CONT => { PsycParseRC::PSYC_PARSE_BODY_CONT => {
Ok(PsycParserResult::InsufficientData) let result = PsycParserResult::BodyCont {
value_part: util::cstring_to_slice(value.data, value.length)
};
Ok(result)
}, },
PsycParseRC::PSYC_PARSE_BODY_END =>
Ok(PsycParserResult::BodyEnd),
_error => Err(mem::transmute(_error)), _error => Err(mem::transmute(_error)),
} }
} }
} }
}
unsafe fn cstring_to_slice(cstring: *const c_char, length: usize) -> &'a [u8] { impl Parser for PsycParser {
slice::from_raw_parts(cstring as *const u8, length) fn set_buffer(&mut self, buffer: &[u8]) {
let state_ptr = &mut self.state as *mut PsycParseState;
let buffer_ptr = buffer.as_ptr() as *const c_char;
unsafe {
psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len())
} }
} }
fn unparsed_position(&self) -> usize {
unsafe {
psyc_parse_cursor(&self.state as *const PsycParseState)
}
}
fn unparsed_length(&self) -> usize {
unsafe {
psyc_parse_remaining_length(&self.state as *const PsycParseState)
}
}
}
//impl<'a> PsycDictParser<'a> {
// /// Create a PsycDictParser
// pub fn new() -> Self {
// let mut state: PsycParseDictState;
// unsafe {
// state = mem::uninitialized();
// let state_ptr = &mut state as *mut PsycParseDictState;
// psyc_parse_dict_state_init(state_ptr)
// }
// PsycDictParser {
// state: state,
// parsed_key: None,
// parsed_dict: vec![],
// buffer: None,
// cursor: 0
// }
// }
//
// /// Set a buffer of raw bytes for parsing
// pub fn set_buffer(&mut self, buffer: &'a [u8]) {
// self.buffer = Some(buffer);
// let state_ptr = &mut self.state as *mut PsycParseDictState;
// let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char;
// unsafe {
// psyc_parse_dict_buffer_set(state_ptr, buffer_ptr, buffer.len())
// }
// }
//
// /// Parse the buffer previously set by set_buffer. Call repeatedly until the
// /// result is PsycDictParserResult::Complete or a PsycDictParserError
// pub fn parse(&mut self)
// -> Result<PsycDictParserResult<'a>, PsycDictParserError> {
// let state_ptr = &mut self.state as *mut PsycParseDictState;
// let mut list_type: PsycString;
// let mut element: PsycString;
// unsafe {
// list_type = mem::uninitialized();
// element = mem::uninitialized();
// let list_type_ptr = &mut list_type as *mut PsycString;
// let element_ptr = &mut element as *mut PsycString;
// loop {
// let parse_result = psyc_parse_dict(state_ptr, list_type_ptr, element_ptr);
// self.cursor = self.cursor + self.state.cursor;
// println!("parse_result: {:?}", parse_result);
// match parse_result {
// PsycParseDictRC::PSYC_PARSE_DICT_KEY_END |
// PsycParseDictRC::PSYC_PARSE_DICT_KEY => {
// let key = util::cstring_to_slice(element.data, element.length);
// self.parsed_key = Some(key)
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_END |
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE => {
// let value = util::cstring_to_slice(element.data, element.length);
// self.parsed_dict.push((self.parsed_key.unwrap(), value))
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_LAST => {
// let value = util::cstring_to_slice(element.data, element.length);
// self.parsed_dict.push((self.parsed_key.unwrap(), value));
// let complete = PsycDictParserResult::Complete {
// dict: self.parsed_dict.clone()
// };
// return Ok(complete);
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_INSUFFICIENT |
// PsycParseDictRC::PSYC_PARSE_DICT_KEY_START |
// PsycParseDictRC::PSYC_PARSE_DICT_KEY_CONT |
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_START |
// PsycParseDictRC::PSYC_PARSE_DICT_VALUE_CONT |
// PsycParseDictRC::PSYC_PARSE_DICT_END /* FIXME: correct? */=> {
// return Ok(PsycDictParserResult::InsufficientData);
// },
//
// PsycParseDictRC::PSYC_PARSE_DICT_TYPE => (),
//
// _error => {
// return Err(mem::transmute(_error));
// },
// }
// }
// }
// }
//}
/// Buffer handling shared by the PSYC parsers: handing a chunk of input to
/// the parser and carrying the not-yet-parsed tail over to the next chunk.
pub trait Parser {
    /// Set a buffer of raw bytes for parsing.
    fn set_buffer(&mut self, buffer: &[u8]);

    /// Copies the remaining unparsed bytes to the beginning of the given
    /// buffer. Returns the number of copied bytes. Must be called when
    /// parse() returned InsufficientData as Result.
    ///
    /// The bytes are taken from
    /// `buffer[unparsed_position() .. unparsed_position() + unparsed_length()]`.
    /// Nothing is moved when the unparsed data already starts at offset 0.
    ///
    /// # Panics
    ///
    /// Panics if the unparsed range does not lie within `buffer`.
    fn copy_unparsed_into_buffer(&self, buffer: &mut [u8]) -> usize {
        let unparsed_pos = self.unparsed_position();
        let unparsed_len = self.unparsed_length();
        if unparsed_pos != 0 {
            // The unparsed tail may be longer than the consumed prefix, in
            // which case source and destination overlap. `copy_within` has
            // memmove semantics, so it is correct for both the overlapping
            // and the disjoint case (the previous split-and-copy approach
            // underflowed on `unparsed_pos - unparsed_len` when they
            // overlapped).
            buffer.copy_within(unparsed_pos..unparsed_pos + unparsed_len, 0);
        }
        unparsed_len
    }

    /// Offset in the current buffer at which the unparsed bytes begin.
    fn unparsed_position(&self) -> usize;

    /// Number of bytes in the current buffer that have not been parsed yet.
    fn unparsed_length(&self) -> usize;
}

View file

@ -26,7 +26,7 @@ pub struct PsycParseState {
#[repr(C)] #[repr(C)]
pub struct PsycParseListState { pub struct PsycParseListState {
buffer: PsycString, buffer: PsycString,
cursor: usize, pub cursor: usize,
startc: usize, startc: usize,
list_type: PsycString, list_type: PsycString,
elemlen: usize, elemlen: usize,
@ -38,7 +38,7 @@ pub struct PsycParseListState {
#[repr(C)] #[repr(C)]
pub struct PsycParseDictState { pub struct PsycParseDictState {
buffer: PsycString, buffer: PsycString,
cursor: usize, pub cursor: usize,
startc: usize, startc: usize,
elemlen: usize, elemlen: usize,
elem_parsed: usize, elem_parsed: usize,
@ -49,7 +49,7 @@ pub struct PsycParseDictState {
#[repr(C)] #[repr(C)]
pub struct PsycParseIndexState { pub struct PsycParseIndexState {
buffer: PsycString, buffer: PsycString,
cursor: usize, pub cursor: usize,
startc: usize, startc: usize,
elemlen: usize, elemlen: usize,
elem_parsed: usize, elem_parsed: usize,
@ -60,7 +60,7 @@ pub struct PsycParseIndexState {
#[repr(C)] #[repr(C)]
pub struct PsycParseUpdateState { pub struct PsycParseUpdateState {
buffer: PsycString, buffer: PsycString,
cursor: usize, pub cursor: usize,
startc: usize, startc: usize,
elemlen: usize, elemlen: usize,
elem_parsed: usize, elem_parsed: usize,
@ -181,6 +181,7 @@ pub enum PsycParseListRC {
PSYC_PARSE_LIST_END = 8, PSYC_PARSE_LIST_END = 8,
} }
#[derive(Debug)]
#[repr(C)] #[repr(C)]
pub enum PsycParseDictRC { pub enum PsycParseDictRC {
PSYC_PARSE_DICT_ERROR_VALUE = -9, PSYC_PARSE_DICT_ERROR_VALUE = -9,

6
rust/src/util.rs Normal file
View file

@ -0,0 +1,6 @@
use std::slice;
use std::os::raw::c_char;
/// Views `length` bytes starting at `cstring` as a byte slice.
///
/// A null `cstring` yields an empty slice instead of being passed on to
/// `slice::from_raw_parts`, which requires a non-null pointer even for
/// zero-length slices.
///
/// # Safety
///
/// Unless `cstring` is null, it must point to at least `length` readable
/// bytes that stay valid and are not mutated for the whole lifetime `'a`,
/// which is chosen freely by the caller.
pub unsafe fn cstring_to_slice<'a>(cstring: *const c_char, length: usize) -> &'a[u8] {
    if cstring.is_null() {
        // An empty C string descriptor may carry a null data pointer;
        // building a slice from it would be undefined behaviour.
        return &[];
    }
    slice::from_raw_parts(cstring as *const u8, length)
}

View file

@ -7,8 +7,8 @@ fn test_parse() {
let expected = vec![PsycParserResult::RoutingModifier{ let expected = vec![PsycParserResult::RoutingModifier{
operator: ':', operator: ':',
name: &test_data[1 .. 8], name: "_target".as_bytes(),
value: &test_data[9 .. 36], value: "psyc://ve.symlynx.com/@blog".as_bytes(),
}, },
PsycParserResult::StateSync]; PsycParserResult::StateSync];
@ -23,34 +23,39 @@ fn test_parse() {
#[test] #[test]
fn test_insufficient() { fn test_insufficient() {
let test_data = ":_target\tpsyc://ve.symlynx.com/@blog\n\n:_nick\tlurchi\n|\n".to_string().into_bytes(); let mut test_data1 = ":_target\tpsyc://ve.symlynx.com/@blog\n\n:_nick".to_string().into_bytes();
let mut test_data2 = "\tlurchi\n|\n".to_string().into_bytes();
let expected = vec![PsycParserResult::InsufficientData, let expected = vec![PsycParserResult::InsufficientData,
PsycParserResult::RoutingModifier { PsycParserResult::RoutingModifier {
operator: ':', operator: ':',
name: &test_data[1 .. 8], name: "_target".as_bytes(),
value: &test_data[9 .. 36] value: "psyc://ve.symlynx.com/@blog".as_bytes()
}, },
PsycParserResult::InsufficientData, PsycParserResult::InsufficientData,
PsycParserResult::EntityModifier{ PsycParserResult::EntityModifier{
operator: ':', operator: ':',
name: &test_data[39 .. 44], name: "_nick".as_bytes(),
value: &test_data[45 .. 51], value: "lurchi".as_bytes(),
}, },
PsycParserResult::Complete]; PsycParserResult::Complete];
let mut parser = PsycParser::new(); let mut parser = PsycParser::new();
parser.set_buffer(&test_data[.. 1]); parser.set_buffer(&test_data1[.. 1]);
assert_eq!(parser.parse().unwrap(), expected[0]); assert_eq!(parser.parse().unwrap(), expected[0]);
parser.set_buffer(&test_data[.. 46]); let unparsed_length = parser.copy_unparsed_into_buffer(&mut test_data1);
assert_eq!(unparsed_length, 1);
parser.set_buffer(&test_data1[.. 44]);
assert_eq!(parser.parse().unwrap(), expected[1]); assert_eq!(parser.parse().unwrap(), expected[1]);
parser.set_buffer(&test_data[.. 49]);
assert_eq!(parser.parse().unwrap(), expected[2]); assert_eq!(parser.parse().unwrap(), expected[2]);
parser.set_buffer(&test_data); let unparsed_length = parser.copy_unparsed_into_buffer(&mut test_data1);
test_data1.resize(unparsed_length, 0);
test_data1.append(&mut test_data2);
parser.set_buffer(&test_data1);
assert_eq!(parser.parse().unwrap(), expected[3]); assert_eq!(parser.parse().unwrap(), expected[3]);
assert_eq!(parser.parse().unwrap(), expected[4]); assert_eq!(parser.parse().unwrap(), expected[4]);