Improve parser: handle incomplete data internally (the parser now tracks the cursor, operator and name itself instead of returning Incomplete* results)

This commit is contained in:
lurchi 2016-08-29 01:00:31 +02:00
parent a0abf99f72
commit 2003e77ae3
3 changed files with 189 additions and 134 deletions

View File

@ -7,14 +7,14 @@ use std::os::raw::c_char;
extern "C" { extern "C" {
fn psyc_parse_state_init(state: *mut PsycParseState, flags: u8); fn psyc_parse_state_init(state: *mut PsycParseState, flags: u8);
fn psyc_parse_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); fn psyc_parse_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize);
fn psyc_parse_list_state_init(state: *mut PsycParseState); fn psyc_parse_list_state_init(state: *mut PsycParseListState);
fn psyc_parse_list_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); fn psyc_parse_list_buffer_set(state: *mut PsycParseListState, buffer: *const c_char, length: usize);
fn psyc_parse_dict_state_init(state: *mut PsycParseState); fn psyc_parse_dict_state_init(state: *mut PsycParseDictState);
fn psyc_parse_dict_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); fn psyc_parse_dict_buffer_set(state: *mut PsycParseDictState, buffer: *const c_char, length: usize);
fn psyc_parse_index_state_init(state: *mut PsycParseState); fn psyc_parse_index_state_init(state: *mut PsycParseIndexState);
fn psyc_parse_index_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); fn psyc_parse_index_buffer_set(state: *mut PsycParseIndexState, buffer: *const c_char, length: usize);
fn psyc_parse_update_state_init(state: *mut PsycParseState); fn psyc_parse_update_state_init(state: *mut PsycParseUpdateState);
fn psyc_parse_update_buffer_set(state: *mut PsycParseState, buffer: *const c_char, length: usize); fn psyc_parse_update_buffer_set(state: *mut PsycParseUpdateState, buffer: *const c_char, length: usize);
fn psyc_parse_content_length(state: *mut PsycParseState) -> usize; fn psyc_parse_content_length(state: *mut PsycParseState) -> usize;
fn psyc_parse_content_length_found(state: *mut PsycParseState) -> bool; fn psyc_parse_content_length_found(state: *mut PsycParseState) -> bool;
fn psyc_parse_value_length(state: *mut PsycParseState) -> usize; fn psyc_parse_value_length(state: *mut PsycParseState) -> usize;
@ -60,61 +60,66 @@ extern "C" {
fn psyc_parse_keyword(data: *const c_char, len: usize) -> usize; fn psyc_parse_keyword(data: *const c_char, len: usize) -> usize;
} }
pub struct PsycParser { pub struct PsycParser<'a> {
state: PsycParseState state: PsycParseState,
operator: char,
name: Option<&'a [u8]>,
buffer: Option<&'a [u8]>,
cursor: usize
} }
pub struct PsycListParser { //pub struct PsycListParser<'a> {
state: PsycParseListState // state: PsycParseListState,
} // parsed_list: Vec<Vec<u8>>,
// buffer: &'a [u8]
pub struct PsycDictParser { //}
state: PsycParseDictState //
} //pub struct PsycDictParser<'a> {
// state: PsycParseDictState,
pub struct PsycIndexParser { // parsed_dict: Vec<(Vec<u8>, Vec<u8>)>,
state: PsycParseIndexState // buffer: &'a [u8]
} //}
//
pub struct PsycUpdateParser { //// TODO: What data structures does the index parser need?
state: PsycParseUpdateState //pub struct PsycIndexParser {
} // state: PsycParseIndexState
//}
//
//// TODO: what data structures does the update parser need?
//pub struct PsycUpdateParser {
// state: PsycParseUpdateState
//}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum PsycParserResult { pub enum PsycParserResult<'a> {
StateSync, StateSync,
StateReset, StateReset,
ParsingComplete, Complete,
InsufficientData,
RoutingModifier { RoutingModifier {
operator: char, operator: char,
name: String, name: &'a [u8],
value: Vec<u8> value: &'a [u8]
}, },
EntityModifier { EntityModifier {
operator: char, operator: char,
name: String, name: &'a [u8],
value: Vec<u8> value: &'a [u8]
},
IncompleteEntityModifier {
operator: char,
name: String,
value: Vec<u8>,
cursor: usize
}, },
Body { Body {
name: String, name: &'a [u8],
value: Vec<u8> value: &'a [u8]
},
IncompleteBody {
name: String,
value: Vec<u8>,
cursor: usize
},
InsufficientData {
cursor: usize
}, },
} }
//#[derive(Debug, PartialEq)]
//pub enum PsycDictParserResult {
// InsufficientData,
// Dict {
// data: Vec<(&'a [u8], &'a[u8])>
// }
//}
#[repr(C)] #[repr(C)]
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum PsycParserError { pub enum PsycParserError {
@ -130,7 +135,7 @@ pub enum PsycParserError {
GenericError = PsycParseRC::PSYC_PARSE_ERROR as _, GenericError = PsycParseRC::PSYC_PARSE_ERROR as _,
} }
impl PsycParser { impl<'a> PsycParser<'a> {
/// Create a PsycParser /// Create a PsycParser
pub fn new() -> Self { pub fn new() -> Self {
let mut state: PsycParseState; let mut state: PsycParseState;
@ -139,96 +144,41 @@ impl PsycParser {
let state_ptr = &mut state as *mut PsycParseState; let state_ptr = &mut state as *mut PsycParseState;
psyc_parse_state_init(state_ptr, PsycParseFlag::PSYC_PARSE_ALL as u8) psyc_parse_state_init(state_ptr, PsycParseFlag::PSYC_PARSE_ALL as u8)
} }
PsycParser{state: state} PsycParser{
state: state,
operator: '\0',
name: None,
buffer: None,
cursor: 0
}
} }
/// Set a buffer of raw bytes for parsing /// Set a buffer of raw bytes for parsing
pub fn set_buffer(&mut self, buffer: &[u8]) { pub fn set_buffer(&mut self, buffer: &'a [u8]) {
self.buffer = Some(buffer);
let state_ptr = &mut self.state as *mut PsycParseState; let state_ptr = &mut self.state as *mut PsycParseState;
let buffer_ptr = &buffer[0] as *const u8 as *const c_char; let buffer_ptr = &buffer[self.cursor] as *const u8 as *const c_char;
unsafe { unsafe {
psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len()) psyc_parse_buffer_set(state_ptr, buffer_ptr, buffer.len() - self.cursor)
} }
} }
/// Parse the buffer previously set by set_buffer. Call repeatedly until the /// Parse the buffer previously set by set_buffer. Call repeatedly until the
/// result is PsycParserResult::ParsingComplete or a PsycParserError. /// result is PsycParserResult::Complete or a PsycParserError.
pub fn parse(&mut self) -> Result<PsycParserResult, PsycParserError> { pub fn parse(&mut self)
-> Result<PsycParserResult, PsycParserError> {
let state_ptr = &mut self.state as *mut PsycParseState; let state_ptr = &mut self.state as *mut PsycParseState;
let mut operator: char;
let mut name: PsycString; let mut name: PsycString;
let mut value: PsycString; let mut value: PsycString;
unsafe { unsafe {
operator = mem::uninitialized();
name = mem::uninitialized(); name = mem::uninitialized();
value = mem::uninitialized(); value = mem::uninitialized();
let operator_ptr = &mut operator as *mut char as *mut c_char; let operator_ptr = &mut self.operator as *mut char as *mut c_char;
let name_ptr = &mut name as *mut PsycString; let name_ptr = &mut name as *mut PsycString;
let value_ptr = &mut value as *mut PsycString; let value_ptr = &mut value as *mut PsycString;
let parse_result = psyc_parse(state_ptr, operator_ptr, name_ptr, value_ptr); let parse_result = psyc_parse(state_ptr, operator_ptr, name_ptr, value_ptr);
self.cursor = self.cursor + psyc_parse_cursor(state_ptr);
match parse_result { match parse_result {
PsycParseRC::PSYC_PARSE_INSUFFICIENT => {
let result =
PsycParserResult::InsufficientData {
cursor: psyc_parse_cursor(state_ptr)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ROUTING => {
let result =
PsycParserResult::RoutingModifier {
operator: operator,
name: Self::cstring_to_string(name.data, name.length),
value: Self::cstring_to_bytes(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY |
PsycParseRC::PSYC_PARSE_ENTITY_END => {
let result =
PsycParserResult::EntityModifier {
operator: operator,
name: Self::cstring_to_string(name.data, name.length),
value: Self::cstring_to_bytes(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_START |
PsycParseRC::PSYC_PARSE_ENTITY_CONT => {
let result =
PsycParserResult::IncompleteEntityModifier {
operator: operator,
name: Self::cstring_to_string(name.data, name.length),
value: Self::cstring_to_bytes(value.data, value.length),
cursor: psyc_parse_cursor(state_ptr)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_BODY |
PsycParseRC::PSYC_PARSE_BODY_END => {
let result =
PsycParserResult::Body {
name: Self::cstring_to_string(name.data, name.length),
value: Self::cstring_to_bytes(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_BODY_START |
PsycParseRC::PSYC_PARSE_BODY_CONT => {
let result =
PsycParserResult::IncompleteBody {
name: Self::cstring_to_string(name.data, name.length),
value: Self::cstring_to_bytes(value.data, value.length),
cursor: psyc_parse_cursor(state_ptr)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_STATE_RESYNC => PsycParseRC::PSYC_PARSE_STATE_RESYNC =>
Ok(PsycParserResult::StateSync), Ok(PsycParserResult::StateSync),
@ -236,19 +186,69 @@ impl PsycParser {
Ok(PsycParserResult::StateReset), Ok(PsycParserResult::StateReset),
PsycParseRC::PSYC_PARSE_COMPLETE => PsycParseRC::PSYC_PARSE_COMPLETE =>
Ok(PsycParserResult::ParsingComplete), Ok(PsycParserResult::Complete),
PsycParseRC::PSYC_PARSE_ROUTING => {
let result = PsycParserResult::RoutingModifier {
operator: self.operator,
name: Self::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY => {
let result = PsycParserResult::EntityModifier {
operator: self.operator,
name: Self::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_BODY => {
let result = PsycParserResult::Body {
name: Self::cstring_to_slice(name.data, name.length),
value: Self::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_ENTITY_START |
PsycParseRC::PSYC_PARSE_BODY_START => {
self.name = Some(Self::cstring_to_slice(name.data, name.length));
Ok(PsycParserResult::InsufficientData)
},
PsycParseRC::PSYC_PARSE_ENTITY_END => {
let result = PsycParserResult::EntityModifier {
operator: self.operator,
name: self.name.unwrap(),
value: Self::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_BODY_END => {
let result = PsycParserResult::Body {
name: self.name.unwrap(),
value: Self::cstring_to_slice(value.data, value.length)
};
Ok(result)
},
PsycParseRC::PSYC_PARSE_INSUFFICIENT |
PsycParseRC::PSYC_PARSE_ENTITY_CONT |
PsycParseRC::PSYC_PARSE_BODY_CONT => {
Ok(PsycParserResult::InsufficientData)
},
_error => Err(mem::transmute(_error)), _error => Err(mem::transmute(_error)),
} }
} }
} }
unsafe fn cstring_to_string(cstring: *const c_char, length: usize) -> String { unsafe fn cstring_to_slice(cstring: *const c_char, length: usize) -> &'a [u8] {
let vec = Self::cstring_to_bytes(cstring, length); slice::from_raw_parts(cstring as *const u8, length)
String::from_utf8(vec).unwrap()
}
unsafe fn cstring_to_bytes(cstring: *const c_char, length: usize) -> Vec<u8> {
slice::from_raw_parts(cstring as *const u8, length).to_vec()
} }
} }

View File

@ -79,6 +79,7 @@ pub enum PsycParseFlag {
PSYC_PARSE_START_AT_CONTENT = 2, PSYC_PARSE_START_AT_CONTENT = 2,
} }
#[derive(Debug)]
#[repr(C)] #[repr(C)]
pub enum PsycParseRC { pub enum PsycParseRC {
/// Error, no length is set for a modifier which is longer than PSYC_MODIFIER_SIZE_THRESHOLD. /// Error, no length is set for a modifier which is longer than PSYC_MODIFIER_SIZE_THRESHOLD.

View File

@ -1,5 +1,4 @@
extern crate psyc; extern crate psyc;
use psyc::parser::*; use psyc::parser::*;
#[test] #[test]
@ -9,18 +8,73 @@ fn test_parse() {
let expected1 = let expected1 =
PsycParserResult::RoutingModifier{ PsycParserResult::RoutingModifier{
operator: ':', operator: ':',
name: "_target".to_string(), name: &test_data[1 .. 8],
value: "psyc://ve.symlynx.com/@blog".to_string().into_bytes() value: &test_data[9 .. 36],
}; };
let expected2 = PsycParserResult::StateSync; let expected2 = PsycParserResult::StateSync;
let mut parser = PsycParser::new(); let mut parser = PsycParser::new();
parser.set_buffer(&test_data); parser.set_buffer(&test_data);
let result1 = parser.parse(); {
let result2 = parser.parse(); let result1 = parser.parse();
assert_eq!(result1, Ok(expected1));
}
assert_eq!(result1, Ok(expected1)); {
assert_eq!(result2, Ok(expected2)); let result2 = parser.parse();
assert_eq!(result2, Ok(expected2));
}
}
#[test]
fn test_insufficient() {
let test_data = ":_target\tpsyc://ve.symlynx.com/@blog\n\n:_nick\tlurchi\n|\n".to_string().into_bytes();
let expected1 = PsycParserResult::InsufficientData;
let expected2 =PsycParserResult::RoutingModifier {
operator: ':',
name: &test_data[1 .. 8],
value: &test_data[9 .. 36]
};
let expected3 = PsycParserResult::InsufficientData;
let expected4 = PsycParserResult::EntityModifier{
operator: ':',
name: &test_data[39 .. 44],
value: &test_data[45 .. 51],
};
let expected5 = PsycParserResult::Complete;
let mut parser = PsycParser::new();
{
parser.set_buffer(&test_data[.. 1]);
let result1 = parser.parse();
assert_eq!(result1, Ok(expected1));
}
{
parser.set_buffer(&test_data[.. 46]);
let result2 = parser.parse();
assert_eq!(result2, Ok(expected2));
}
{
parser.set_buffer(&test_data[.. 49]);
let result3 = parser.parse();
assert_eq!(result3, Ok(expected3));
}
{
parser.set_buffer(&test_data);
let result4 = parser.parse();
assert_eq!(result4, Ok(expected4));
}
{
let result5 = parser.parse();
assert_eq!(result5, Ok(expected5));
}
} }