From e41822ed2b4c278ad258e1cf4381bc149cb27cc7 Mon Sep 17 00:00:00 2001 From: "tg(x)" <*@tg-x.net> Date: Tue, 19 Apr 2011 19:41:25 +0200 Subject: [PATCH] list parsing --- include/psyc/parser.h | 75 ++++++++--- src/parser.c | 187 ++++++++++++++++++++-------- src/tests/packets/test-2-list | 2 +- src/tests/packets/test-2-list-error | 26 ++++ src/tests/packets/test-3-list | 2 +- src/tests/testParser.c | 49 +++++--- 6 files changed, 259 insertions(+), 82 deletions(-) create mode 100644 src/tests/packets/test-2-list-error diff --git a/include/psyc/parser.h b/include/psyc/parser.h index e599bb7..c5d13be 100644 --- a/include/psyc/parser.h +++ b/include/psyc/parser.h @@ -28,27 +28,43 @@ enum PSYC_ReturnCodes PSYC_INCOMPLETE = 9, }; +enum PSYC_ListReturnCodes +{ + PSYC_ERROR_LIST_DELIM = -5, + PSYC_ERROR_LIST_LEN = -4, + PSYC_ERROR_LIST_TYPE = -3, + PSYC_ERROR_LIST_NAME = -2, + PSYC_ERROR_LIST= -1, + PSYC_LIST_ELEM = 1, + PSYC_LIST_END = 2, + PSYC_LIST_INCOMPLETE = 3, +}; + enum PSYC_Parts { PSYC_PART_RESET = -1, PSYC_PART_HEADER = 0, PSYC_PART_LENGTH, - PSYC_PART_HEADER_END, PSYC_PART_CONTENT, PSYC_PART_METHOD, PSYC_PART_DATA, PSYC_PART_END, }; +enum PSYC_ListTypes +{ + PSYC_LIST_TEXT = 1, + PSYC_LIST_BINARY = 2, +}; -typedef struct +typedef struct { size_t length; const uint8_t * ptr; } PSYC_Array; -typedef struct +typedef struct { size_t cursor; /** current position in buffer */ size_t startc; /** line start position */ @@ -59,45 +75,72 @@ typedef struct size_t contentParsed; /** number of bytes parsed from the content so far */ size_t contentLength; /** expected length of the content */ char contentLengthFound; /** is there a length given for this packet? 
 */ - size_t valueParsed; /** number of bytes parsef from the value so far */ + size_t valueParsed; /** number of bytes parsed from the value so far */ size_t valueLength; /** expected length of the value */ } PSYC_State; +typedef struct +{ + size_t cursor; /** current position in buffer */ + size_t startc; /** line start position */ + PSYC_Array buffer; + char type; /** list type, see PSYC_ListTypes */ + + size_t elemParsed; /** number of bytes parsed from the elem so far */ + size_t elemLength; /** expected length of the elem */ +} PSYC_ListState; + #ifndef PSYC_COMPILE_LIBRARY -/* @brief shortcut for creating an array +/* @brief Shortcut for creating an array. * - * @param memory pointer to the buffer - * @param length length of that buffer + * @param memory Pointer to the buffer. + * @param length Length of that buffer. * - * @returns an instance of the PSYC_Array struct */ + * @return An instance of the PSYC_Array struct. */ inline PSYC_Array PSYC_createArray (uint8_t* const memory, size_t length) { PSYC_Array arr = {length, memory}; - return arr; } -/* @brief initiates the state struct with flags +/* @brief Initiates the state struct with flags. * - * @param state pointer to the state struct that should be initiated - * @param flags the flags that one ones to set, see PSYC_Flags */ + * @param state Pointer to the state struct that should be initiated. + * @param flags The flags that one wants to set, see PSYC_Flags. + */ inline void PSYC_initState2 (PSYC_State* state, uint8_t flags ) { memset(state, 0, sizeof(PSYC_State)); state->flags = flags; } -/* @brief initiates the state struct +/* @brief Initiates the state struct. * - * @param state pointer to the state struct that should be initiated */ + * @param state Pointer to the state struct that should be initiated. + */ inline void PSYC_initState (PSYC_State* state) { memset(state, 0, sizeof(PSYC_State)); } +/* @brief Initiates the list state struct. 
+ * + * @param state Pointer to the list state struct that should be initiated. + */ +inline void PSYC_initListState (PSYC_ListState* state) +{ + memset(state, 0, sizeof(PSYC_ListState)); +} + inline void PSYC_nextBuffer (PSYC_State* state, PSYC_Array newBuf) { - state->buffer = newBuf; + state->buffer = newBuf; + state->cursor = 0; +} + +inline void PSYC_nextListBuffer (PSYC_ListState* state, PSYC_Array newBuf) +{ + state->buffer = newBuf; state->cursor = 0; } @@ -109,3 +152,5 @@ inline size_t PSYC_getContentLength (PSYC_State* s) #endif int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Array* value); + +int PSYC_parseList(PSYC_ListState* state, PSYC_Array *name, PSYC_Array* value, PSYC_Array* elem); diff --git a/src/parser.c b/src/parser.c index ef927b2..0c363c2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -7,11 +7,11 @@ #include -#define ADVANCE_CURSOR_OR_RETURN() \ +#define ADVANCE_CURSOR_OR_RETURN(ret) \ if (++(state->cursor) >= state->buffer.length) \ { \ state->cursor = state->startc; \ - return PSYC_INSUFFICIENT; \ + return ret; \ } /** @brief isGlyph @@ -24,14 +24,14 @@ inline char isGlyph(uint8_t g) { case ':': case '=': - case '?': case '+': case '-': + case '?': + case '!': return 1; default: return 0; } - } inline char isNumeric(uint8_t c) @@ -64,8 +64,8 @@ inline int PSYC_parseName(PSYC_State* state, PSYC_Array* name) while (isKwChar(state->buffer.ptr[state->cursor])) { - name->length++; // was a valid char, increase length - ADVANCE_CURSOR_OR_RETURN(); + name->length++; /* was a valid char, increase length */ + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); } return name->length > 0 ? PSYC_SUCCESS : PSYC_ERROR; @@ -81,11 +81,10 @@ inline int PSYC_parseBinaryValue(PSYC_State* state, PSYC_Array* value, size_t* l size_t remaining = *length - *parsed; value->ptr = state->buffer.ptr + state->cursor; - if (state->cursor + remaining > state->buffer.length) // is the length larger than this buffer? 
+ if (state->cursor + remaining > state->buffer.length) /* is the length larger than this buffer? */ { value->length = state->buffer.length - state->cursor; *parsed += value->length; - state->cursor = 0; return PSYC_INCOMPLETE; } @@ -103,7 +102,7 @@ inline int PSYC_parseBinaryValue(PSYC_State* state, PSYC_Array* value, size_t* l inline int PSYC_parseVar(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Array* value) { *modifier = *(state->buffer.ptr + state->cursor); - ADVANCE_CURSOR_OR_RETURN(); + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); if (PSYC_parseName(state, name) != PSYC_SUCCESS) return PSYC_ERROR_VAR_NAME; @@ -112,9 +111,9 @@ inline int PSYC_parseVar(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, /* Parse the value. * If we're in the content part check if it's a binary var */ - if (state->part == PSYC_PART_CONTENT && state->buffer.ptr[state->cursor] == ' ') // binary arg - { // after SP the length follows. - ADVANCE_CURSOR_OR_RETURN(); + if (state->part == PSYC_PART_CONTENT && state->buffer.ptr[state->cursor] == ' ') /* binary arg */ + { /* After SP the length follows. */ + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); state->valueLength = 0; state->valueParsed = 0; @@ -123,22 +122,19 @@ inline int PSYC_parseVar(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, do { state->valueLength = 10 * state->valueLength + state->buffer.ptr[state->cursor] - '0'; - ADVANCE_CURSOR_OR_RETURN(); + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); } while (isNumeric(state->buffer.ptr[state->cursor])); } else return PSYC_ERROR_VAR_LEN; - // after the length a TAB follows + /* After the length a TAB follows. */ if (state->buffer.ptr[state->cursor] != '\t') return PSYC_ERROR_VAR_TAB; - if (state->buffer.length <= ++(state->cursor)) // incremented cursor inside length? - { - state->cursor = 0; + if (state->buffer.length <= ++(state->cursor)) /* Incremented cursor inside length? 
 */ return PSYC_ENTITY_INCOMPLETE; - } if (PSYC_parseBinaryValue(state, value, &(state->valueLength), &(state->valueParsed)) == PSYC_INCOMPLETE) return PSYC_ENTITY_INCOMPLETE; @@ -146,15 +142,15 @@ inline int PSYC_parseVar(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, state->cursor++; return PSYC_SUCCESS; } - else if (state->buffer.ptr[state->cursor] == '\t') // simple arg + else if (state->buffer.ptr[state->cursor] == '\t') /* simple arg */ { - ADVANCE_CURSOR_OR_RETURN(); + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); value->ptr = state->buffer.ptr + state->cursor; while (state->buffer.ptr[state->cursor] != '\n') { value->length++; - ADVANCE_CURSOR_OR_RETURN(); + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); } state->cursor++; return PSYC_SUCCESS; @@ -163,45 +159,45 @@ inline int PSYC_parseVar(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, return PSYC_ERROR_VAR_TAB; } -/** @brief generalized line-based parser */ +/** @brief Generalized line-based parser. */ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Array* value) { int ret; /* a return value */ size_t pos; /* a cursor position */ - /* start position of the current line in the buffer - * in case we return insufficent, we rewind to this position */ + /* Start position of the current line in the buffer + * in case we return insufficient, we rewind to this position. */ state->startc = state->cursor; - /* first we test if we can access the first char */ - if (state->buffer.length <= state->cursor) // cursor is not inside the length - return PSYC_INSUFFICIENT; // return insufficient data. + /* First we test if we can access the first char. */ + if (state->cursor >= state->buffer.length) /* cursor is not inside the length */ + return PSYC_INSUFFICIENT; /* return insufficient data. */ switch (state->part) { - case PSYC_PART_RESET: // new packet starts here, reset state + case PSYC_PART_RESET: /* New packet starts here, reset state. 
*/ state->valueParsed = 0; state->valueLength = 0; state->contentParsed = 0; state->contentLength = 0; state->contentLengthFound = 0; state->part = PSYC_PART_HEADER; - // fall thru + /* fall thru */ case PSYC_PART_HEADER: /* Each line of the header starts with a glyph, * i.e. :_name, -_name +_name etc, * so just test if the first char is a glyph. */ - if (isGlyph(state->buffer.ptr[state->cursor])) // is the first char a glyph? - { // it is a glyph, so a variable starts here + if (isGlyph(state->buffer.ptr[state->cursor])) /* is the first char a glyph? */ + { /* it is a glyph, so a variable starts here */ ret = PSYC_parseVar(state, modifier, name, value); return ret == PSYC_SUCCESS ? PSYC_ROUTING : ret; } - else // not a glyph + else /* not a glyph */ { state->part = PSYC_PART_LENGTH; - // fall thru + /* fall thru */ } case PSYC_PART_LENGTH: @@ -213,7 +209,7 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra do { state->contentLength = 10 * state->contentLength + state->buffer.ptr[state->cursor] - '0'; - ADVANCE_CURSOR_OR_RETURN(); + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); } while (isNumeric(state->buffer.ptr[state->cursor])); } @@ -226,9 +222,9 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra else state->part = PSYC_PART_CONTENT; } - else /* not start of content, this must be the end */ + else /* Not start of content, this must be the end. */ { - /* if we have a length then it should've been followed by a \n */ + /* If we have a length then it should've been followed by a \n */ if (state->contentLengthFound) return PSYC_ERROR_LENGTH; @@ -236,9 +232,9 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra goto PSYC_PART_END; } - ADVANCE_CURSOR_OR_RETURN(); + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); state->startc = state->cursor; - // fall thru + /* fall thru */ case PSYC_PART_CONTENT: /* In case of an incomplete binary variable resume parsing it. 
*/ @@ -265,7 +261,7 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra { state->part = PSYC_PART_METHOD; state->startc = state->cursor; - // fall thru + /* fall thru */ } case PSYC_PART_METHOD: @@ -281,9 +277,9 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra state->part = PSYC_PART_DATA; if (state->cursor >= state->buffer.length) return PSYC_INSUFFICIENT; - // fall thru + /* fall thru */ } - else /* no method, which means the packet should end now */ + else /* No method, which means the packet should end now. */ { state->part = PSYC_PART_END; state->startc = state->cursor; @@ -294,7 +290,7 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra value->ptr = state->buffer.ptr + state->cursor; value->length = 0; - if (state->contentLengthFound) // we know the length of the packet + if (state->contentLengthFound) /* We know the length of the packet. */ { if (PSYC_parseBinaryValue(state, value, &(state->contentLength), &(state->contentParsed)) == PSYC_INCOMPLETE) return PSYC_BODY_INCOMPLETE; @@ -303,16 +299,16 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra state->part = PSYC_PART_END; return PSYC_BODY; } - else // search for the terminator + else /* Search for the terminator. */ { while (1) { if (state->buffer.ptr[state->cursor] == '\n') { - if (state->cursor+2 >= state->buffer.length) // incremented cursor inside length? + if (state->cursor+2 >= state->buffer.length) /* incremented cursor inside length? 
*/ { - state->cursor = state->startc; // set to start value - return PSYC_INSUFFICIENT; // return insufficient + state->cursor = state->startc; + return PSYC_INSUFFICIENT; } if (state->buffer.ptr[state->cursor+1] == '|' && @@ -324,16 +320,16 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra } } value->length++; - ADVANCE_CURSOR_OR_RETURN(); + ADVANCE_CURSOR_OR_RETURN(PSYC_INSUFFICIENT); } } case PSYC_PART_END: PSYC_PART_END: - /* end of packet, at this point we have already passed a \n + /* End of packet, at this point we have already passed a \n and the cursor should point to | */ - if (state->cursor+1 >= state->buffer.length) // incremented cursor inside length? - return PSYC_INSUFFICIENT; // return insufficient + if (state->cursor+1 >= state->buffer.length) /* incremented cursor inside length? */ + return PSYC_INSUFFICIENT; if (state->buffer.ptr[state->cursor] == '|' && state->buffer.ptr[state->cursor+1] == '\n') /* packet ends here */ @@ -348,5 +344,94 @@ int PSYC_parse(PSYC_State* state, uint8_t* modifier, PSYC_Array* name, PSYC_Arra return PSYC_ERROR_END; } } - return PSYC_ERROR; // should not be reached + return PSYC_ERROR; /* should not be reached */ +} + +/** @brief list parser */ +int PSYC_parseList(PSYC_ListState* state, PSYC_Array *name, PSYC_Array* value, PSYC_Array* elem) +{ + if (state->cursor >= state->buffer.length) + return PSYC_LIST_INCOMPLETE; + + state->startc = state->cursor; + + if (!state->type) /* If type is not set we're at the start */ + { + if (name->length < 5 || memcmp(name->ptr, "_list", 5) != 0 || + (name->length > 5 && name->ptr[5] != '_')) /* name should be _list or should start with _list_ */ + return PSYC_ERROR_LIST_NAME; + + /* First character is either | for text lists, or a number for binary lists */ + if (state->buffer.ptr[state->cursor] == '|') + { + state->type = PSYC_LIST_TEXT; + state->cursor++; + } + else if (isNumeric(state->buffer.ptr[state->cursor])) + state->type = PSYC_LIST_BINARY; 
+ else + return PSYC_ERROR_LIST_TYPE; + } + + if (state->type == PSYC_LIST_TEXT) + { + elem->ptr = state->buffer.ptr + state->cursor; + elem->length = 0; + + if (state->cursor >= state->buffer.length) + return PSYC_LIST_END; + + while (state->buffer.ptr[state->cursor] != '|') + { + elem->length++; + if (++(state->cursor) >= state->buffer.length) + return PSYC_LIST_END; + } + state->cursor++; + return PSYC_LIST_ELEM; + } + else /* binary list */ + { + if (!(state->elemParsed < state->elemLength)) { + /* Element starts with a number. */ + if (isNumeric(state->buffer.ptr[state->cursor])) + { + do + { + state->elemLength = 10 * state->elemLength + state->buffer.ptr[state->cursor] - '0'; + ADVANCE_CURSOR_OR_RETURN(PSYC_LIST_INCOMPLETE); + } + while (isNumeric(state->buffer.ptr[state->cursor])); + } + else + return PSYC_ERROR_LIST_LEN; + + if (state->buffer.ptr[state->cursor] != ' ') + return PSYC_ERROR_LIST_LEN; + + state->cursor++; + elem->ptr = state->buffer.ptr + state->cursor; + elem->length = 0; + state->elemParsed = 0; + } + + /* Start or resume parsing the binary data */ + if (state->elemParsed < state->elemLength) { + if (PSYC_parseBinaryValue((PSYC_State*)state, elem, &(state->elemLength), &(state->elemParsed)) == PSYC_INCOMPLETE) + return PSYC_LIST_INCOMPLETE; + + state->elemLength = 0; + + if (state->cursor >= state->buffer.length) + return PSYC_LIST_END; + + if (state->buffer.ptr[state->cursor] != '|') + return PSYC_ERROR_LIST_DELIM; + + state->cursor++; + return PSYC_LIST_ELEM; + } + } + + return PSYC_ERROR_LIST; /* should not be reached */ } diff --git a/src/tests/packets/test-2-list b/src/tests/packets/test-2-list index b534143..ebff895 100644 --- a/src/tests/packets/test-2-list +++ b/src/tests/packets/test-2-list @@ -1,7 +1,7 @@ | =_source psyc://foo/~bar :_target psyc://bar/~baz -=_list_foo foo|bar|baz +=_list_foo |foo|bar|baz :_tag sch1828hu3r2cm =_foo bar baz diff --git a/src/tests/packets/test-2-list-error b/src/tests/packets/test-2-list-error new 
file mode 100644 index 0000000..cbae3e6 --- /dev/null +++ b/src/tests/packets/test-2-list-error @@ -0,0 +1,26 @@ +| +=_source psyc://foo/~bar +:_target psyc://bar/~baz +=_list_foo foo|bar|baz +=_listfoo foo|bar|baz +:_tag sch1828hu3r2cm + +=_foo bar baz +=_abc_def 11 ghi jkl + +xq +=_list_bar 36 2 foo|3 bar|7 foo +bar|11 foo +bar +baz +:_foo_bar yay +_message_foo_bar +ohai there! +\o/ +| +:_target psyc://foo.bar/~baz + +:_test 123 +_message_test +ohai! +| diff --git a/src/tests/packets/test-3-list b/src/tests/packets/test-3-list index 2cd79b6..4776f47 100644 --- a/src/tests/packets/test-3-list +++ b/src/tests/packets/test-3-list @@ -1,7 +1,7 @@ | =_source psyc://foo/~bar :_target psyc://bar/~baz -=_list_foo foo|bar|baz +=_list_foo |foo|bar|baz :_tag sch1828hu3r2cm ?_test ignored diff --git a/src/tests/testParser.c b/src/tests/testParser.c index a9d7b9d..a704409 100644 --- a/src/tests/testParser.c +++ b/src/tests/testParser.c @@ -5,31 +5,26 @@ int main(int argc, char** argv) { - uint8_t buffer[2048]; - int index; + int index, ret; + uint8_t buffer[2048], modifier; + PSYC_Array name, value, elem; + PSYC_State state; + PSYC_ListState listState; int file = open(argv[1],O_RDONLY); if(file < 0) return -1; index = read(file,(void*)buffer,sizeof(buffer)); - write(1, ">> INPUT:\n", 10); + write(1, ">> INPUT\n", 9); write(1, buffer, index); - write(1, ">> PARSE:\n", 10); + write(1, ">> PARSE\n", 9); - PSYC_State state; PSYC_initState(&state); - - //unsigned int cursor=0,tmp=0; - //unsigned long expectedBytes=0; - uint8_t modifier; - int ret; - PSYC_Array name, value; - PSYC_nextBuffer(&state, PSYC_createArray(buffer, index)); // try parsing that now - while((ret=PSYC_parse(&state, &modifier, &name, &value))) + while (ret = PSYC_parse(&state, &modifier, &name, &value)) { switch (ret) { @@ -41,6 +36,33 @@ int main(int argc, char** argv) write(1, " = ", 3); write(1, value.ptr, value.length); write(1, "\n", 1); + if (memcmp(name.ptr, "_list", 5) == 0) + { + write(1, ">>> LIST 
START\n", 15); + PSYC_initListState(&listState); + PSYC_nextListBuffer(&listState, value); + while (ret = PSYC_parseList(&listState, &name, &value, &elem)) + { + switch (ret) + { + case PSYC_LIST_END: + case PSYC_LIST_ELEM: + write(1, "|", 1); + write(1, elem.ptr, elem.length); + write(1, "\n", 1); + break; + default: + printf("Error while parsing list: %i\n", ret); + return 1; + } + + if (ret == PSYC_LIST_END) + { + write(1, ">>> LIST END\n", 13); + break; + } + } + } break; case PSYC_COMPLETE: printf("Done parsing.\n"); @@ -54,5 +76,4 @@ int main(int argc, char** argv) } } return 0; - }