From 42c2709378deb5b8a91b51e02356f6ac0b16e5f2 Mon Sep 17 00:00:00 2001 From: "tg(x)" <*@tg-x.net> Date: Mon, 14 Nov 2011 22:02:02 +0100 Subject: [PATCH] table parsing --- .gitignore | 1 + include/psyc/parse.h | 113 ++++++++++++++++++++++++++++++++++------ src/parse.c | 120 ++++++++++++++++++++++++++++++++++++++++--- test/Makefile | 3 +- test/test_table.c | 80 +++++++++++++++++++++++++++++ 5 files changed, 293 insertions(+), 24 deletions(-) create mode 100644 test/test_table.c diff --git a/.gitignore b/.gitignore index bfa0c96..83138ed 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ test/test_json test/test_json_glib test/test_strlen test/test_text +test/test_table test/var_is_routing test/var_type test/uniform_parse diff --git a/include/psyc/parse.h b/include/psyc/parse.h index da28af4..2572c0e 100644 --- a/include/psyc/parse.h +++ b/include/psyc/parse.h @@ -40,7 +40,8 @@ * char* raw_data; // points to our (possibly incomplete) packet * size_t raw_len; // how many bytes of data * - * psyc_parse_buffer_set(&state, raw_data, raw_len); // state is our initialized state from before + * // state is our initialized state from before + * psyc_parse_buffer_set(&state, raw_data, raw_len); * @endcode * * Now the the variables that will save the output of the parser need to be @@ -212,6 +213,43 @@ typedef enum { PSYC_PARSE_LIST_INCOMPLETE = 3, } PsycParseListRC; +typedef enum { + PSYC_PARSE_TABLE_ERROR_BODY = -5, + PSYC_PARSE_TABLE_ERROR_DELIM = -4, + PSYC_PARSE_TABLE_ERROR_HEAD = -3, + PSYC_PARSE_TABLE_ERROR_WIDTH = -2, + PSYC_PARSE_TABLE_ERROR = -1, + /// Completed parsing the width of the table. + PSYC_PARSE_TABLE_WIDTH = 1, +#ifdef PSYC_PARSE_TABLE_HEAD + /// Completed parsing the name of the key column. + PSYC_PARSE_TABLE_NAME_KEY = 2, + /// Completed parsing the name of a value column. + PSYC_PARSE_TABLE_NAME_VALUE = 3, +#endif + /// Completed parsing a key. + PSYC_PARSE_TABLE_KEY = 4, + /// Completed parsing a value. 
+ PSYC_PARSE_TABLE_VALUE = 5, + /// Completed parsing a key and reached end of buffer. + PSYC_PARSE_TABLE_KEY_END = 6, + /// Completed parsing a value and reached end of buffer. + PSYC_PARSE_TABLE_VALUE_END = 7, + /// Binary table is incomplete. + PSYC_PARSE_TABLE_INCOMPLETE = 8, +} PsycParseTableRC; + +typedef enum { + PSYC_TABLE_PART_START = 0, + PSYC_TABLE_PART_WIDTH = 1, +#ifdef PSYC_PARSE_TABLE_HEAD + PSYC_TABLE_PART_HEAD_START = 2, + PSYC_TABLE_PART_HEAD = 3, +#endif + PSYC_TABLE_PART_BODY_START = 4, + PSYC_TABLE_PART_BODY = 5, +} PsycTablePart; + /** * Struct for keeping parser state. */ @@ -239,11 +277,26 @@ typedef struct { size_t startc; ///< Line start position. PsycString buffer; ///< Buffer with data to be parsed. PsycListType type; ///< List type. + char term; ///< Terminator character at the end. + uint8_t term_set; ///< Look for terminator. size_t elemParsed; ///< Number of bytes parsed from the elem so far. size_t elemLength; ///< Expected length of the elem. } PsycParseListState; +/** + * Struct for keeping table parser state. + */ +typedef struct { + size_t cursor; ///< Current position in buffer. + size_t startc; ///< Line start position. + PsycString buffer; ///< Buffer with data to be parsed. + PsycTablePart part; ///< Part of the table currently being parsed. + size_t width; ///< Width of table. + size_t elems; ///< Elems parsed so far in the table. + PsycParseListState list; /**< State of the list parser that psyc_parse_table() runs over the head and body. */ +} PsycParseTableState; + /** * Initializes the state struct. * @@ -273,8 +326,7 @@ psyc_parse_state_init (PsycParseState *state, uint8_t flags) * @see PsycString */ static inline void -psyc_parse_buffer_set (PsycParseState *state, char *buffer, - size_t length) +psyc_parse_buffer_set (PsycParseState *state, const char *buffer, size_t length) { state->buffer = (PsycString) {length, buffer}; state->cursor = 0; @@ -286,9 +338,7 @@ psyc_parse_buffer_set (PsycParseState *state, char *buffer, } /** - * Initializes the list state struct. 
- * - * @param state Pointer to the list state struct that should be initialized. + * Initializes the list state. */ static inline void psyc_parse_list_state_init (PsycParseListState *state) @@ -306,6 +356,32 @@ psyc_parse_list_buffer_set (PsycParseListState *state, char *buffer, size_t leng state->cursor = 0; } +static inline void +psyc_parse_list_term_set (PsycParseListState *state, char term) +{ + state->term = term; + state->term_set = PSYC_TRUE; /* psyc_parse_list() then also ends the list when it meets term */ +} + +/** + * Initializes the table state by zeroing the whole struct. + */ +static inline void +psyc_parse_table_state_init (PsycParseTableState *state) +{ + memset(state, 0, sizeof(PsycParseTableState)); +} + +/** + * Sets a new buffer in the table parser state struct with data to be parsed and resets the cursor to its start. + */ +static inline void +psyc_parse_table_buffer_set (PsycParseTableState *state, char *buffer, size_t length) +{ + state->buffer = (PsycString) {length, buffer}; + state->cursor = 0; +} + static inline size_t psyc_parse_content_length (PsycParseState *state) { @@ -394,14 +470,17 @@ static inline PsycParseListRC psyc_parse_list (PsycParseListState *state, PsycString *elem); -static inline PsycBool +PsycParseTableRC +psyc_parse_table (PsycParseTableState *state, PsycString *elem); /* returns one PsycParseTableRC per call; the parsed item is stored in elem */ + +static inline PsycRC psyc_parse_number (const char *value, size_t len, int64_t *n) { size_t c = 0; uint8_t neg = 0; if (!value) - return PSYC_FALSE; + return PSYC_ERROR; if (value[0] == '-') neg = ++c; @@ -411,42 +490,42 @@ psyc_parse_number (const char *value, size_t len, int64_t *n) *n = 10 * *n + (value[c++] - '0'); if (c != len) - return PSYC_FALSE; + return PSYC_ERROR; if (neg) *n = 0 - *n; - return PSYC_TRUE; + return PSYC_OK; } -static inline PsycBool +static inline PsycRC psyc_parse_number_unsigned (const char *value, size_t len, uint64_t *n) { size_t c = 0; if (!value) - return PSYC_FALSE; + return PSYC_ERROR; *n = 0; while (c < len && value[c] >= '0' && value[c] <= '9') *n = 10 * *n + (value[c++] - '0'); - return c == len ? PSYC_TRUE : PSYC_FALSE; + return c == len ? 
PSYC_OK : PSYC_ERROR; } -static inline PsycBool +static inline PsycRC psyc_parse_time (const char *value, size_t len, time_t *t) { return psyc_parse_number(value, len, t); } -static inline PsycBool +static inline PsycRC psyc_parse_date (const char *value, size_t len, time_t *t) { if (psyc_parse_number(value, len, t)) { *t += PSYC_EPOCH; - return PSYC_TRUE; + return PSYC_OK; /* NOTE(review): the enclosing if tests the PsycRC returned by psyc_parse_number() for truthiness; should PSYC_ERROR be non-zero, this branch is taken on failure too - confirm against the PsycRC definition. */ } - return PSYC_FALSE; + return PSYC_ERROR; } /** diff --git a/src/parse.c b/src/parse.c index 5a3dd05..27af0a5 100644 --- a/src/parse.c +++ b/src/parse.c @@ -328,7 +328,7 @@ psyc_parse (PsycParseState *state, char *oper, // fall thru case PSYC_PART_DATA: - PSYC_PART_DATA: + PSYC_PART_DATA: value->data = state->buffer.data + state->cursor; value->length = 0; @@ -385,7 +385,7 @@ psyc_parse (PsycParseState *state, char *oper, } case PSYC_PART_END: - PSYC_PART_END: + PSYC_PART_END: // if data was not empty next is the \n at the end of data if (state->contentLengthFound && state->valueLengthFound && state->valueLength && !(state->flags & PSYC_PARSE_ROUTING_ONLY)) { @@ -447,10 +447,20 @@ psyc_parse_list (PsycParseListState *state, PsycString *elem) if (state->cursor >= state->buffer.length) return PSYC_PARSE_LIST_END; - while (state->buffer.data[state->cursor] != '|') { - elem->length++; - if (++(state->cursor) >= state->buffer.length) - return PSYC_PARSE_LIST_END; + if (state->term_set) { + while (state->buffer.data[state->cursor] != '|') { + elem->length++; /* NOTE(review): counted before the terminator test below, so the terminator byte itself is included in elem->length - confirm this is intended. */ + if (state->buffer.data[state->cursor] == state->term) + return PSYC_PARSE_LIST_END; + if (++(state->cursor) >= state->buffer.length) + return PSYC_PARSE_LIST_END; + } + } else { + while (state->buffer.data[state->cursor] != '|') { + elem->length++; + if (++(state->cursor) >= state->buffer.length) + return PSYC_PARSE_LIST_END; + } } state->cursor++; return PSYC_PARSE_LIST_ELEM; @@ -497,3 +507,101 @@ psyc_parse_list (PsycParseListState *state, PsycString *elem) return PSYC_PARSE_LIST_ERROR; // should not be reached } + +PsycParseTableRC 
+psyc_parse_table (PsycParseTableState *state, PsycString *elem) +{ /* Parses the next item of the table in state->buffer: the width after a leading '*', head names when PSYC_PARSE_TABLE_HEAD is defined, then keys and values. */ + if (state->cursor >= state->buffer.length) + return PSYC_PARSE_TABLE_INCOMPLETE; + + state->startc = state->cursor; + + switch (state->part) { + case PSYC_TABLE_PART_START: + if (state->buffer.data[state->cursor] != '*') { + state->part = PSYC_TABLE_PART_BODY_START; + goto PSYC_TABLE_PART_BODY_START; + } else { + state->part = PSYC_TABLE_PART_WIDTH; + ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_TABLE_INCOMPLETE); + } + // fall thru + + case PSYC_TABLE_PART_WIDTH: + if (psyc_is_numeric(state->buffer.data[state->cursor])) { + do { + state->width = + 10 * state->width + state->buffer.data[state->cursor] - '0'; + ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_TABLE_INCOMPLETE); + } while (psyc_is_numeric(state->buffer.data[state->cursor])); + } else + return PSYC_PARSE_TABLE_ERROR_WIDTH; + + switch (state->buffer.data[state->cursor]) { +#ifdef PSYC_PARSE_TABLE_HEAD + case '|': + state->part = PSYC_TABLE_PART_HEAD_START; + break; +#endif + case ' ': + state->part = PSYC_TABLE_PART_BODY_START; + state->cursor++; + } + + elem->length = state->width; /* the width is handed to the caller through elem->length */ + return PSYC_TABLE_PART_WIDTH; /* NOTE(review): this is a PsycTablePart constant; it shares the value 1 with PSYC_PARSE_TABLE_WIDTH, which is presumably what was meant. */ +#ifdef PSYC_PARSE_TABLE_HEAD + case PSYC_TABLE_PART_HEAD_START: + psyc_parse_list_buffer_set(&state->list, state->buffer.data + state->cursor, + state->buffer.length - state->cursor); + psyc_parse_list_term_set(&state->list, ' '); + state->part = PSYC_TABLE_PART_HEAD; + // fall thru + + case PSYC_TABLE_PART_HEAD: + switch (psyc_parse_list(&state->list, elem)) { + case PSYC_PARSE_LIST_ELEM: + if (state->elems == 0) { + state->elems++; + return PSYC_PARSE_TABLE_NAME_KEY; + } else if (state->elems < state->width) { + state->elems++; + return PSYC_PARSE_TABLE_NAME_VALUE; + } else // too many elements + return PSYC_PARSE_TABLE_ERROR_HEAD; + + case PSYC_PARSE_LIST_END: + if (state->elems != state->width) + return PSYC_PARSE_TABLE_ERROR_HEAD; + + state->part = PSYC_TABLE_PART_BODY_START; + state->cursor += state->list.cursor + 1; + 
psyc_parse_list_state_init(&state->list); + return state->elems++ == 0 + ? PSYC_PARSE_TABLE_NAME_KEY : PSYC_PARSE_TABLE_NAME_VALUE; + default: + return PSYC_PARSE_TABLE_ERROR_HEAD; + } +#endif + case PSYC_TABLE_PART_BODY_START: + PSYC_TABLE_PART_BODY_START: + psyc_parse_list_buffer_set(&state->list, state->buffer.data + state->cursor, + state->buffer.length - state->cursor); + state->part = PSYC_TABLE_PART_BODY; + // fall thru + + case PSYC_TABLE_PART_BODY: + switch (psyc_parse_list(&state->list, elem)) { + case PSYC_PARSE_LIST_ELEM: + return state->elems++ % (state->width + 1) == 0 /* a row is one key followed by width values */ + ? PSYC_PARSE_TABLE_KEY : PSYC_PARSE_TABLE_VALUE; + case PSYC_PARSE_LIST_END: + return state->elems++ % (state->width + 1) == 0 + ? PSYC_PARSE_TABLE_KEY_END : PSYC_PARSE_TABLE_VALUE_END; + default: + return PSYC_PARSE_TABLE_ERROR_BODY; + } + } + + return PSYC_PARSE_LIST_ERROR; /* NOTE(review): a list parser code returned from a function declared as PsycParseTableRC; PSYC_PARSE_TABLE_ERROR is presumably what was meant. */ // should not be reached +} diff --git a/test/Makefile b/test/Makefile index 05a41c2..ba570c3 100644 --- a/test/Makefile +++ b/test/Makefile @@ -3,7 +3,7 @@ DEBUG = 2 CFLAGS = -I../include -I../src -Wall -std=c99 ${OPT} LDFLAGS = -L../lib LOADLIBES = -lpsyc -lm -TARGETS = test_psyc test_psyc_speed test_parser test_match test_render test_text var_is_routing var_type uniform_parse +TARGETS = test_psyc test_psyc_speed test_parser test_match test_render test_text var_is_routing var_type uniform_parse test_table O = test.o WRAPPER = DIET = diet @@ -48,6 +48,7 @@ test: ${TARGETS} ./var_is_routing ./var_type ./uniform_parse + ./test_table x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./test_psyc -f $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./test_psyc -rf $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x diff --git a/test/test_table.c b/test/test_table.c new file mode 100644 index 0000000..01759b2 --- /dev/null +++ b/test/test_table.c @@ -0,0 +1,80 @@ +#include +#include + +#include + +int +parse_table (char *buf, size_t buflen) +{ + printf(">> %.*s\n", 
(int)buflen, buf); + + int ret; + PsycString elem; + PsycParseTableState state; + psyc_parse_table_state_init(&state); + psyc_parse_table_buffer_set(&state, buf, buflen); + + do { + ret = psyc_parse_table(&state, &elem); + switch (ret) { + case PSYC_PARSE_TABLE_WIDTH: + printf("width: %ld\n", elem.length); /* NOTE(review): %ld assumes elem.length is a long; if PsycString.length is a size_t, %zu is the matching conversion - confirm. */ + break; +#ifdef PSYC_PARSE_TABLE_HEAD + case PSYC_PARSE_TABLE_NAME_KEY: + printf("name key: %.*s\n", (int)PSYC_S2ARG2(elem)); + break; + case PSYC_PARSE_TABLE_NAME_VALUE: + printf("name val: %.*s\n", (int)PSYC_S2ARG2(elem)); + break; +#endif + case PSYC_PARSE_TABLE_KEY_END: + ret = 0; /* fall thru: still print this last item, then leave the loop */ + case PSYC_PARSE_TABLE_KEY: + printf("key: %.*s\n", (int)PSYC_S2ARG2(elem)); + break; + case PSYC_PARSE_TABLE_VALUE_END: + ret = 0; /* fall thru: still print this last item, then leave the loop */ + case PSYC_PARSE_TABLE_VALUE: + printf("val: %.*s\n", (int)PSYC_S2ARG2(elem)); + break; + default: /* NOTE(review): positive codes without a case above, e.g. PSYC_PARSE_TABLE_INCOMPLETE (8), also land here and keep the loop going because ret > 0 - confirm this cannot spin forever. */ + printf("err: %d\n", ret); + } + } while (ret > 0); + return ret == 0; /* non-zero only when parsing stopped at a *_END code */ +} + +int +main (int argc, char **argv) +{ + +#ifdef PSYC_PARSE_TABLE_HEAD + if (!parse_table(PSYC_C2ARG("*2|_key|_val1|_val2 |_foo|bar|baz|_aaa|bbb|ccc"))) + return 1; +#endif + + if (!parse_table(PSYC_C2ARG("*2 |_foo|bar|baz|_aaa|bbb|ccc"))) + return 2; + +#ifdef PSYC_PARSE_TABLE_HEAD + if (!parse_table(PSYC_C2ARG("*1|_key|_val1 |_foo|bar|_baz|aaa|_bbb|ccc"))) + return 1; +#endif + + if (!parse_table(PSYC_C2ARG("*1 |_foo|bar|_baz|aaa|_bbb|ccc"))) + return 3; + +#ifdef PSYC_PARSE_TABLE_HEAD + if (!parse_table(PSYC_C2ARG("*0|_key |foo|bar|baz|aaa|bbb|ccc"))) + return 4; +#endif + + if (!parse_table(PSYC_C2ARG("*0 |foo|bar|baz|aaa|bbb|ccc"))) + return 4; + + if (!parse_table(PSYC_C2ARG("|foo|bar|baz|aaa|bbb|ccc"))) + return 5; + + return 0; +}