1
0
Fork 0
mirror of git://git.psyc.eu/libpsyc synced 2024-08-15 03:19:02 +00:00

table parsing

This commit is contained in:
Gabor Adam Toth 2011-11-14 22:02:02 +01:00
parent f583c90dc3
commit 03cf28ae79
5 changed files with 293 additions and 24 deletions

1
.gitignore vendored
View file

@ -17,6 +17,7 @@ test/test_json
test/test_json_glib test/test_json_glib
test/test_strlen test/test_strlen
test/test_text test/test_text
test/test_table
test/var_is_routing test/var_is_routing
test/var_type test/var_type
test/uniform_parse test/uniform_parse

View file

@ -40,7 +40,8 @@
* char* raw_data; // points to our (possibly incomplete) packet * char* raw_data; // points to our (possibly incomplete) packet
* size_t raw_len; // how many bytes of data * size_t raw_len; // how many bytes of data
* *
* psyc_parse_buffer_set(&state, raw_data, raw_len); // state is our initialized state from before * // state is our initialized state from before
* psyc_parse_buffer_set(&state, raw_data, raw_len);
* @endcode * @endcode
* *
* Now the variables that will save the output of the parser need to be * Now the variables that will save the output of the parser need to be
@ -212,6 +213,43 @@ typedef enum {
PSYC_PARSE_LIST_INCOMPLETE = 3, PSYC_PARSE_LIST_INCOMPLETE = 3,
} PsycParseListRC; } PsycParseListRC;
/**
 * Return values of psyc_parse_table().
 *
 * Negative values are errors, positive values tell what has been parsed.
 */
typedef enum {
    /// Parsing an element of the table body failed.
    PSYC_PARSE_TABLE_ERROR_BODY  = -5,
    /// Delimiter error.
    /// NOTE(review): not returned by the parser code in view - confirm usage.
    PSYC_PARSE_TABLE_ERROR_DELIM = -4,
    /// Parsing the column names failed or their count does not match the width.
    PSYC_PARSE_TABLE_ERROR_HEAD  = -3,
    /// The leading '*' is not followed by a digit.
    PSYC_PARSE_TABLE_ERROR_WIDTH = -2,
    /// Generic error.
    PSYC_PARSE_TABLE_ERROR       = -1,

    /// Completed parsing the width of the table.
    PSYC_PARSE_TABLE_WIDTH       = 1,
#ifdef PSYC_PARSE_TABLE_HEAD
    /// Completed parsing the name of the key column.
    PSYC_PARSE_TABLE_NAME_KEY    = 2,
    /// Completed parsing the name of a value column.
    PSYC_PARSE_TABLE_NAME_VALUE  = 3,
#endif
    /// Completed parsing a key.
    PSYC_PARSE_TABLE_KEY         = 4,
    /// Completed parsing a value.
    PSYC_PARSE_TABLE_VALUE       = 5,
    /// Completed parsing a key and reached end of buffer.
    PSYC_PARSE_TABLE_KEY_END     = 6,
    /// Completed parsing a value and reached end of buffer.
    PSYC_PARSE_TABLE_VALUE_END   = 7,
    /// Binary table is incomplete.
    PSYC_PARSE_TABLE_INCOMPLETE  = 8
} PsycParseTableRC;
/**
 * Parts of a table; the table parser keeps its current one in its state.
 */
typedef enum {
    PSYC_TABLE_PART_START      = 0, ///< Nothing parsed yet.
    PSYC_TABLE_PART_WIDTH      = 1, ///< Parsing the width that follows the leading '*'.
#ifdef PSYC_PARSE_TABLE_HEAD
    PSYC_TABLE_PART_HEAD_START = 2, ///< About to parse the column names.
    PSYC_TABLE_PART_HEAD       = 3, ///< Parsing the column names.
#endif
    PSYC_TABLE_PART_BODY_START = 4, ///< About to parse the keys and values.
    PSYC_TABLE_PART_BODY       = 5  ///< Parsing the keys and values.
} PsycTablePart;
/** /**
* Struct for keeping parser state. * Struct for keeping parser state.
*/ */
@ -239,11 +277,26 @@ typedef struct {
size_t startc; ///< Line start position. size_t startc; ///< Line start position.
PsycString buffer; ///< Buffer with data to be parsed. PsycString buffer; ///< Buffer with data to be parsed.
PsycListType type; ///< List type. PsycListType type; ///< List type.
char term; ///< Terminator character at the end.
uint8_t term_set; ///< Look for terminator.
size_t elemParsed; ///< Number of bytes parsed from the elem so far. size_t elemParsed; ///< Number of bytes parsed from the elem so far.
size_t elemLength; ///< Expected length of the elem. size_t elemLength; ///< Expected length of the elem.
} PsycParseListState; } PsycParseListState;
/**
 * Struct for keeping table parser state.
 *
 * Zeroed by psyc_parse_table_state_init(), given its input by
 * psyc_parse_table_buffer_set() and advanced by psyc_parse_table().
 */
typedef struct {
size_t cursor; ///< Current position in buffer.
size_t startc; ///< Cursor position at the start of the current psyc_parse_table() call.
PsycString buffer; ///< Buffer with data to be parsed.
PsycTablePart part; ///< Part of the table the parser is currently in.
size_t width; ///< Width of table, read from the digits after the leading '*'.
size_t elems; ///< Elems parsed so far in the table (column names and body cells).
PsycParseListState list; ///< State of the list parser that splits the head and the body.
} PsycParseTableState;
/** /**
* Initializes the state struct. * Initializes the state struct.
* *
@ -273,8 +326,7 @@ psyc_parse_state_init (PsycParseState *state, uint8_t flags)
* @see PsycString * @see PsycString
*/ */
static inline void static inline void
psyc_parse_buffer_set (PsycParseState *state, char *buffer, psyc_parse_buffer_set (PsycParseState *state, const char *buffer, size_t length)
size_t length)
{ {
state->buffer = (PsycString) {length, buffer}; state->buffer = (PsycString) {length, buffer};
state->cursor = 0; state->cursor = 0;
@ -286,9 +338,7 @@ psyc_parse_buffer_set (PsycParseState *state, char *buffer,
} }
/** /**
* Initializes the list state struct. * Initializes the list state.
*
* @param state Pointer to the list state struct that should be initialized.
*/ */
static inline void static inline void
psyc_parse_list_state_init (PsycParseListState *state) psyc_parse_list_state_init (PsycParseListState *state)
@ -306,6 +356,32 @@ psyc_parse_list_buffer_set (PsycParseListState *state, char *buffer, size_t leng
state->cursor = 0; state->cursor = 0;
} }
/**
 * Sets a terminator character for the list parser.
 *
 * Once set, the list parser also ends the list when it meets this character.
 *
 * @param state List parser state to update.
 * @param term Character that terminates the list.
 */
static inline void
psyc_parse_list_term_set (PsycParseListState *state, char term)
{
    state->term_set = PSYC_TRUE;
    state->term = term;
}
/**
 * Initializes the table state by zeroing every field.
 *
 * @param state Pointer to the table state struct that should be initialized.
 */
static inline void
psyc_parse_table_state_init (PsycParseTableState *state)
{
    memset(state, 0, sizeof *state);
}
/**
 * Sets a new buffer in the table parser state struct with data to be parsed.
 *
 * The cursor is moved back to the start of the new buffer.
 *
 * @param state Pointer to the table state struct.
 * @param buffer Data to be parsed.
 * @param length Length of the data in bytes.
 */
static inline void
psyc_parse_table_buffer_set (PsycParseTableState *state, char *buffer, size_t length)
{
    PsycString input = {length, buffer};

    state->cursor = 0;
    state->buffer = input;
}
static inline size_t static inline size_t
psyc_parse_content_length (PsycParseState *state) psyc_parse_content_length (PsycParseState *state)
{ {
@ -394,14 +470,17 @@ static inline
PsycParseListRC PsycParseListRC
psyc_parse_list (PsycParseListState *state, PsycString *elem); psyc_parse_list (PsycParseListState *state, PsycString *elem);
static inline PsycBool PsycParseTableRC
psyc_parse_table (PsycParseTableState *state, PsycString *elem);
static inline PsycRC
psyc_parse_number (const char *value, size_t len, int64_t *n) psyc_parse_number (const char *value, size_t len, int64_t *n)
{ {
size_t c = 0; size_t c = 0;
uint8_t neg = 0; uint8_t neg = 0;
if (!value) if (!value)
return PSYC_FALSE; return PSYC_ERROR;
if (value[0] == '-') if (value[0] == '-')
neg = ++c; neg = ++c;
@ -411,42 +490,42 @@ psyc_parse_number (const char *value, size_t len, int64_t *n)
*n = 10 * *n + (value[c++] - '0'); *n = 10 * *n + (value[c++] - '0');
if (c != len) if (c != len)
return PSYC_FALSE; return PSYC_ERROR;
if (neg) if (neg)
*n = 0 - *n; *n = 0 - *n;
return PSYC_TRUE; return PSYC_OK;
} }
static inline PsycBool static inline PsycRC
psyc_parse_number_unsigned (const char *value, size_t len, uint64_t *n) psyc_parse_number_unsigned (const char *value, size_t len, uint64_t *n)
{ {
size_t c = 0; size_t c = 0;
if (!value) if (!value)
return PSYC_FALSE; return PSYC_ERROR;
*n = 0; *n = 0;
while (c < len && value[c] >= '0' && value[c] <= '9') while (c < len && value[c] >= '0' && value[c] <= '9')
*n = 10 * *n + (value[c++] - '0'); *n = 10 * *n + (value[c++] - '0');
return c == len ? PSYC_TRUE : PSYC_FALSE; return c == len ? PSYC_OK : PSYC_ERROR;
} }
static inline PsycBool static inline PsycRC
psyc_parse_time (const char *value, size_t len, time_t *t) psyc_parse_time (const char *value, size_t len, time_t *t)
{ {
return psyc_parse_number(value, len, t); return psyc_parse_number(value, len, t);
} }
static inline PsycBool static inline PsycRC
psyc_parse_date (const char *value, size_t len, time_t *t) psyc_parse_date (const char *value, size_t len, time_t *t)
{ {
if (psyc_parse_number(value, len, t)) { if (psyc_parse_number(value, len, t)) {
*t += PSYC_EPOCH; *t += PSYC_EPOCH;
return PSYC_TRUE; return PSYC_OK;
} }
return PSYC_FALSE; return PSYC_ERROR;
} }
/** /**

View file

@ -328,7 +328,7 @@ psyc_parse (PsycParseState *state, char *oper,
// fall thru // fall thru
case PSYC_PART_DATA: case PSYC_PART_DATA:
PSYC_PART_DATA: PSYC_PART_DATA:
value->data = state->buffer.data + state->cursor; value->data = state->buffer.data + state->cursor;
value->length = 0; value->length = 0;
@ -385,7 +385,7 @@ psyc_parse (PsycParseState *state, char *oper,
} }
case PSYC_PART_END: case PSYC_PART_END:
PSYC_PART_END: PSYC_PART_END:
// if data was not empty next is the \n at the end of data // if data was not empty next is the \n at the end of data
if (state->contentLengthFound && state->valueLengthFound if (state->contentLengthFound && state->valueLengthFound
&& state->valueLength && !(state->flags & PSYC_PARSE_ROUTING_ONLY)) { && state->valueLength && !(state->flags & PSYC_PARSE_ROUTING_ONLY)) {
@ -447,10 +447,20 @@ psyc_parse_list (PsycParseListState *state, PsycString *elem)
if (state->cursor >= state->buffer.length) if (state->cursor >= state->buffer.length)
return PSYC_PARSE_LIST_END; return PSYC_PARSE_LIST_END;
while (state->buffer.data[state->cursor] != '|') { if (state->term_set) {
elem->length++; while (state->buffer.data[state->cursor] != '|') {
if (++(state->cursor) >= state->buffer.length) elem->length++;
return PSYC_PARSE_LIST_END; if (state->buffer.data[state->cursor] == state->term)
return PSYC_PARSE_LIST_END;
if (++(state->cursor) >= state->buffer.length)
return PSYC_PARSE_LIST_END;
}
} else {
while (state->buffer.data[state->cursor] != '|') {
elem->length++;
if (++(state->cursor) >= state->buffer.length)
return PSYC_PARSE_LIST_END;
}
} }
state->cursor++; state->cursor++;
return PSYC_PARSE_LIST_ELEM; return PSYC_PARSE_LIST_ELEM;
@ -497,3 +507,101 @@ psyc_parse_list (PsycParseListState *state, PsycString *elem)
return PSYC_PARSE_LIST_ERROR; // should not be reached return PSYC_PARSE_LIST_ERROR; // should not be reached
} }
/**
 * Parses the next element of a table.
 *
 * A buffer starting with '*' carries the table width as decimal digits,
 * followed by a space and the body or, when PSYC_PARSE_TABLE_HEAD is defined,
 * by '|' and the column names. Any other buffer is parsed as body right away.
 * Column names and body cells are split by psyc_parse_list().
 *
 * Call it repeatedly; each call reports one parsed item.
 *
 * @param state Table parser state, set up with psyc_parse_table_state_init()
 *              and psyc_parse_table_buffer_set().
 * @param elem Receives the parsed element. For PSYC_PARSE_TABLE_WIDTH only
 *             elem->length is set, to the width of the table.
 *
 * @return PSYC_PARSE_TABLE_WIDTH, PSYC_PARSE_TABLE_NAME_KEY,
 *         PSYC_PARSE_TABLE_NAME_VALUE, PSYC_PARSE_TABLE_KEY or
 *         PSYC_PARSE_TABLE_VALUE for a parsed item,
 *         PSYC_PARSE_TABLE_KEY_END or PSYC_PARSE_TABLE_VALUE_END for the last
 *         item in the buffer, PSYC_PARSE_TABLE_INCOMPLETE if the buffer ended
 *         too early, or a negative PSYC_PARSE_TABLE_ERROR* value.
 */
PsycParseTableRC
psyc_parse_table (PsycParseTableState *state, PsycString *elem)
{
    if (state->cursor >= state->buffer.length)
        return PSYC_PARSE_TABLE_INCOMPLETE;

    state->startc = state->cursor;

    switch (state->part) {
    case PSYC_TABLE_PART_START:
        if (state->buffer.data[state->cursor] != '*') {
            // No width given, the body starts right here.
            state->part = PSYC_TABLE_PART_BODY_START;
            goto PSYC_TABLE_PART_BODY_START;
        } else {
            state->part = PSYC_TABLE_PART_WIDTH;
            ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_TABLE_INCOMPLETE);
        }
        // fall thru

    case PSYC_TABLE_PART_WIDTH:
        // NOTE(review): ADVANCE_CURSOR_OR_RETURN is defined outside this view;
        // if it rewinds the cursor before returning, a resumed call would add
        // the same digits to state->width again - confirm.
        if (psyc_is_numeric(state->buffer.data[state->cursor])) {
            do {
                state->width =
                    10 * state->width + state->buffer.data[state->cursor] - '0';
                ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_TABLE_INCOMPLETE);
            } while (psyc_is_numeric(state->buffer.data[state->cursor]));
        } else
            return PSYC_PARSE_TABLE_ERROR_WIDTH;

        // The character after the width selects the next part. Any other
        // character leaves the part unchanged.
        switch (state->buffer.data[state->cursor]) {
#ifdef PSYC_PARSE_TABLE_HEAD
        case '|':
            state->part = PSYC_TABLE_PART_HEAD_START;
            break;
#endif
        case ' ':
            state->part = PSYC_TABLE_PART_BODY_START;
            state->cursor++;
            break;
        }

        elem->length = state->width;
        return PSYC_PARSE_TABLE_WIDTH;

#ifdef PSYC_PARSE_TABLE_HEAD
    case PSYC_TABLE_PART_HEAD_START:
        // The column names form a list terminated by a space.
        psyc_parse_list_buffer_set(&state->list,
                                   state->buffer.data + state->cursor,
                                   state->buffer.length - state->cursor);
        psyc_parse_list_term_set(&state->list, ' ');
        state->part = PSYC_TABLE_PART_HEAD;
        // fall thru

    case PSYC_TABLE_PART_HEAD:
        switch (psyc_parse_list(&state->list, elem)) {
        case PSYC_PARSE_LIST_ELEM:
            if (state->elems == 0) {
                state->elems++;
                return PSYC_PARSE_TABLE_NAME_KEY;
            } else if (state->elems < state->width) {
                state->elems++;
                return PSYC_PARSE_TABLE_NAME_VALUE;
            } else // too many elements
                return PSYC_PARSE_TABLE_ERROR_HEAD;

        case PSYC_PARSE_LIST_END:
            // The last name must be the width-th one after the key name.
            if (state->elems != state->width)
                return PSYC_PARSE_TABLE_ERROR_HEAD;

            state->part = PSYC_TABLE_PART_BODY_START;
            // Skip the head and the terminator that follows it.
            state->cursor += state->list.cursor + 1;
            psyc_parse_list_state_init(&state->list);
            return state->elems++ == 0
                ? PSYC_PARSE_TABLE_NAME_KEY : PSYC_PARSE_TABLE_NAME_VALUE;

        default:
            return PSYC_PARSE_TABLE_ERROR_HEAD;
        }
#endif

    case PSYC_TABLE_PART_BODY_START:
    PSYC_TABLE_PART_BODY_START:
        psyc_parse_list_buffer_set(&state->list,
                                   state->buffer.data + state->cursor,
                                   state->buffer.length - state->cursor);
        state->part = PSYC_TABLE_PART_BODY;
        // fall thru

    case PSYC_TABLE_PART_BODY:
        // Every row is a key followed by width values, so each
        // (width + 1)-th element is a key.
        switch (psyc_parse_list(&state->list, elem)) {
        case PSYC_PARSE_LIST_ELEM:
            return state->elems++ % (state->width + 1) == 0
                ? PSYC_PARSE_TABLE_KEY : PSYC_PARSE_TABLE_VALUE;

        case PSYC_PARSE_LIST_END:
            return state->elems++ % (state->width + 1) == 0
                ? PSYC_PARSE_TABLE_KEY_END : PSYC_PARSE_TABLE_VALUE_END;

        default:
            return PSYC_PARSE_TABLE_ERROR_BODY;
        }
    }

    return PSYC_PARSE_TABLE_ERROR; // should not be reached
}

View file

@ -3,7 +3,7 @@ DEBUG = 2
CFLAGS = -I../include -I../src -Wall -std=c99 ${OPT} CFLAGS = -I../include -I../src -Wall -std=c99 ${OPT}
LDFLAGS = -L../lib LDFLAGS = -L../lib
LOADLIBES = -lpsyc -lm LOADLIBES = -lpsyc -lm
TARGETS = test_psyc test_psyc_speed test_parser test_match test_render test_text var_is_routing var_type uniform_parse TARGETS = test_psyc test_psyc_speed test_parser test_match test_render test_text var_is_routing var_type uniform_parse test_table
O = test.o O = test.o
WRAPPER = WRAPPER =
DIET = diet DIET = diet
@ -48,6 +48,7 @@ test: ${TARGETS}
./var_is_routing ./var_is_routing
./var_type ./var_type
./uniform_parse ./uniform_parse
./test_table
x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./test_psyc -f $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./test_psyc -f $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x
x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./test_psyc -rf $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./test_psyc -rf $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x

80
test/test_table.c Normal file
View file

@ -0,0 +1,80 @@
#include <stdio.h>
#include <stdlib.h>
#include <psyc/parse.h>
/**
 * Parses a table and prints every item the parser reports.
 *
 * Stops at the first result that is not a parsed item, so an incomplete
 * buffer ends the run instead of being parsed again and again.
 *
 * @param buf Table to parse.
 * @param buflen Length of buf in bytes.
 *
 * @return 1 if the parser reached the last key or value of the buffer,
 *         0 on any error or incomplete input.
 */
int
parse_table (char *buf, size_t buflen)
{
    printf(">> %.*s\n", (int)buflen, buf);

    int ret;
    PsycString elem = {0};
    PsycParseTableState state;

    psyc_parse_table_state_init(&state);
    psyc_parse_table_buffer_set(&state, buf, buflen);

    for (;;) {
        ret = psyc_parse_table(&state, &elem);

        switch (ret) {
        case PSYC_PARSE_TABLE_WIDTH:
            // elem.length is a size_t, which needs the z length modifier.
            printf("width: %zu\n", elem.length);
            break;
#ifdef PSYC_PARSE_TABLE_HEAD
        case PSYC_PARSE_TABLE_NAME_KEY:
            printf("name key: %.*s\n", (int)PSYC_S2ARG2(elem));
            break;
        case PSYC_PARSE_TABLE_NAME_VALUE:
            printf("name val: %.*s\n", (int)PSYC_S2ARG2(elem));
            break;
#endif
        case PSYC_PARSE_TABLE_KEY_END:
            printf("key: %.*s\n", (int)PSYC_S2ARG2(elem));
            return 1;
        case PSYC_PARSE_TABLE_KEY:
            printf("key: %.*s\n", (int)PSYC_S2ARG2(elem));
            break;
        case PSYC_PARSE_TABLE_VALUE_END:
            printf("val: %.*s\n", (int)PSYC_S2ARG2(elem));
            return 1;
        case PSYC_PARSE_TABLE_VALUE:
            printf("val: %.*s\n", (int)PSYC_S2ARG2(elem));
            break;
        default:
            // Errors and PSYC_PARSE_TABLE_INCOMPLETE both end the run.
            printf("err: %d\n", ret);
            return 0;
        }
    }
}
/**
 * Runs parse_table() on the sample tables in order.
 *
 * @return 0 if every table parses, otherwise the exit code listed next to
 *         the first table that fails.
 */
int
main (int argc, char **argv)
{
    // Tables with column names only run when PSYC_PARSE_TABLE_HEAD is defined.
    const struct {
        char *buf;
        size_t len;
        int code;
    } cases[] = {
#ifdef PSYC_PARSE_TABLE_HEAD
        { PSYC_C2ARG("*2|_key|_val1|_val2 |_foo|bar|baz|_aaa|bbb|ccc"), 1 },
#endif
        { PSYC_C2ARG("*2 |_foo|bar|baz|_aaa|bbb|ccc"), 2 },
#ifdef PSYC_PARSE_TABLE_HEAD
        { PSYC_C2ARG("*1|_key|_val1 |_foo|bar|_baz|aaa|_bbb|ccc"), 1 },
#endif
        { PSYC_C2ARG("*1 |_foo|bar|_baz|aaa|_bbb|ccc"), 3 },
#ifdef PSYC_PARSE_TABLE_HEAD
        { PSYC_C2ARG("*0|_key |foo|bar|baz|aaa|bbb|ccc"), 4 },
#endif
        { PSYC_C2ARG("*0 |foo|bar|baz|aaa|bbb|ccc"), 4 },
        { PSYC_C2ARG("|foo|bar|baz|aaa|bbb|ccc"), 5 },
    };
    size_t i;

    for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        if (!parse_table(cases[i].buf, cases[i].len))
            return cases[i].code;
    }

    return 0;
}