/*
 * libpsyc/src/parser.c
 *
 * PSYC packet parser and list value parser.
 * Source repository: git://git.psyc.eu/libpsyc
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifdef DEBUG
#include <stdio.h>
#endif

#include <psyc/lib.h>
#include <psyc/parser.h>

/**
 * Advance the cursor by one byte.  If that moves the cursor to or past
 * the end of the buffer, rewind it to the start of the current line
 * (state->startc) and return `ret` from the enclosing function.
 *
 * Expects a pointer named `state` with `cursor`, `startc` and
 * `buffer.length` members to be in scope at the point of use.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 */
#define ADVANCE_CURSOR_OR_RETURN(ret) \
	do \
	{ \
		if (++(state->cursor) >= state->buffer.length) \
		{ \
			state->cursor = state->startc; \
			return ret; \
		} \
	} \
	while (0)

2011-04-22 18:33:22 +00:00
inline void PSYC_initParseState (PSYC_ParseState* state)
{
memset(state, 0, sizeof(PSYC_ParseState));
}
/**
 * Initialize a packet parser state and set its flags.
 * The structure is zeroed first, then `flags` is stored in state->flags.
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param state Parser state to initialize.
 * @param flags Value stored in state->flags.
 */
void PSYC_initParseState2 (PSYC_ParseState* state, uint8_t flags)
{
	memset(state, 0, sizeof *state);
	state->flags = flags;
}

/**
 * Initialize a list parser state by setting every byte of the
 * structure to zero.
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param state List parser state to initialize.
 */
void PSYC_initParseListState (PSYC_ParseListState* state)
{
	memset(state, 0, sizeof *state);
}

/**
 * Hand a new input buffer to the packet parser.
 * The buffer descriptor is copied into the state and the cursor is
 * reset to the first byte of the new buffer.
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param state  Parser state.
 * @param newBuf Buffer to parse next.
 */
void PSYC_nextParseBuffer (PSYC_ParseState* state, PSYC_String newBuf)
{
	state->buffer = newBuf;
	state->cursor = 0;
}

/**
 * Hand a new input buffer to the list parser.
 * The buffer descriptor is copied into the state and the cursor is
 * reset to the first byte of the new buffer.
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param state  List parser state.
 * @param newBuf Buffer to parse next.
 */
void PSYC_nextParseListBuffer (PSYC_ParseListState* state, PSYC_String newBuf)
{
	state->buffer = newBuf;
	state->cursor = 0;
}

/**
 * Return the content length currently stored in the parser state.
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param s Parser state.
 * @return The value of s->contentLength.
 */
size_t PSYC_getContentLength (PSYC_ParseState* s)
{
	return s->contentLength;
}

/**
 * Determines if the argument is a glyph.
 * Glyphs are: : = + - ? !
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param g Byte to test.
 * @return 1 if g is a glyph, 0 otherwise.
 */
char isGlyph(uint8_t g)
{
	switch (g)
	{
		case ':':
		case '=':
		case '+':
		case '-':
		case '?':
		case '!':
			return 1;
		default:
			return 0;
	}
}

/**
 * Determines if the argument is numeric, i.e. an ASCII digit '0'..'9'.
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param c Byte to test.
 * @return 1 if c is a digit, 0 otherwise.
 */
char isNumeric(uint8_t c)
{
	return c >= '0' && c <= '9';
}

/**
 * Determines if the argument is alphanumeric:
 * an ASCII letter a-z or A-Z, or a digit as decided by isNumeric().
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param c Byte to test.
 * @return Non-zero if c is alphanumeric, 0 otherwise.
 */
char isAlphaNumeric(uint8_t c)
{
	return
		(c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		isNumeric(c);
}

/**
 * Determines if the argument is a keyword character.
 * Keyword characters are: alphanumeric and _
 *
 * Deliberately not `inline`: a plain C99 `inline` definition emits no
 * external symbol, so calls that are not inlined would fail to link.
 *
 * @param c Byte to test.
 * @return Non-zero if c may appear in a keyword, 0 otherwise.
 */
char isKwChar(uint8_t c)
{
	return isAlphaNumeric(c) || c == '_';
}

2010-02-20 19:31:22 +00:00
/**
2011-04-19 19:54:44 +00:00
* Parse variable name or method name.
* It should contain one or more keyword characters.
2011-04-22 15:09:32 +00:00
* @return PSYC_PARSE_ERROR or PSYC_PARSE_SUCCESS
*/
inline PSYC_ParseRC PSYC_parseName(PSYC_ParseState* state, PSYC_String* name)
2010-02-20 16:40:09 +00:00
{
name->ptr = state->buffer.ptr + state->cursor;
name->length = 0;
2011-04-18 08:09:35 +00:00
while (isKwChar(state->buffer.ptr[state->cursor]))
{
2011-04-19 19:54:44 +00:00
name->length++; // was a valid char, increase length
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
}
2011-04-18 08:09:35 +00:00
2011-04-22 15:09:32 +00:00
return name->length > 0 ? PSYC_PARSE_SUCCESS : PSYC_PARSE_ERROR;
}
2010-02-20 16:40:09 +00:00
/**
* Parse binary data.
*
* @param state Parser state.
* @param value Start & length of parsed data is saved here.
* @param length Expected length of the data.
* @param parsed Number of bytes parsed so far.
*
2011-04-22 15:09:32 +00:00
* @return PSYC_PARSE_COMPLETE or PSYC_PARSE_INCOMPLETE
*/
inline PSYC_ParseRC PSYC_parseBinaryValue(PSYC_ParseState* state, PSYC_String* value, size_t* length, size_t* parsed)
{
2011-04-19 07:42:43 +00:00
size_t remaining = *length - *parsed;
value->ptr = state->buffer.ptr + state->cursor;
2010-02-20 16:40:09 +00:00
2011-04-19 19:54:44 +00:00
if (state->cursor + remaining > state->buffer.length) // is the length larger than this buffer?
2010-02-20 16:40:09 +00:00
{
value->length = state->buffer.length - state->cursor;
*parsed += value->length;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_INCOMPLETE;
}
2011-04-17 10:05:14 +00:00
value->length += remaining;
state->cursor += remaining;
*parsed += value->length;
2011-04-17 10:05:14 +00:00
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_COMPLETE;
}
2011-04-17 10:05:14 +00:00
/**
2011-04-19 19:54:44 +00:00
* Parse simple or binary variable.
2011-04-22 15:09:32 +00:00
* @return PSYC_PARSE_ERROR or PSYC_PARSE_SUCCESS
*/
inline PSYC_ParseRC PSYC_parseModifier(PSYC_ParseState* state, char* oper, PSYC_String* name, PSYC_String* value)
{
2011-04-22 20:59:15 +00:00
*oper = *(state->buffer.ptr + state->cursor);
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
2011-04-17 10:05:14 +00:00
2011-04-22 15:09:32 +00:00
if (PSYC_parseName(state, name) != PSYC_PARSE_SUCCESS)
return PSYC_PARSE_ERROR_VAR_NAME;
2011-04-18 08:09:35 +00:00
value->length = 0;
state->valueLength = 0;
state->valueParsed = 0;
2010-02-20 16:40:09 +00:00
2011-04-19 19:54:44 +00:00
// Parse the value.
// If we're in the content part check if it's a binary var.
if (state->part == PSYC_PART_CONTENT && state->buffer.ptr[state->cursor] == ' ') // binary arg
{ // After SP the length follows.
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
2010-02-20 16:40:09 +00:00
if (isNumeric(state->buffer.ptr[state->cursor]))
2010-02-20 16:40:09 +00:00
{
do
{
state->valueLength = 10 * state->valueLength + state->buffer.ptr[state->cursor] - '0';
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
}
while (isNumeric(state->buffer.ptr[state->cursor]));
}
else
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ERROR_VAR_LEN;
2011-04-19 19:54:44 +00:00
// After the length a TAB follows.
if (state->buffer.ptr[state->cursor] != '\t')
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ERROR_VAR_TAB;
2011-04-19 19:54:44 +00:00
if (state->buffer.length <= ++(state->cursor)) // Incremented cursor inside length?
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ENTITY_INCOMPLETE;
2011-04-22 15:09:32 +00:00
if (PSYC_parseBinaryValue(state, value, &(state->valueLength), &(state->valueParsed)) == PSYC_PARSE_INCOMPLETE)
return PSYC_PARSE_ENTITY_INCOMPLETE;
state->cursor++;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_SUCCESS;
}
2011-04-19 19:54:44 +00:00
else if (state->buffer.ptr[state->cursor] == '\t') // simple arg
2010-02-20 16:40:09 +00:00
{
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
value->ptr = state->buffer.ptr + state->cursor;
while (state->buffer.ptr[state->cursor] != '\n')
2010-02-20 16:40:09 +00:00
{
value->length++;
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
}
state->cursor++;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_SUCCESS;
}
else
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ERROR_VAR_TAB;
}
2010-02-20 16:40:09 +00:00
2011-04-19 19:54:44 +00:00
/**
* Parse PSYC packets.
* Generalized line-based parser.
*/
PSYC_ParseRC PSYC_parse(PSYC_ParseState* state, char* oper, PSYC_String* name, PSYC_String* value)
{
2011-04-19 19:54:44 +00:00
int ret; // a return value
size_t pos; // a cursor position
2011-04-19 19:54:44 +00:00
// Start position of the current line in the buffer
// in case we return insufficent, we rewind to this position.
state->startc = state->cursor;
2011-04-19 19:54:44 +00:00
// First we test if we can access the first char.
if (state->cursor >= state->buffer.length) // cursor is not inside the length
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_INSUFFICIENT;
switch (state->part)
{
2011-04-19 19:54:44 +00:00
case PSYC_PART_RESET: // New packet starts here, reset state.
state->valueParsed = 0;
state->valueLength = 0;
state->contentParsed = 0;
state->contentLength = 0;
state->contentLengthFound = 0;
2011-04-22 15:09:32 +00:00
state->part = PSYC_PART_ROUTING;
2011-04-19 19:54:44 +00:00
// fall thru
2011-04-22 15:09:32 +00:00
case PSYC_PART_ROUTING:
2011-04-19 19:54:44 +00:00
// Each line of the header starts with a glyph,
// i.e. :_name, -_name +_name etc,
// so just test if the first char is a glyph.
if (isGlyph(state->buffer.ptr[state->cursor])) // is the first char a glyph?
{ // it is a glyph, so a variable starts here
ret = PSYC_parseModifier(state, oper, name, value);
2011-04-22 15:09:32 +00:00
return ret == PSYC_PARSE_SUCCESS ? PSYC_PARSE_ROUTING : ret;
2011-04-17 11:59:07 +00:00
}
2011-04-19 19:54:44 +00:00
else // not a glyph
2011-04-17 11:59:07 +00:00
{
state->part = PSYC_PART_LENGTH;
2011-04-19 19:54:44 +00:00
// fall thru
2010-02-20 16:40:09 +00:00
}
2011-04-17 11:59:07 +00:00
case PSYC_PART_LENGTH:
2011-04-19 19:54:44 +00:00
// End of header, content starts with an optional length then a NL
if (isNumeric(state->buffer.ptr[state->cursor]))
2011-04-17 11:59:07 +00:00
{
state->contentLengthFound = 1;
state->contentLength = 0;
do
{
state->contentLength = 10 * state->contentLength + state->buffer.ptr[state->cursor] - '0';
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
}
while (isNumeric(state->buffer.ptr[state->cursor]));
2011-04-17 11:59:07 +00:00
}
2011-04-19 19:54:44 +00:00
if (state->buffer.ptr[state->cursor] == '\n') // start of content
{
2011-04-19 19:54:44 +00:00
// If we need to parse the header only and we know the content length,
// then skip content parsing.
2011-04-22 15:09:32 +00:00
if (state->flags & PSYC_PARSE_HEADER_ONLY && state->contentLengthFound)
state->part = PSYC_PART_DATA;
else
state->part = PSYC_PART_CONTENT;
}
2011-04-19 19:54:44 +00:00
else // Not start of content, this must be the end.
{
2011-04-19 19:54:44 +00:00
// If we have a length then it should've been followed by a \n
if (state->contentLengthFound)
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ERROR_LENGTH;
2010-02-20 16:40:09 +00:00
state->part = PSYC_PART_END;
goto PSYC_PART_END;
}
2010-02-20 16:40:09 +00:00
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
state->startc = state->cursor;
2011-04-19 19:54:44 +00:00
// fall thru
2010-02-20 16:40:09 +00:00
case PSYC_PART_CONTENT:
2011-04-19 19:54:44 +00:00
// In case of an incomplete binary variable resume parsing it.
if (state->valueParsed < state->valueLength) {
ret = PSYC_parseBinaryValue(state, value, &(state->valueLength), &(state->valueParsed));
state->contentParsed += value->length;
2011-04-22 15:09:32 +00:00
return ret == PSYC_PARSE_COMPLETE ? PSYC_PARSE_ENTITY : PSYC_PARSE_ENTITY_INCOMPLETE;
}
2010-02-20 16:40:09 +00:00
2011-04-19 19:54:44 +00:00
// Each line of the header starts with a glyph,
// i.e. :_name, -_name +_name etc.
// So just test if the first char is a glyph.
// In the body, the same applies, only that the
// method does not start with a glyph.
if (isGlyph(state->buffer.ptr[state->cursor]))
{
pos = state->cursor;
ret = PSYC_parseModifier(state, oper, name, value);
state->contentParsed += state->cursor - pos;
2011-04-22 15:09:32 +00:00
return ret == PSYC_PARSE_SUCCESS ? PSYC_PARSE_ENTITY : ret;
}
else
{
state->part = PSYC_PART_METHOD;
state->startc = state->cursor;
2011-04-19 19:54:44 +00:00
// fall thru
}
2010-02-20 16:40:09 +00:00
case PSYC_PART_METHOD:
pos = state->cursor;
2011-04-22 15:09:32 +00:00
if (PSYC_parseName(state, name) == PSYC_PARSE_SUCCESS)
2011-04-19 19:54:44 +00:00
{ // the method ends with a \n then the data follows
if (state->buffer.ptr[state->cursor] != '\n')
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ERROR_METHOD;
state->cursor++;
state->startc = state->cursor;
state->contentParsed += state->cursor - pos;
state->part = PSYC_PART_DATA;
if (state->cursor >= state->buffer.length)
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_INSUFFICIENT;
2011-04-19 19:54:44 +00:00
// fall thru
}
2011-04-19 19:54:44 +00:00
else // No method, which means the packet should end now.
{
state->part = PSYC_PART_END;
state->startc = state->cursor;
goto PSYC_PART_END;
}
2010-02-20 16:40:09 +00:00
case PSYC_PART_DATA:
value->ptr = state->buffer.ptr + state->cursor;
value->length = 0;
2010-02-20 16:40:09 +00:00
2011-04-19 19:54:44 +00:00
if (state->contentLengthFound) // We know the length of the packet.
{
2011-04-22 15:09:32 +00:00
if (PSYC_parseBinaryValue(state, value, &(state->contentLength), &(state->contentParsed)) == PSYC_PARSE_INCOMPLETE)
return PSYC_PARSE_BODY_INCOMPLETE;
state->cursor++;
state->part = PSYC_PART_END;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_BODY;
2011-04-15 23:42:36 +00:00
}
2011-04-19 19:54:44 +00:00
else // Search for the terminator.
{
2011-04-15 23:42:36 +00:00
while (1)
2010-02-20 18:50:10 +00:00
{
if (state->buffer.ptr[state->cursor] == '\n')
2010-02-20 21:18:39 +00:00
{
2011-04-19 19:54:44 +00:00
if (state->cursor+2 >= state->buffer.length) // incremented cursor inside length?
2010-02-20 21:18:39 +00:00
{
2011-04-19 17:41:25 +00:00
state->cursor = state->startc;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_INSUFFICIENT;
2010-02-20 21:18:39 +00:00
}
if (state->buffer.ptr[state->cursor+1] == '|' &&
2011-04-19 19:54:44 +00:00
state->buffer.ptr[state->cursor+2] == '\n') // packet ends here
{
state->cursor++;
state->part = PSYC_PART_END;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_BODY;
}
2010-02-20 18:50:10 +00:00
}
value->length++;
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
2010-02-20 22:06:33 +00:00
}
}
case PSYC_PART_END:
PSYC_PART_END:
2011-04-19 19:54:44 +00:00
// End of packet, at this point we have already passed a \n
// and the cursor should point to |
if (state->cursor+1 >= state->buffer.length) // incremented cursor inside length?
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_INSUFFICIENT;
if (state->buffer.ptr[state->cursor] == '|' &&
2011-04-19 19:54:44 +00:00
state->buffer.ptr[state->cursor+1] == '\n') // packet ends here
2010-02-20 19:31:22 +00:00
{
state->cursor += 2;
state->part = PSYC_PART_RESET;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_COMPLETE;
}
2011-04-19 19:54:44 +00:00
else // packet should've ended here, return error
{
state->part = PSYC_PART_RESET;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ERROR_END;
2010-02-20 19:31:22 +00:00
}
2010-02-20 16:40:09 +00:00
}
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_ERROR; // should not be reached
2011-04-19 17:41:25 +00:00
}
2011-04-19 19:54:44 +00:00
/**
* List value parser.
2011-04-20 20:31:04 +00:00
* @return see PSYC_ListRC.
2011-04-19 19:54:44 +00:00
*/
PSYC_ParseListRC PSYC_parseList(PSYC_ParseListState* state, PSYC_String *name, PSYC_String* value, PSYC_String* elem)
2011-04-19 17:41:25 +00:00
{
if (state->cursor >= state->buffer.length)
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_INCOMPLETE;
2011-04-19 17:41:25 +00:00
state->startc = state->cursor;
2011-04-19 19:54:44 +00:00
if (!state->type) // If type is not set we're at the start
2011-04-19 17:41:25 +00:00
{
if (name->length < 5 || memcmp(name->ptr, "_list", 5) != 0 ||
2011-04-19 19:54:44 +00:00
(name->length > 5 && name->ptr[5] != '_')) // name should be _list or should start with _list_
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ERROR_NAME;
2011-04-19 17:41:25 +00:00
2011-04-19 19:54:44 +00:00
// First character is either | for text lists, or a number for binary lists
2011-04-19 17:41:25 +00:00
if (state->buffer.ptr[state->cursor] == '|')
{
state->type = PSYC_LIST_TEXT;
state->cursor++;
}
else if (isNumeric(state->buffer.ptr[state->cursor]))
state->type = PSYC_LIST_BINARY;
else
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ERROR_TYPE;
2011-04-19 17:41:25 +00:00
}
if (state->type == PSYC_LIST_TEXT)
{
elem->ptr = state->buffer.ptr + state->cursor;
elem->length = 0;
if (state->cursor >= state->buffer.length)
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_END;
2011-04-19 17:41:25 +00:00
while (state->buffer.ptr[state->cursor] != '|')
{
elem->length++;
if (++(state->cursor) >= state->buffer.length)
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_END;
2011-04-19 17:41:25 +00:00
}
state->cursor++;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ELEM;
2011-04-19 17:41:25 +00:00
}
2011-04-19 19:54:44 +00:00
else // binary list
2011-04-19 17:41:25 +00:00
{
2011-04-19 19:54:44 +00:00
if (!(state->elemParsed < state->elemLength))
{
// Element starts with a number.
2011-04-19 17:41:25 +00:00
if (isNumeric(state->buffer.ptr[state->cursor]))
{
do
{
state->elemLength = 10 * state->elemLength + state->buffer.ptr[state->cursor] - '0';
2011-04-22 15:09:32 +00:00
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_LIST_INCOMPLETE);
2011-04-19 17:41:25 +00:00
}
while (isNumeric(state->buffer.ptr[state->cursor]));
}
else
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ERROR_LEN;
2011-04-19 17:41:25 +00:00
if (state->buffer.ptr[state->cursor] != ' ')
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ERROR_LEN;
2011-04-19 17:41:25 +00:00
state->cursor++;
elem->ptr = state->buffer.ptr + state->cursor;
elem->length = 0;
state->elemParsed = 0;
}
2011-04-19 19:54:44 +00:00
// Start or resume parsing the binary data
if (state->elemParsed < state->elemLength)
{
2011-04-22 15:09:32 +00:00
if (PSYC_parseBinaryValue((PSYC_ParseState*)state, elem, &(state->elemLength), &(state->elemParsed)) == PSYC_PARSE_INCOMPLETE)
return PSYC_PARSE_LIST_INCOMPLETE;
2011-04-19 17:41:25 +00:00
state->elemLength = 0;
if (state->cursor >= state->buffer.length)
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_END;
2011-04-19 17:41:25 +00:00
if (state->buffer.ptr[state->cursor] != '|')
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ERROR_DELIM;
2011-04-19 17:41:25 +00:00
state->cursor++;
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ELEM;
2011-04-19 17:41:25 +00:00
}
}
2011-04-22 15:09:32 +00:00
return PSYC_PARSE_LIST_ERROR; // should not be reached
2010-02-20 16:40:09 +00:00
}