1
0
Fork 0
mirror of git://git.psyc.eu/libpsyc synced 2024-08-15 03:19:02 +00:00
libpsyc/include/psyc/parse.h

512 lines
14 KiB
C
Raw Normal View History

2011-05-09 07:02:15 +00:00
#ifndef PSYC_PARSE_H
/**
2011-05-09 07:02:15 +00:00
* @file psyc/parse.h
2011-05-09 12:37:57 +00:00
* @brief Interface for PSYC packet parsing.
2011-04-19 20:27:38 +00:00
*
2011-05-09 12:37:57 +00:00
* All parsing functions and the definitions they use are defined here.
*/
2011-04-19 20:27:38 +00:00
/**
2011-05-09 10:42:42 +00:00
* @defgroup parse Parsing Functions
*
2011-05-08 23:36:57 +00:00
* This module contains packet and list parsing functions.
2011-10-31 19:04:16 +00:00
* The parser adheres to the definition of a packet found at
*
2011-05-09 00:07:13 +00:00
* http://about.psyc.eu/Spec:Packet
*
* and the according terms are used throughout this documentation and in the
* return codes. You should be at least
2011-05-15 18:26:52 +00:00
* vaguely familiar with differences between "body" and "content" as
* well as "routing variable" and "entity variable".
2011-05-09 00:07:13 +00:00
*
2011-05-08 23:36:57 +00:00
*
* To parse a packet you first have to initialize a state:
*
* @code
2011-10-31 19:26:47 +00:00
* PsycParseState state;
2011-10-31 19:04:16 +00:00
* psyc_parse_state_init(&state, flags);
2011-05-08 23:36:57 +00:00
* @endcode
*
2011-10-31 19:04:16 +00:00
* With the flags parameter you can fine-tune what
2011-10-31 19:26:47 +00:00
* part of the packet should be parsed. @see PsycParseFlag
2011-05-08 23:36:57 +00:00
*
* Next, you have to tell the parser what it should parse. Assuming the variable
* raw_data points to our packet and raw_len contains the length, you can pass
* it to the parser as follows:
*
* @code
* char* raw_data; // points to our (possibly incomplete) packet
* size_t raw_len; // how many bytes of data
*
2011-10-31 19:04:16 +00:00
* psyc_parse_buffer_set(&state, raw_data, raw_len); // state is our initialized state from before
2011-05-08 23:36:57 +00:00
* @endcode
*
* Now the variables that will save the output of the parser need to be
* declared:
*
* @code
2011-10-31 19:26:47 +00:00
* PsycString name, // Name of the variable or method
2011-05-08 23:36:57 +00:00
* value; // Value of the variable or body
* char oper; // operator of the variable (if any)
* @endcode
*
* They will be passed to the parsing function which will set them to
2011-05-08 23:36:57 +00:00
* the according positions and lengths.
*
* Now the real parsing begins. The parsing function needs to be called
* repeatedly with various actions in between, depending on the return values.
*
* A simplified example follows, see test/testPsyc.c for actual code that
2011-06-12 12:16:39 +00:00
* handles incomplete packets as well.
2011-05-08 23:36:57 +00:00
*
* @code
*
* int ret;
2011-05-08 23:36:57 +00:00
*
* do // run the parsing in a loop, each time parsing one line
* {
* name.length = value.length = oper = 0; // reset the output variables
*
* ret = psyc_parse(&state, &oper, &name, &value); // call the parsing function
*
* switch (ret) // look at the return value
* {
* case PSYC_PARSE_ROUTING: // it is a routing variable
* case PSYC_PARSE_ENTITY: // it is an entity variable
* // Name, value and operator of the variable can now be found in the
* // respective variables:
2011-10-31 19:04:16 +00:00
* printf("Variable: %.*s Value: %.*s Operator: %c\n",
2011-11-01 11:06:58 +00:00
* (int) name.length, name.data, // %.*s expects an int precision
* (int) value.length, value.data,
2011-05-08 23:49:04 +00:00
* oper);
2011-11-01 11:06:58 +00:00
* // Note that the .data member still points at your original buffer. If
2011-05-08 23:36:57 +00:00
* // you want to reuse that buffer for the next packet, you better copy it
* // before passing it to the parser or you copy each variable now.
* break;
* case PSYC_PARSE_BODY: // it is the method and the body of the packet.
2011-10-31 19:04:16 +00:00
* printf("Method Name: %.*s Body: %.*s\n",
2011-11-01 11:06:58 +00:00
* (int) name.length, name.data, // name of the method
* (int) value.length, value.data); // value of the body
2011-05-08 23:36:57 +00:00
* break;
2011-05-08 23:53:30 +00:00
* case PSYC_PARSE_COMPLETE: // parsing of this packet is complete
2011-05-08 23:36:57 +00:00
* // You can simply continue parsing till you get the
* // PSYC_PARSE_INSUFFICIENT code which means the line is incomplete.
2011-05-08 23:36:57 +00:00
* continue;
2011-10-31 19:04:16 +00:00
* default: //
2011-05-08 23:36:57 +00:00
* fprintf(stderr, "Error %i happened :(\n", ret);
* return ret;
* }
2011-10-31 19:04:16 +00:00
* }
2011-05-15 22:01:02 +00:00
* while (ret > 0);
2011-05-08 23:36:57 +00:00
* @endcode
*
2011-05-09 07:11:59 +00:00
* This simple example does not consider some more complex cases when you
* receive incomplete packets but still want to access the data. This code would
2011-05-08 23:36:57 +00:00
* simply reject incomplete packets as error. A more detailed tutorial for
2011-05-09 07:11:59 +00:00
* incomplete packets will follow. In the meantime, have a look at the return
2011-10-31 19:26:47 +00:00
* codes in PsycParseRC and their explanations. @see PsycParseRC
2011-04-19 20:57:49 +00:00
*/
2011-05-08 23:53:30 +00:00
/** @{ */ // begin of parser group
2010-02-20 16:40:09 +00:00
#include <stdint.h>
2011-04-15 23:42:36 +00:00
#include <string.h>
#include <psyc.h>
2011-04-15 23:42:36 +00:00
2011-10-13 22:29:32 +00:00
/**
 * Flags selecting which part of a packet the parser should process.
 * Passed to psyc_parse_state_init().
 */
typedef enum {
    /// Default flag. Parse everything.
    PSYC_PARSE_ALL = 0,
    /// Parse only the header.
    PSYC_PARSE_ROUTING_ONLY = 1,
    /// Parse only the content.
    /// Parsing starts at the content and the content must be complete.
    PSYC_PARSE_START_AT_CONTENT = 2,
} PsycParseFlag;
2011-04-17 10:56:24 +00:00
/**
 * The return value definitions for the packet parsing function.
 *
 * Negative values are errors, positive values report progress.
 * The PSYC_PARSE_CONTENT_* codes deliberately share their numeric values
 * with the corresponding PSYC_PARSE_BODY_* codes.
 *
 * @see psyc_parse()
 */
typedef enum {
    /// Error, packet is not ending with a valid delimiter.
    PSYC_PARSE_ERROR_END = -8,
    /// Error, expected NL after the method.
    PSYC_PARSE_ERROR_METHOD = -7,
    /// Error, expected NL after a modifier.
    PSYC_PARSE_ERROR_MOD_NL = -6,
    /// Error, modifier length is not numeric.
    PSYC_PARSE_ERROR_MOD_LEN = -5,
    /// Error, expected TAB before modifier value.
    PSYC_PARSE_ERROR_MOD_TAB = -4,
    /// Error, modifier name is missing.
    PSYC_PARSE_ERROR_MOD_NAME = -3,
    /// Error, expected NL after the content length.
    PSYC_PARSE_ERROR_LENGTH = -2,
    /// Error in packet.
    PSYC_PARSE_ERROR = -1,
    /// Buffer contains insufficient amount of data.
    /// Fill another buffer and concatenate it with the end of the current buffer,
    /// from the cursor position to the end.
    PSYC_PARSE_INSUFFICIENT = 1,
    /// Routing modifier parsing done.
    /// Operator, name & value contain the respective parts.
    PSYC_PARSE_ROUTING = 2,
    /// Start of an incomplete entity modifier.
    /// Operator & name are complete, value is incomplete.
    PSYC_PARSE_ENTITY_START = 3,
    /// Continuation of an incomplete entity modifier.
    PSYC_PARSE_ENTITY_CONT = 4,
    /// End of an incomplete entity modifier.
    PSYC_PARSE_ENTITY_END = 5,
    /// Entity modifier parsing done in one go.
    /// Operator, name & value contain the respective parts.
    PSYC_PARSE_ENTITY = 6,
    /// Start of an incomplete body.
    /// Name contains method, value contains part of the body.
    /// Used when packet length is given.
    PSYC_PARSE_BODY_START = 7,
    /// Continuation of an incomplete body.
    /// Used when packet length is given.
    PSYC_PARSE_BODY_CONT = 8,
    /// End of an incomplete body.
    /// Used when packet length is given.
    PSYC_PARSE_BODY_END = 9,
    /// Body parsing done in one go, name contains method, value contains body.
    PSYC_PARSE_BODY = 10,
    /// Start of an incomplete content, value contains part of content.
    /// Used when PSYC_PARSE_ROUTING_ONLY is set.
    PSYC_PARSE_CONTENT_START = 7,
    /// Continuation of an incomplete content.
    /// Used when PSYC_PARSE_ROUTING_ONLY is set.
    PSYC_PARSE_CONTENT_CONT = 8,
    /// End of an incomplete content.
    /// Used when PSYC_PARSE_ROUTING_ONLY is set.
    PSYC_PARSE_CONTENT_END = 9,
    /// Content parsing done in one go, value contains the whole content.
    /// Used when PSYC_PARSE_ROUTING_ONLY is set.
    PSYC_PARSE_CONTENT = 10,
    /// Finished parsing packet.
    PSYC_PARSE_COMPLETE = 11,
} PsycParseRC;
/**
 * The return value definitions for the list parsing function.
 *
 * Negative values are errors, positive values report progress.
 *
 * @see psyc_parse_list()
 */
typedef enum {
    PSYC_PARSE_LIST_ERROR_DELIM = -4,
    PSYC_PARSE_LIST_ERROR_LEN = -3,
    PSYC_PARSE_LIST_ERROR_TYPE = -2,
    PSYC_PARSE_LIST_ERROR = -1,
    /// Completed parsing a list element.
    PSYC_PARSE_LIST_ELEM = 1,
    /// Reached end of buffer.
    PSYC_PARSE_LIST_END = 2,
    /// Binary list is incomplete.
    PSYC_PARSE_LIST_INCOMPLETE = 3,
} PsycParseListRC;
2011-04-15 23:42:36 +00:00
/**
* Struct for keeping parser state.
*/
2011-10-13 22:29:32 +00:00
typedef struct {
2011-05-09 14:32:39 +00:00
size_t cursor; ///< Current position in buffer.
size_t startc; ///< Position where the parsing would be resumed.
2011-10-31 19:26:47 +00:00
PsycString buffer; ///< Buffer with data to be parsed.
uint8_t flags; ///< Flags for the parser, see PsycParseFlag.
PsycPart part; ///< Part of the packet being parsed currently.
2011-04-15 23:42:36 +00:00
2011-05-09 14:32:39 +00:00
size_t routingLength; ///< Length of routing part parsed so far.
size_t contentParsed; ///< Number of bytes parsed from the content so far.
size_t contentLength; ///< Expected length of the content.
2011-10-31 19:26:47 +00:00
PsycBool contentLengthFound; ///< Is there a length given for this packet?
2011-05-09 14:32:39 +00:00
size_t valueParsed; ///< Number of bytes parsed from the value so far.
size_t valueLength; ///< Expected length of the value.
2011-10-31 19:26:47 +00:00
PsycBool valueLengthFound; ///< Is there a length given for this modifier?
} PsycParseState;
2011-04-15 23:42:36 +00:00
/**
* Struct for keeping list parser state.
*/
2011-10-13 22:29:32 +00:00
typedef struct {
2011-05-09 14:32:39 +00:00
size_t cursor; ///< Current position in buffer.
size_t startc; ///< Line start position.
2011-10-31 19:26:47 +00:00
PsycString buffer; ///< Buffer with data to be parsed.
PsycListType type; ///< List type.
2011-04-19 17:41:25 +00:00
2011-05-09 14:32:39 +00:00
size_t elemParsed; ///< Number of bytes parsed from the elem so far.
size_t elemLength; ///< Expected length of the elem.
2011-10-31 19:26:47 +00:00
} PsycParseListState;
2011-04-19 17:41:25 +00:00
2011-04-19 19:54:44 +00:00
/**
2011-05-03 23:30:09 +00:00
* Initializes the state struct.
2011-04-17 12:47:25 +00:00
*
2011-05-03 23:30:09 +00:00
* @param state Pointer to the state struct that should be initialized.
2011-10-31 19:26:47 +00:00
* @param flags Flags to be set for the parser, see PsycParseFlag.
* @see PsycParseFlag
2011-04-19 17:41:25 +00:00
*/
2011-05-03 21:11:13 +00:00
static inline
2011-10-31 19:26:47 +00:00
void psyc_parse_state_init (PsycParseState *state, uint8_t flags)
{
2011-10-31 19:26:47 +00:00
memset(state, 0, sizeof(PsycParseState));
state->flags = flags;
if (flags & PSYC_PARSE_START_AT_CONTENT)
state->part = PSYC_PART_CONTENT;
}
2011-04-15 23:42:36 +00:00
/**
* Sets a new buffer in the parser state struct with data to be parsed.
*
* This function does NOT copy the buffer. It will parse whatever is
* at the memory pointed to by buffer.
*
* @param state Pointer to the initialized state of the parser
2011-10-31 19:04:16 +00:00
* @param buffer pointer to the data that should be parsed
* @param length length of the data in bytes
2011-10-31 19:26:47 +00:00
* @see PsycString
*/
2011-05-03 21:11:13 +00:00
static inline
2011-10-31 19:26:47 +00:00
void psyc_parse_buffer_set (PsycParseState *state, char *buffer, size_t length)
{
2011-10-31 19:26:47 +00:00
state->buffer = (PsycString) {length, buffer};
state->cursor = 0;
2011-10-13 22:29:32 +00:00
if (state->flags & PSYC_PARSE_START_AT_CONTENT) {
2011-10-31 19:04:16 +00:00
state->contentLength = length;
state->contentLengthFound = PSYC_TRUE;
}
}
2011-04-19 19:54:44 +00:00
/**
2011-05-03 23:30:09 +00:00
* Initializes the list state struct.
2011-04-19 17:41:25 +00:00
*
2011-05-03 23:30:09 +00:00
* @param state Pointer to the list state struct that should be initialized.
2011-04-19 17:41:25 +00:00
*/
2011-05-03 21:11:13 +00:00
static inline
2011-10-31 19:26:47 +00:00
void psyc_parse_list_state_init (PsycParseListState *state)
{
2011-10-31 19:26:47 +00:00
memset(state, 0, sizeof(PsycParseListState));
}
2011-04-19 17:41:25 +00:00
/**
* Sets a new buffer in the list parser state struct with data to be parsed.
*/
2011-05-03 21:11:13 +00:00
static inline
2011-10-31 19:26:47 +00:00
void psyc_parse_list_buffer_set (PsycParseListState *state, char *buffer, size_t length)
{
2011-10-31 19:26:47 +00:00
state->buffer = (PsycString) {length, buffer};
state->cursor = 0;
}
2011-04-19 17:41:25 +00:00
2011-05-03 21:11:13 +00:00
static inline
2011-10-31 19:26:47 +00:00
size_t psyc_parse_content_length (PsycParseState *state)
{
return state->contentLength;
}
static inline
2011-10-31 19:26:47 +00:00
PsycBool psyc_parse_content_length_found (PsycParseState *state)
{
return state->contentLengthFound;
}
static inline
2011-10-31 19:26:47 +00:00
size_t psyc_parse_value_length (PsycParseState *state)
{
return state->valueLength;
}
static inline
2011-10-31 19:26:47 +00:00
PsycBool psyc_parse_value_length_found (PsycParseState *state)
{
return state->valueLengthFound;
}
static inline
2011-10-31 19:26:47 +00:00
size_t psyc_parse_cursor (PsycParseState *state)
{
return state->cursor;
}
static inline
2011-10-31 19:26:47 +00:00
size_t psyc_parse_buffer_length (PsycParseState *state)
{
return state->buffer.length;
}
static inline
2011-10-31 19:26:47 +00:00
size_t psyc_parse_remaining_length (PsycParseState *state)
{
return state->buffer.length - state->cursor;
}
static inline
2011-10-31 19:26:47 +00:00
const char * psyc_parse_remaining_buffer (PsycParseState *state)
{
2011-11-01 11:06:58 +00:00
return state->buffer.data + state->cursor;
}
2011-04-18 08:09:35 +00:00
/**
* Parse PSYC packets.
*
* This function parses a full or partial PSYC packet while keeping parsing
* state in a state variable that you have to pass in every time, and returns
2011-10-31 19:26:47 +00:00
* whenever a modifier or the body is found. See PsycParseRC for the possible
* return codes. When it returns oper, name & value will point to the respective
* parts of the buffer, no memory allocation is done.
*
2011-10-31 19:26:47 +00:00
* @param state An initialized PsycParseState.
* @param oper In case of a modifier it will be set to the operator.
* @param name In case of a modifier it will point to the name,
* in case of the body it will point to the method.
* @param value In case of a modifier it will point to the value,
* in case of the body it will point to the data.
2011-04-19 20:31:43 +00:00
*/
#ifdef __INLINE_PSYC_PARSE
static inline
#endif
2011-10-31 19:26:47 +00:00
PsycParseRC psyc_parse (PsycParseState *state, char *oper,
PsycString *name, PsycString *value);
2011-04-19 17:41:25 +00:00
2011-04-19 20:31:43 +00:00
/**
* List parser.
*
* This function parses a _list modifier value and returns one element a time
* while keeping parsing state in a state variable that you have to pass in
* every time. When it returns elem will point to the next element in value, no
* memory allocation is done.
*
2011-10-31 19:26:47 +00:00
* @param state An initialized PsycParseListState.
* @param elem It will point to the next element in the list.
2011-04-19 20:31:43 +00:00
*/
#ifdef __INLINE_PSYC_PARSE
static inline
#endif
2011-10-31 19:26:47 +00:00
PsycParseListRC psyc_parse_list (PsycParseListState *state, PsycString *elem);
2011-04-19 19:55:22 +00:00
static inline
2011-10-31 19:26:47 +00:00
PsycBool psyc_parse_number (const char *value, size_t len, ssize_t *n)
{
size_t c = 0;
uint8_t neg = 0;
if (!value)
return PSYC_FALSE;
if (value[0] == '-')
neg = ++c;
*n = 0;
while (c < len && value[c] >= '0' && value[c] <= '9')
*n = 10 * *n + (value[c++] - '0');
if (c != len)
return PSYC_FALSE;
if (neg)
*n = 0 - *n;
return PSYC_TRUE;
}
static inline
2011-10-31 19:26:47 +00:00
PsycBool psyc_parse_time (const char *value, size_t len, time_t *t)
{
2011-10-31 19:04:16 +00:00
return psyc_parse_number(value, len, t);
}
static inline
2011-10-31 19:26:47 +00:00
PsycBool psyc_parse_date (const char *value, size_t len, time_t *t)
{
2011-10-31 19:04:16 +00:00
if (psyc_parse_number(value, len, t)) {
*t += PSYC_EPOCH;
return PSYC_TRUE;
}
return PSYC_FALSE;
}
2011-10-13 22:29:32 +00:00
/**
 * Determines if the argument is a glyph.
 * Glyphs are: : = + - ? !
 *
 * @param g Byte to classify.
 * @return 1 if g is a glyph, 0 otherwise.
 */
static inline
char psyc_is_glyph (uint8_t g)
{
    switch (g) {
    case ':':
    case '=':
    case '+':
    case '-':
    case '?':
    case '!':
        return 1;
    default:
        return 0;
    }
}
/**
 * Determines if the argument is numeric, i.e. an ASCII digit '0'..'9'.
 *
 * @param c Byte to classify.
 * @return Non-zero if c is a digit, 0 otherwise.
 */
static inline
char psyc_is_numeric (uint8_t c)
{
    return c >= '0' && c <= '9';
}
/**
 * Determines if the argument is alphabetic, i.e. an ASCII letter
 * 'a'..'z' or 'A'..'Z'.
 *
 * @param c Byte to classify.
 * @return Non-zero if c is a letter, 0 otherwise.
 */
static inline
char psyc_is_alpha (uint8_t c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
/**
 * Determines if the argument is alphanumeric, i.e. accepted by either
 * psyc_is_alpha() or psyc_is_numeric().
 *
 * @param c Byte to classify.
 * @return Non-zero if c is alphanumeric, 0 otherwise.
 */
static inline
char psyc_is_alpha_numeric (uint8_t c)
{
    return psyc_is_alpha(c) || psyc_is_numeric(c);
}
/**
 * Determines if the argument is a keyword character.
 * Keyword characters are: alphanumeric and _
 *
 * @param c Byte to classify.
 * @return Non-zero if c is a keyword character, 0 otherwise.
 */
static inline
char psyc_is_kw_char (uint8_t c)
{
    return psyc_is_alpha_numeric(c) || c == '_';
}
/**
 * Determines if the argument is a name character.
 * Name characters are: see opaque_part in RFC 2396
 *
 * Accepted here are ASCII letters, every byte from '$' through ';'
 * (which includes the digits), and the characters _ ! ? = @ ~
 *
 * @param c Byte to classify.
 * @return Non-zero if c is a name character, 0 otherwise.
 */
static inline
char psyc_is_name_char (uint8_t c)
{
    return psyc_is_alpha(c) || (c >= '$' && c <= ';') ||
        c == '_' || c == '!' || c == '?' || c == '=' || c == '@' || c == '~';
}
/**
 * Determines if the argument is a hostname character.
 * Hostname characters are: alphanumeric, . and -
 *
 * @param c Byte to classify.
 * @return Non-zero if c is a hostname character, 0 otherwise.
 */
static inline
char psyc_is_host_char (uint8_t c)
{
    return psyc_is_alpha_numeric(c) || c == '.' || c == '-';
}
2011-05-09 10:42:42 +00:00
/** @} */ // end of parse group
2011-05-09 07:02:15 +00:00
#define PSYC_PARSE_H
#endif