1
0
Fork 0
mirror of git://git.psyc.eu/libpsyc synced 2024-08-15 03:19:02 +00:00

uniform parser

This commit is contained in:
tg(x) 2011-10-14 00:29:32 +02:00
parent 1e3368e66a
commit 01358610fd
9 changed files with 387 additions and 77 deletions

1
.gitignore vendored
View file

@ -17,6 +17,7 @@ test/testStrlen
test/testText
test/isRoutingVar
test/getVarType
test/parseUniform
perl/*.c
perl/*.so

View file

@ -24,6 +24,8 @@
/// Brace initializer {length, pointer} built from a string literal (NUL excluded).
#define PSYC_C2STR(string) {sizeof(string)-1, string}
/// Expands a string literal to two arguments: pointer, length (NUL excluded).
#define PSYC_C2ARG(string) string, sizeof(string)-1
/// Expands a struct with .ptr/.length members to two arguments: pointer, length.
/// The argument is parenthesized so expressions such as *p or a[i] expand correctly.
#define PSYC_S2ARG(string) (string).ptr, (string).length
/// Same as PSYC_S2ARG but in length, pointer order (e.g. for printf "%.*s").
#define PSYC_S2ARG2(string) (string).length, (string).ptr
typedef enum
{

View file

@ -115,8 +115,7 @@
#include <string.h>
#include <psyc.h>
typedef enum
{
typedef enum {
/// Default Flag. Parse everything.
PSYC_PARSE_ALL = 0,
/// Parse only the header
@ -130,8 +129,7 @@ typedef enum
* The return value definitions for the packet parsing function.
* @see psyc_parse()
*/
typedef enum
{
typedef enum {
/// Error, packet is not ending with a valid delimiter.
PSYC_PARSE_ERROR_END = -8,
/// Error, expected NL after the method.
@ -197,8 +195,7 @@ typedef enum
* The return value definitions for the list parsing function.
* @see psyc_parseList()
*/
typedef enum
{
typedef enum {
PSYC_PARSE_LIST_ERROR_DELIM = -4,
PSYC_PARSE_LIST_ERROR_LEN = -3,
PSYC_PARSE_LIST_ERROR_TYPE = -2,
@ -214,8 +211,7 @@ typedef enum
/**
* Struct for keeping parser state.
*/
typedef struct
{
typedef struct {
size_t cursor; ///< Current position in buffer.
size_t startc; ///< Position where the parsing would be resumed.
psycString buffer; ///< Buffer with data to be parsed.
@ -234,8 +230,7 @@ typedef struct
/**
* Struct for keeping list parser state.
*/
typedef struct
{
typedef struct {
size_t cursor; ///< Current position in buffer.
size_t startc; ///< Line start position.
psycString buffer; ///< Buffer with data to be parsed.
@ -291,8 +286,7 @@ void psyc_setParseBuffer (psycParseState *state, psycString buffer)
state->buffer = buffer;
state->cursor = 0;
if (state->flags & PSYC_PARSE_START_AT_CONTENT)
{
if (state->flags & PSYC_PARSE_START_AT_CONTENT) {
state->contentLength = buffer.length;
state->contentLengthFound = PSYC_TRUE;
}
@ -489,6 +483,84 @@ psycBool psyc_parseDate (psycString *value, time_t *t)
return psyc_parseDate2(value->ptr, value->length, t);
}
/**
 * Tells whether a byte is one of the glyph characters.
 * The glyphs are: : = + - ? !
 *
 * @return 1 if g is a glyph, 0 otherwise.
 */
static inline
char psyc_isGlyph (uint8_t g)
{
	return g == ':' || g == '=' || g == '+' ||
	       g == '-' || g == '?' || g == '!';
}
/**
 * Tells whether a byte is a decimal digit ('0' through '9').
 *
 * @return 1 if c is a digit, 0 otherwise.
 */
static inline
char psyc_isNumeric (uint8_t c)
{
	if (c < '0' || c > '9')
		return 0;
	return 1;
}
/**
 * Tells whether a byte is a letter in the range a-z or A-Z.
 *
 * @return 1 if c is a letter, 0 otherwise.
 */
static inline
char psyc_isAlpha (uint8_t c)
{
	if (c >= 'a' && c <= 'z')
		return 1;
	if (c >= 'A' && c <= 'Z')
		return 1;
	return 0;
}
/**
 * Tells whether a byte is a letter or a decimal digit.
 *
 * @return 1 if c is alphanumeric, 0 otherwise.
 */
static inline
char psyc_isAlphaNumeric (uint8_t c)
{
	if (psyc_isAlpha(c))
		return 1;
	return psyc_isNumeric(c) != 0;
}
/**
 * Tells whether a byte may appear in a keyword.
 * Keyword characters are the alphanumerics plus the underscore.
 *
 * @return 1 if c is a keyword character, 0 otherwise.
 */
static inline
char psyc_isKwChar (uint8_t c)
{
	if (c == '_')
		return 1;
	return psyc_isAlphaNumeric(c) != 0;
}
/**
 * Tells whether a byte may appear in a name (see opaque_part in RFC 2396).
 * Accepted are: letters, every character from '$' through ';'
 * (which includes the digits), and the characters _ ! ? = @ ~
 *
 * @return 1 if c is a name character, 0 otherwise.
 */
static inline
char psyc_isNameChar (uint8_t c)
{
	switch (c) {
	case '_':
	case '!':
	case '?':
	case '=':
	case '@':
	case '~':
		return 1;
	default:
		return psyc_isAlpha(c) || (c >= '$' && c <= ';');
	}
}
/**
 * Tells whether a byte may appear in a hostname.
 * Hostname characters are the alphanumerics plus '.' and '-'.
 *
 * @return 1 if c is a hostname character, 0 otherwise.
 */
static inline
char psyc_isHostChar (uint8_t c)
{
	if (c == '.' || c == '-')
		return 1;
	return psyc_isAlphaNumeric(c) != 0;
}
/** @} */ // end of parse group
#define PSYC_PARSE_H

69
include/psyc/uniform.h Normal file
View file

@ -0,0 +1,69 @@
#ifndef PSYC_UNIFORM_H
/**
* @file uniform.h
* @brief Uniform parsing.
*/
#include <psyc.h>
/**
 * Indexes into a psycUniform, one per part of a parsed uniform.
 * Values count up from 0; PSYC_UNIFORM_SIZE is the number of fields.
 */
typedef enum {
	// essential parts
	PSYC_UNIFORM_SCHEME = 0,
	PSYC_UNIFORM_USER,
	PSYC_UNIFORM_PASS,
	PSYC_UNIFORM_HOST,
	PSYC_UNIFORM_PORT,
	PSYC_UNIFORM_TRANSPORT,
	PSYC_UNIFORM_RESOURCE,
	PSYC_UNIFORM_QUERY,
	PSYC_UNIFORM_CHANNEL,

	// convenient snippets of the URL
	PSYC_UNIFORM_FULL,       ///< the URL as such
	PSYC_UNIFORM_BODY,       ///< the URL without scheme and '//'
	PSYC_UNIFORM_USERATHOST, ///< mailto and xmpp style
	PSYC_UNIFORM_HOSTPORT,   ///< just host:port (and transport)
	PSYC_UNIFORM_ROOT,       ///< root UNI of peer/server
	PSYC_UNIFORM_SLASHES,    ///< the // if the protocol has them
	PSYC_UNIFORM_NICK,       ///< whatever works as a nickname

	PSYC_UNIFORM_SIZE,       ///< number of fields, not a field itself
} psycUniformField;
/**
 * Error codes of the uniform parser; all are negative.
 * Each names the part of the uniform that failed validation.
 */
typedef enum {
	PSYC_PARSE_UNIFORM_INVALID_SCHEME    = -1,
	PSYC_PARSE_UNIFORM_INVALID_HOST      = -2,
	PSYC_PARSE_UNIFORM_INVALID_PORT      = -3,
	PSYC_PARSE_UNIFORM_INVALID_TRANSPORT = -4,
	PSYC_PARSE_UNIFORM_INVALID_RESOURCE  = -5,
	PSYC_PARSE_UNIFORM_INVALID_CHANNEL   = -6,
	PSYC_PARSE_UNIFORM_INVALID_SLASHES   = -7,
} psycParseUniformRC;
/**
 * Identifiers for URL schemes; values count up from 0.
 */
typedef enum {
	PSYC_SCHEME_PSYC = 0,
	PSYC_SCHEME_IRC,
	PSYC_SCHEME_XMPP,
	PSYC_SCHEME_SIP,
} psycScheme;
/**
 * Transport identifiers; each value is a single ASCII character.
 */
typedef enum {
	PSYC_TRANSPORT_TCP    = 'c',
	PSYC_TRANSPORT_UDP    = 'd',
	PSYC_TRANSPORT_GNUNET = 'g',
	PSYC_TRANSPORT_TLS    = 's',
} psycTransport;
/**
 * Entity type identifiers; each value is a single ASCII character.
 * NOTE(review): presumably the first character of a uniform's resource
 * (e.g. "~bar", "@bar") -- confirm against the PSYC specification.
 */
typedef enum {
	PSYC_ENTITY_SERVICE = '$',
	PSYC_ENTITY_PLACE   = '@',
	PSYC_ENTITY_PERSON  = '~',
} psycEntityType;
/// A parsed uniform: an array of strings indexed by psycUniformField.
typedef psycString psycUniform[PSYC_UNIFORM_SIZE];
/**
 * Parse the uniform in str (length bytes) and fill in the fields of uni.
 * @return The scheme (a psycScheme value) on success,
 *         or a negative psycParseUniformRC value on error.
 */
int psyc_parseUniform2(psycUniform *uni, const char *str, size_t length);
/**
 * Same as psyc_parseUniform2(), taking the input as a psycString.
 */
int psyc_parseUniform(psycUniform *uni, psycString *str);
#define PSYC_UNIFORM_H
#endif

View file

@ -3,8 +3,8 @@ DEBUG = 2
CFLAGS = -I../include -Wall -std=c99 -fPIC ${OPT}
DIET = diet
S = packet.c parse.c match.c render.c memmem.c itoa.c variable.c text.c
O = packet.o parse.o match.o render.o memmem.o itoa.o variable.o text.o
S = packet.c parse.c match.c render.c memmem.c itoa.c variable.c text.c uniform.c
O = packet.o parse.o match.o render.o memmem.o itoa.o variable.o text.o uniform.o
P = match itoa
A = ../lib/libpsyc.a

View file

@ -23,58 +23,6 @@ typedef enum {
PARSE_INCOMPLETE = 101,
} parseRC;
/**
 * Tells whether a byte is one of the glyph characters.
 * The glyphs are: : = + - ? !
 *
 * @return 1 if g is a glyph, 0 otherwise.
 */
static inline
char isGlyph (uint8_t g)
{
	return g == ':' || g == '=' || g == '+' ||
	       g == '-' || g == '?' || g == '!';
}
/**
 * Tells whether a byte is a decimal digit ('0' through '9').
 *
 * @return 1 if c is a digit, 0 otherwise.
 */
static inline
char isNumeric (uint8_t c)
{
	if (c < '0' || c > '9')
		return 0;
	return 1;
}
/**
 * Tells whether a byte is a letter (a-z, A-Z) or a decimal digit.
 *
 * @return 1 if c is alphanumeric, 0 otherwise.
 */
static inline
char isAlphaNumeric (uint8_t c)
{
	if (c >= 'a' && c <= 'z')
		return 1;
	if (c >= 'A' && c <= 'Z')
		return 1;
	return isNumeric(c) != 0;
}
/**
 * Tells whether a byte may appear in a keyword.
 * Keyword characters are the alphanumerics plus the underscore.
 *
 * @return 1 if c is a keyword character, 0 otherwise.
 */
static inline
char isKwChar (uint8_t c)
{
	if (c == '_')
		return 1;
	return isAlphaNumeric(c) != 0;
}
/**
* Parse variable name or method name.
* It should contain one or more keyword characters.
@ -86,7 +34,7 @@ parseRC psyc_parseKeyword (psycParseState *state, psycString *name)
name->ptr = state->buffer.ptr + state->cursor;
name->length = 0;
while (isKwChar(state->buffer.ptr[state->cursor]))
while (psyc_isKwChar(state->buffer.ptr[state->cursor]))
{
name->length++; // was a valid char, increase length
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
@ -157,7 +105,7 @@ parseRC psyc_parseModifier (psycParseState *state, char *oper,
{ // After SP the length follows.
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
if (isNumeric(state->buffer.ptr[state->cursor]))
if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
{
state->valueLengthFound = 1;
do
@ -165,7 +113,7 @@ parseRC psyc_parseModifier (psycParseState *state, char *oper,
length = 10 * length + state->buffer.ptr[state->cursor] - '0';
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
}
while (isNumeric(state->buffer.ptr[state->cursor]));
while (psyc_isNumeric(state->buffer.ptr[state->cursor]));
state->valueLength = length;
}
else
@ -249,7 +197,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
// Each line of the header starts with a glyph,
// i.e. :_name, -_name +_name etc,
// so just test if the first char is a glyph.
if (isGlyph(state->buffer.ptr[state->cursor])) // is the first char a glyph?
if (psyc_isGlyph(state->buffer.ptr[state->cursor])) // is the first char a glyph?
{ // it is a glyph, so a variable starts here
ret = psyc_parseModifier(state, oper, name, value);
state->routingLength += state->cursor - pos;
@ -264,7 +212,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
case PSYC_PART_LENGTH:
// End of header, content starts with an optional length then a NL
if (isNumeric(state->buffer.ptr[state->cursor]))
if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
{
state->contentLengthFound = 1;
state->contentLength = 0;
@ -274,7 +222,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
state->contentLength = 10 * state->contentLength + state->buffer.ptr[state->cursor] - '0';
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
}
while (isNumeric(state->buffer.ptr[state->cursor]));
while (psyc_isNumeric(state->buffer.ptr[state->cursor]));
}
if (state->buffer.ptr[state->cursor] == '\n') // start of content
@ -332,7 +280,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
// So just test if the first char is a glyph.
// In the body, the same applies, only that the
// method does not start with a glyph.
if (isGlyph(state->buffer.ptr[state->cursor]))
if (psyc_isGlyph(state->buffer.ptr[state->cursor]))
{
ret = psyc_parseModifier(state, oper, name, value);
state->contentParsed += state->cursor - pos;
@ -504,7 +452,7 @@ psycParseListRC psyc_parseList (psycParseListState *state, psycString *elem)
state->type = PSYC_LIST_TEXT;
state->cursor++;
}
else if (isNumeric(state->buffer.ptr[state->cursor]))
else if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
state->type = PSYC_LIST_BINARY;
else
return PSYC_PARSE_LIST_ERROR_TYPE;
@ -532,14 +480,14 @@ psycParseListRC psyc_parseList (psycParseListState *state, psycString *elem)
if (!(state->elemParsed < state->elemLength))
{
// Element starts with a number.
if (isNumeric(state->buffer.ptr[state->cursor]))
if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
{
do
{
state->elemLength = 10 * state->elemLength + state->buffer.ptr[state->cursor] - '0';
ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_LIST_INCOMPLETE);
}
while (isNumeric(state->buffer.ptr[state->cursor]));
while (psyc_isNumeric(state->buffer.ptr[state->cursor]));
}
else
return PSYC_PARSE_LIST_ERROR_LEN;

166
src/uniform.c Normal file
View file

@ -0,0 +1,166 @@
#include <ctype.h>
#include <string.h>
#include "lib.h"
#include "psyc/uniform.h"
#include "psyc/parse.h"
/**
 * Parse a uniform into its parts.
 *
 * Only the "psyc" scheme (compared case-insensitively) is accepted.
 * The layout recognized by the parser is:
 *
 *     psyc://host[:[port][transport]][/resource[#channel]]
 *
 * All fields of uni are cleared first, so parts that are absent from the
 * input are left zeroed. Fields that are filled in point into str;
 * nothing is copied.
 *
 * @param uni    Result array, indexed by psycUniformField.
 * @param str    The uniform to parse; need not be NUL-terminated.
 * @param length Number of bytes in str.
 *
 * @return PSYC_SCHEME_PSYC on success,
 *         or a negative psycParseUniformRC value on error.
 */
int psyc_parseUniform2 (psycUniform *uni, const char *str, size_t length)
{
	char c;
	psycString *p;
	size_t pos = 0, part = PSYC_UNIFORM_SCHEME;
	int scheme;

	// Several field lengths are read below even when the corresponding part
	// never occurred in the input, so do not depend on the caller having
	// cleared the result beforehand.
	memset(uni, 0, sizeof(*uni));

	(*uni)[PSYC_UNIFORM_FULL].ptr = str;
	(*uni)[PSYC_UNIFORM_FULL].length = length;

	// The scheme is everything up to the first ':'.
	while (pos < length) {
		c = str[pos];
		if (c == ':') {
			(*uni)[part].ptr = str;
			(*uni)[part].length = pos++;
			break;
		} else if (!psyc_isHostChar(c))
			return PSYC_PARSE_UNIFORM_INVALID_SCHEME;
		pos++;
	}

	// Without a ':' the scheme length is still 0 and is rejected here.
	p = &(*uni)[PSYC_UNIFORM_SCHEME];
	if (!(p->length == 4 &&
	      tolower((unsigned char)p->ptr[0]) == 'p' &&
	      tolower((unsigned char)p->ptr[1]) == 's' &&
	      tolower((unsigned char)p->ptr[2]) == 'y' &&
	      tolower((unsigned char)p->ptr[3]) == 'c'))
		return PSYC_PARSE_UNIFORM_INVALID_SCHEME;

	scheme = PSYC_SCHEME_PSYC;
	part = PSYC_UNIFORM_SLASHES;
	(*uni)[part].ptr = str + pos;
	(*uni)[part].length = 0;

	// One character per iteration; `part` is the field currently collected.
	while (pos < length) {
		c = str[pos];
		switch (part) {
		case PSYC_UNIFORM_SLASHES:
			if (c == '/')
				(*uni)[part].length++;
			else return PSYC_PARSE_UNIFORM_INVALID_SLASHES;

			if ((*uni)[part].length == 2) {
				part = PSYC_UNIFORM_HOST;
				(*uni)[part].ptr = str + pos + 1;
				(*uni)[part].length = 0;
			}
			break;

		case PSYC_UNIFORM_HOST:
			if (psyc_isHostChar(c)) {
				(*uni)[part].length++;
				break;
			}

			if ((*uni)[part].length == 0)
				return PSYC_PARSE_UNIFORM_INVALID_HOST;

			if (c == ':')
				part = PSYC_UNIFORM_PORT;
			else if (c == '/')
				part = PSYC_UNIFORM_RESOURCE;
			else return PSYC_PARSE_UNIFORM_INVALID_HOST;

			(*uni)[part].ptr = str + pos + 1;
			(*uni)[part].length = 0;
			break;

		case PSYC_UNIFORM_PORT:
			if (psyc_isNumeric(c)) {
				(*uni)[part].length++;
				break;
			}

			// An empty port is only allowed in front of the GNUnet transport.
			if ((*uni)[part].length == 0 && c != PSYC_TRANSPORT_GNUNET)
				return PSYC_PARSE_UNIFORM_INVALID_PORT;

			if (c == '/') {
				part = PSYC_UNIFORM_RESOURCE;
				(*uni)[part].ptr = str + pos + 1;
				(*uni)[part].length = 0;
				break;
			}
			else {
				// The current character already belongs to the transport.
				part = PSYC_UNIFORM_TRANSPORT;
				(*uni)[part].ptr = str + pos;
				(*uni)[part].length = 0;
			}
			// fall thru
		case PSYC_UNIFORM_TRANSPORT:
			switch (c) {
			case PSYC_TRANSPORT_GNUNET:
				// The GNUnet transport cannot be combined with a port number.
				if ((*uni)[PSYC_UNIFORM_PORT].length > 0)
					return PSYC_PARSE_UNIFORM_INVALID_TRANSPORT;
				// fall thru
			case PSYC_TRANSPORT_TCP:
			case PSYC_TRANSPORT_UDP:
			case PSYC_TRANSPORT_TLS:
				// At most one transport character is allowed.
				if ((*uni)[part].length > 0)
					return PSYC_PARSE_UNIFORM_INVALID_TRANSPORT;
				(*uni)[part].length++;
				break;
			case '/':
				part = PSYC_UNIFORM_RESOURCE;
				(*uni)[part].ptr = str + pos + 1;
				(*uni)[part].length = 0;
				break;
			default:
				return PSYC_PARSE_UNIFORM_INVALID_TRANSPORT;
			}
			break;

		case PSYC_UNIFORM_RESOURCE:
			if (psyc_isNameChar(c)) {
				(*uni)[part].length++;
				break;
			} else if (c == '#') {
				part = PSYC_UNIFORM_CHANNEL;
				(*uni)[part].ptr = str + pos + 1;
				(*uni)[part].length = 0;
				break;
			} else return PSYC_PARSE_UNIFORM_INVALID_RESOURCE;

		case PSYC_UNIFORM_CHANNEL:
			if (psyc_isNameChar(c)) {
				(*uni)[part].length++;
				break;
			} else return PSYC_PARSE_UNIFORM_INVALID_CHANNEL;
		}
		pos++;
	}

	if ((*uni)[PSYC_UNIFORM_HOST].length == 0)
		return PSYC_PARSE_UNIFORM_INVALID_HOST;

	// Root: scheme, ':', slashes, host and, if present, ':' port transport.
	(*uni)[PSYC_UNIFORM_ROOT].ptr = str;
	(*uni)[PSYC_UNIFORM_ROOT].length = (*uni)[PSYC_UNIFORM_SCHEME].length + 1 +
		(*uni)[PSYC_UNIFORM_SLASHES].length + (*uni)[PSYC_UNIFORM_HOST].length +
		(*uni)[PSYC_UNIFORM_PORT].length + (*uni)[PSYC_UNIFORM_TRANSPORT].length;

	if ((*uni)[PSYC_UNIFORM_PORT].length > 0 ||
	    (*uni)[PSYC_UNIFORM_TRANSPORT].length > 0)
		(*uni)[PSYC_UNIFORM_ROOT].length++; // :

	// Body: everything after the scheme, the ':' and the slashes.
	(*uni)[PSYC_UNIFORM_BODY].ptr = (*uni)[PSYC_UNIFORM_HOST].ptr;
	(*uni)[PSYC_UNIFORM_BODY].length = length - (*uni)[PSYC_UNIFORM_SCHEME].length - 1 -
		(*uni)[PSYC_UNIFORM_SLASHES].length;

	if ((*uni)[PSYC_UNIFORM_RESOURCE].length) {
		// Nick: the resource without its first character. The length has to
		// shrink along with the pointer, otherwise the nick would extend one
		// byte past the resource.
		(*uni)[PSYC_UNIFORM_NICK].ptr = (*uni)[PSYC_UNIFORM_RESOURCE].ptr + 1;
		(*uni)[PSYC_UNIFORM_NICK].length = (*uni)[PSYC_UNIFORM_RESOURCE].length - 1;
	}

	return scheme;
}
/**
 * Parse a uniform given as a psycString.
 * Forwards the string's pointer and length to psyc_parseUniform2()
 * and returns its result unchanged.
 */
int psyc_parseUniform (psycUniform *uni, psycString *str)
{
	const char *text = str->ptr;
	size_t len = str->length;

	return psyc_parseUniform2(uni, text, len);
}

View file

@ -3,7 +3,7 @@ DEBUG = 2
CFLAGS = -I../include -I../src -Wall -std=c99 ${OPT}
LDFLAGS = -L../lib
LOADLIBES = -lpsyc -lm
TARGETS = testPsyc testPsycSpeed testParser testMatch testRender testText isRoutingVar getVarType
TARGETS = testPsyc testPsycSpeed testParser testMatch testRender testText isRoutingVar getVarType parseUniform
O = test.o
WRAPPER =
DIET = diet
@ -47,6 +47,7 @@ test: ${TARGETS}
./testText
./isRoutingVar
./getVarType
./parseUniform
x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -f $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x
x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -rf $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x

51
test/parseUniform.c Normal file
View file

@ -0,0 +1,51 @@
#include <psyc/uniform.h>
#include <stdlib.h>
#include <stdio.h>
#include <lib.h>
/**
 * Parse str with psyc_parseUniform2(), print the resulting fields and
 * exit with status 1 if the return value differs from ret.
 *
 * @param str Uniform to parse (NUL-terminated).
 * @param ret Expected return value of the parser.
 */
void
testUniform(char *str, int ret) {
	// Automatic storage: there is no allocation that could fail and
	// nothing that needs to be freed.
	psycUniform uni;
	memset(uni, 0, sizeof(uni));
	printf("%s\n", str);
	int r = psyc_parseUniform2(&uni, str, strlen(str));

	PP(("[%.*s] : [%.*s] [%.*s] : [%.*s] [%.*s] / [%.*s] # [%.*s]\n[%.*s] [%.*s]\n[%.*s]\n\n",
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_SCHEME]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_SLASHES]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_HOST]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_PORT]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_TRANSPORT]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_RESOURCE]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_CHANNEL]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_ROOT]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_NICK]),
	    (int)PSYC_S2ARG2(uni[PSYC_UNIFORM_BODY])));

	if (r != ret) {
		fprintf(stderr, "ERROR: psyc_parseUniform returned %d instead of %d\n", r, ret);
		exit(1);
	}
}
/**
 * Run the uniform parser over a table of inputs and expected return values.
 * testUniform() exits with status 1 on the first mismatch.
 */
int main() {
	static const struct {
		char *uniform; // input handed to the parser
		int expected;  // return value the parser has to produce
	} cases[] = {
		// valid uniforms
		{ "psyc://foo.tld:4404d/@bar#baz", PSYC_SCHEME_PSYC },
		{ "psyc://foo:4405/~bar", PSYC_SCHEME_PSYC },
		{ "psyc://foo:1234", PSYC_SCHEME_PSYC },
		{ "psyc://foo:1234d", PSYC_SCHEME_PSYC },
		{ "psyc://foo/", PSYC_SCHEME_PSYC },
		{ "psyc://foo", PSYC_SCHEME_PSYC },
		{ "psyc://1234567890abcdef:g/~foo", PSYC_SCHEME_PSYC },
		// invalid uniforms
		{ "xmpp:user@host", PSYC_PARSE_UNIFORM_INVALID_SCHEME },
		{ "psyc:host", PSYC_PARSE_UNIFORM_INVALID_SLASHES },
		{ "psyc://", PSYC_PARSE_UNIFORM_INVALID_HOST },
		{ "psyc://:123/", PSYC_PARSE_UNIFORM_INVALID_HOST },
		{ "psyc://host:/~foo", PSYC_PARSE_UNIFORM_INVALID_PORT },
		{ "psyc://host:d/~foo", PSYC_PARSE_UNIFORM_INVALID_PORT },
		{ "psyc://1234567890abcdef:1g/~foo", PSYC_PARSE_UNIFORM_INVALID_TRANSPORT },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		testUniform(cases[i].uniform, cases[i].expected);

	printf("SUCCESS: psyc_parseUniform passed all tests.\n");
	return 0;
}