diff --git a/.gitignore b/.gitignore
index 5f4516d..a193596 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@ test/testStrlen
 test/testText
 test/isRoutingVar
 test/getVarType
+test/parseUniform
 
 perl/*.c
 perl/*.so
diff --git a/include/psyc.h b/include/psyc.h
index 769ea8b..416f9eb 100644
--- a/include/psyc.h
+++ b/include/psyc.h
@@ -24,6 +24,10 @@
 
 #define PSYC_C2STR(string) {sizeof(string)-1, string}
 #define PSYC_C2ARG(string) string, sizeof(string)-1
+/// Expands a psycString to two arguments: pointer, length.
+#define PSYC_S2ARG(string) (string).ptr, (string).length
+/// Expands a psycString to two arguments: length, pointer (fits "%.*s").
+#define PSYC_S2ARG2(string) (string).length, (string).ptr
 
 typedef enum
 {
diff --git a/include/psyc/parse.h b/include/psyc/parse.h
index d8289e0..d0b22d5 100644
--- a/include/psyc/parse.h
+++ b/include/psyc/parse.h
@@ -115,8 +115,7 @@
 #include
 #include
 
-typedef enum
-{
+typedef enum {
 	/// Default Flag. Parse everything.
 	PSYC_PARSE_ALL = 0,
 	/// Parse only the header
@@ -130,8 +129,7 @@ typedef enum
  * The return value definitions for the packet parsing function.
  * @see psyc_parse()
  */
-typedef enum
-{
+typedef enum {
 	/// Error, packet is not ending with a valid delimiter.
 	PSYC_PARSE_ERROR_END = -8,
 	/// Error, expected NL after the method.
@@ -197,8 +195,7 @@ typedef enum
  * The return value definitions for the list parsing function.
  * @see psyc_parseList()
  */
-typedef enum
-{
+typedef enum {
 	PSYC_PARSE_LIST_ERROR_DELIM = -4,
 	PSYC_PARSE_LIST_ERROR_LEN = -3,
 	PSYC_PARSE_LIST_ERROR_TYPE = -2,
@@ -214,8 +211,7 @@ typedef enum
 /**
  * Struct for keeping parser state.
  */
-typedef struct
-{
+typedef struct {
 	size_t cursor; ///< Current position in buffer.
 	size_t startc; ///< Position where the parsing would be resumed.
 	psycString buffer; ///< Buffer with data to be parsed.
@@ -234,8 +230,7 @@ typedef struct
 /**
  * Struct for keeping list parser state.
  */
-typedef struct
-{
+typedef struct {
 	size_t cursor; ///< Current position in buffer.
 	size_t startc; ///< Line start position.
 	psycString buffer; ///< Buffer with data to be parsed.
@@ -291,8 +286,7 @@ void psyc_setParseBuffer (psycParseState *state, psycString buffer)
 	state->buffer = buffer;
 	state->cursor = 0;
 
-	if (state->flags & PSYC_PARSE_START_AT_CONTENT)
-	{
+	if (state->flags & PSYC_PARSE_START_AT_CONTENT) {
 		state->contentLength = buffer.length;
 		state->contentLengthFound = PSYC_TRUE;
 	}
@@ -489,6 +483,84 @@ psycBool psyc_parseDate (psycString *value, time_t *t)
 	return psyc_parseDate2(value->ptr, value->length, t);
 }
 
+/**
+ * Determines if the argument is a glyph.
+ * Glyphs are: : = + - ? !
+ */
+static inline
+char psyc_isGlyph (uint8_t g)
+{
+	switch(g) {
+		case ':':
+		case '=':
+		case '+':
+		case '-':
+		case '?':
+		case '!':
+			return 1;
+		default:
+			return 0;
+	}
+}
+
+/**
+ * Determines if the argument is numeric.
+ */
+static inline
+char psyc_isNumeric (uint8_t c)
+{
+	return c >= '0' && c <= '9';
+}
+
+/**
+ * Determines if the argument is alphabetic.
+ */
+static inline
+char psyc_isAlpha (uint8_t c)
+{
+	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+/**
+ * Determines if the argument is alphanumeric.
+ */
+static inline
+char psyc_isAlphaNumeric (uint8_t c)
+{
+	return psyc_isAlpha(c) || psyc_isNumeric(c);
+}
+
+/**
+ * Determines if the argument is a keyword character.
+ * Keyword characters are: alphanumeric and _
+ */
+static inline
+char psyc_isKwChar (uint8_t c)
+{
+	return psyc_isAlphaNumeric(c) || c == '_';
+}
+
+/**
+ * Determines if the argument is a name character.
+ * Name characters are: see opaque_part in RFC 2396
+ */
+static inline
+char psyc_isNameChar (uint8_t c)
+{
+	return psyc_isAlpha(c) || (c >= '$' && c <= ';') ||
+		c == '_' || c == '!' || c == '?' || c == '=' || c == '@' || c == '~';
+}
+
+/**
+ * Determines if the argument is a hostname character.
+ * Hostname characters are: alphanumeric, . and -
+ */
+static inline
+char psyc_isHostChar (uint8_t c)
+{
+	return psyc_isAlphaNumeric(c) || c == '.' || c == '-';
+}
+
 /** @} */ // end of parse group
 
 #define PSYC_PARSE_H
diff --git a/include/psyc/uniform.h b/include/psyc/uniform.h
new file mode 100644
index 0000000..9f338f8
--- /dev/null
+++ b/include/psyc/uniform.h
@@ -0,0 +1,102 @@
+#ifndef PSYC_UNIFORM_H
+/**
+ * @file uniform.h
+ * @brief Uniform parsing.
+ */
+
+#include <psyc.h>
+
+/**
+ * Indexes of the parts of a uniform inside a psycUniform array.
+ */
+typedef enum {
+// essential parts
+	PSYC_UNIFORM_SCHEME = 0,
+	PSYC_UNIFORM_USER = 1,
+	PSYC_UNIFORM_PASS = 2,
+	PSYC_UNIFORM_HOST = 3,
+	PSYC_UNIFORM_PORT = 4,
+	PSYC_UNIFORM_TRANSPORT = 5,
+	PSYC_UNIFORM_RESOURCE = 6,
+	PSYC_UNIFORM_QUERY = 7,
+	PSYC_UNIFORM_CHANNEL = 8,
+
+// convenient snippets of the URL
+	PSYC_UNIFORM_FULL = 9, // the URL as such
+	PSYC_UNIFORM_BODY = 10, // the URL without scheme and '//'
+	PSYC_UNIFORM_USERATHOST = 11, // mailto and xmpp style
+	PSYC_UNIFORM_HOSTPORT = 12, // just host:port (and transport)
+	PSYC_UNIFORM_ROOT = 13, // root UNI of peer/server
+	PSYC_UNIFORM_SLASHES = 14, // the // if the protocol has them
+	PSYC_UNIFORM_NICK = 15, // whatever works as a nickname
+	PSYC_UNIFORM_SIZE = 16,
+} psycUniformField;
+
+/**
+ * Error codes returned by the uniform parsing functions.
+ */
+typedef enum {
+	PSYC_PARSE_UNIFORM_INVALID_SLASHES = -7,
+	PSYC_PARSE_UNIFORM_INVALID_CHANNEL = -6,
+	PSYC_PARSE_UNIFORM_INVALID_RESOURCE = -5,
+	PSYC_PARSE_UNIFORM_INVALID_TRANSPORT = -4,
+	PSYC_PARSE_UNIFORM_INVALID_PORT = -3,
+	PSYC_PARSE_UNIFORM_INVALID_HOST = -2,
+	PSYC_PARSE_UNIFORM_INVALID_SCHEME = -1,
+} psycParseUniformRC;
+
+/**
+ * Identifiers of uniform schemes.
+ */
+typedef enum {
+	PSYC_SCHEME_PSYC = 0,
+	PSYC_SCHEME_IRC = 1,
+	PSYC_SCHEME_XMPP = 2,
+	PSYC_SCHEME_SIP = 3,
+} psycScheme;
+
+/**
+ * Transport identifiers; each value is the character used in the uniform.
+ */
+typedef enum {
+	PSYC_TRANSPORT_TCP = 'c',
+	PSYC_TRANSPORT_UDP = 'd',
+	PSYC_TRANSPORT_TLS = 's',
+	PSYC_TRANSPORT_GNUNET = 'g',
+} psycTransport;
+
+/**
+ * Characters denoting the type of an entity.
+ */
+typedef enum {
+	PSYC_ENTITY_PERSON = '~',
+	PSYC_ENTITY_PLACE = '@',
+	PSYC_ENTITY_SERVICE = '$',
+} psycEntityType;
+
+/**
+ * A parsed uniform: one psycString for each psycUniformField.
+ */
+typedef psycString psycUniform[PSYC_UNIFORM_SIZE];
+
+/**
+ * Parse a uniform given as a buffer and its length.
+ *
+ * The parts stored in uni point into str, nothing is copied.
+ *
+ * @param uni    Receives the parts of the uniform.
+ * @param str    Start of the uniform, does not need to be NUL-terminated.
+ * @param length Length of the uniform in bytes.
+ * @return A psycScheme value on success, a negative psycParseUniformRC
+ *         value on failure.
+ */
+int psyc_parseUniform2(psycUniform *uni, const char *str, size_t length);
+
+/**
+ * Parse a uniform given as a psycString.
+ * @see psyc_parseUniform2()
+ */
+int psyc_parseUniform(psycUniform *uni, psycString *str);
+
+#define PSYC_UNIFORM_H
+#endif
diff --git a/src/Makefile b/src/Makefile
index da0452d..92d8218 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -3,8 +3,8 @@ DEBUG = 2
 CFLAGS = -I../include -Wall -std=c99 -fPIC ${OPT}
 DIET = diet
 
-S = packet.c parse.c match.c render.c memmem.c itoa.c variable.c text.c
-O = packet.o parse.o match.o render.o memmem.o itoa.o variable.o text.o
+S = packet.c parse.c match.c render.c memmem.c itoa.c variable.c text.c uniform.c
+O = packet.o parse.o match.o render.o memmem.o itoa.o variable.o text.o uniform.o
 P = match itoa
 
 A = ../lib/libpsyc.a
diff --git a/src/parse.c b/src/parse.c
index 4248c7c..eaa85ad 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -23,58 +23,6 @@ typedef enum {
 	PARSE_INCOMPLETE = 101,
 } parseRC;
 
-/**
- * Determines if the argument is a glyph.
- * Glyphs are: : = + - ? !
- */
-static inline
-char isGlyph (uint8_t g)
-{
-	switch(g)
-	{
-		case ':':
-		case '=':
-		case '+':
-		case '-':
-		case '?':
-		case '!':
-			return 1;
-		default:
-			return 0;
-	}
-}
-
-/**
- * Determines if the argument is numeric.
- */
-static inline
-char isNumeric (uint8_t c)
-{
-	return c >= '0' && c <= '9';
-}
-
-/**
- * Determines if the argument is alphanumeric.
- */
-static inline
-char isAlphaNumeric (uint8_t c)
-{
-	return
-		(c >= 'a' && c <= 'z') ||
-		(c >= 'A' && c <= 'Z') ||
-		isNumeric(c);
-}
-
-/**
- * Determines if the argument is a keyword character.
- * Keyword characters are: alphanumeric and _
- */
-static inline
-char isKwChar (uint8_t c)
-{
-	return isAlphaNumeric(c) || c == '_';
-}
-
 /**
  * Parse variable name or method name.
  * It should contain one or more keyword characters.
@@ -86,7 +34,7 @@ parseRC psyc_parseKeyword (psycParseState *state, psycString *name)
 	name->ptr = state->buffer.ptr + state->cursor;
 	name->length = 0;
 
-	while (isKwChar(state->buffer.ptr[state->cursor]))
+	while (psyc_isKwChar(state->buffer.ptr[state->cursor]))
 	{
 		name->length++; // was a valid char, increase length
 		ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
@@ -157,7 +105,7 @@ parseRC psyc_parseModifier (psycParseState *state, char *oper,
 	{ // After SP the length follows.
 		ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
 
-		if (isNumeric(state->buffer.ptr[state->cursor]))
+		if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
 		{
 			state->valueLengthFound = 1;
 			do
@@ -165,7 +113,7 @@ parseRC psyc_parseModifier (psycParseState *state, char *oper,
 				length = 10 * length + state->buffer.ptr[state->cursor] - '0';
 				ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
 			}
-			while (isNumeric(state->buffer.ptr[state->cursor]));
+			while (psyc_isNumeric(state->buffer.ptr[state->cursor]));
 			state->valueLength = length;
 		}
 		else
@@ -249,7 +197,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
 			// Each line of the header starts with a glyph,
 			// i.e. :_name, -_name +_name etc,
 			// so just test if the first char is a glyph.
-			if (isGlyph(state->buffer.ptr[state->cursor])) // is the first char a glyph?
+			if (psyc_isGlyph(state->buffer.ptr[state->cursor])) // is the first char a glyph?
 			{ // it is a glyph, so a variable starts here
 				ret = psyc_parseModifier(state, oper, name, value);
 				state->routingLength += state->cursor - pos;
@@ -264,7 +212,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
 
 		case PSYC_PART_LENGTH:
 			// End of header, content starts with an optional length then a NL
-			if (isNumeric(state->buffer.ptr[state->cursor]))
+			if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
 			{
 				state->contentLengthFound = 1;
 				state->contentLength = 0;
@@ -274,7 +222,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
 					state->contentLength = 10 * state->contentLength + state->buffer.ptr[state->cursor] - '0';
 					ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_INSUFFICIENT);
 				}
-				while (isNumeric(state->buffer.ptr[state->cursor]));
+				while (psyc_isNumeric(state->buffer.ptr[state->cursor]));
 			}
 
 			if (state->buffer.ptr[state->cursor] == '\n') // start of content
@@ -332,7 +280,7 @@ psycParseRC psyc_parse (psycParseState *state, char *oper,
 			// So just test if the first char is a glyph.
 			// In the body, the same applies, only that the
 			// method does not start with a glyph.
-			if (isGlyph(state->buffer.ptr[state->cursor]))
+			if (psyc_isGlyph(state->buffer.ptr[state->cursor]))
 			{
 				ret = psyc_parseModifier(state, oper, name, value);
 				state->contentParsed += state->cursor - pos;
@@ -504,7 +452,7 @@ psycParseListRC psyc_parseList (psycParseListState *state, psycString *elem)
 			state->type = PSYC_LIST_TEXT;
 			state->cursor++;
 		}
-		else if (isNumeric(state->buffer.ptr[state->cursor]))
+		else if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
 			state->type = PSYC_LIST_BINARY;
 		else
 			return PSYC_PARSE_LIST_ERROR_TYPE;
@@ -532,14 +480,14 @@ psycParseListRC psyc_parseList (psycParseListState *state, psycString *elem)
 		if (!(state->elemParsed < state->elemLength))
 		{
 			// Element starts with a number.
-			if (isNumeric(state->buffer.ptr[state->cursor]))
+			if (psyc_isNumeric(state->buffer.ptr[state->cursor]))
 			{
 				do
 				{
 					state->elemLength = 10 * state->elemLength + state->buffer.ptr[state->cursor] - '0';
 					ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_LIST_INCOMPLETE);
 				}
-				while (isNumeric(state->buffer.ptr[state->cursor]));
+				while (psyc_isNumeric(state->buffer.ptr[state->cursor]));
 			}
 			else
 				return PSYC_PARSE_LIST_ERROR_LEN;
diff --git a/src/uniform.c b/src/uniform.c
new file mode 100644
index 0000000..ac61742
--- /dev/null
+++ b/src/uniform.c
@@ -0,0 +1,213 @@
+#include <ctype.h>
+#include <string.h>
+#include "lib.h"
+#include "psyc/uniform.h"
+#include "psyc/parse.h"
+
+/**
+ * Determines if the string is the scheme name "psyc", ignoring case.
+ */
+static inline
+char isPsycScheme (const psycString *s)
+{
+	static const char name[] = "psyc";
+	size_t i;
+
+	if (s->length != sizeof(name) - 1)
+		return 0;
+
+	for (i = 0; i < s->length; i++) {
+		// tolower() must not be called with a negative char value.
+		if (tolower((unsigned char) s->ptr[i]) != name[i])
+			return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * Starts collecting an empty part at the given position.
+ */
+static inline
+void startPart (psycString *f, size_t part, const char *ptr)
+{
+	f[part].ptr = ptr;
+	f[part].length = 0;
+}
+
+/**
+ * Parse a uniform given as a buffer and its length.
+ *
+ * Only the psyc scheme is supported. The parts stored in uni point into str.
+ * Parts which do not occur in the uniform are left empty.
+ *
+ * @param uni    Receives the parts of the uniform.
+ * @param str    Start of the uniform.
+ * @param length Length of the uniform in bytes.
+ * @return PSYC_SCHEME_PSYC on success, a negative psycParseUniformRC
+ *         value on failure.
+ */
+int psyc_parseUniform2 (psycUniform *uni, const char *str, size_t length)
+{
+	char c;
+	psycString *f = *uni;
+	size_t pos = 0, part;
+
+	// Parts which do not occur in the uniform must read as empty below,
+	// do not rely on the caller for that.
+	memset(f, 0, sizeof(psycUniform));
+
+	f[PSYC_UNIFORM_FULL].ptr = str;
+	f[PSYC_UNIFORM_FULL].length = length;
+
+	// The scheme is everything up to the first colon.
+	while (pos < length) {
+		c = str[pos];
+		if (c == ':') {
+			f[PSYC_UNIFORM_SCHEME].ptr = str;
+			f[PSYC_UNIFORM_SCHEME].length = pos++;
+			break;
+		} else if (!psyc_isHostChar(c))
+			return PSYC_PARSE_UNIFORM_INVALID_SCHEME;
+		pos++;
+	}
+
+	// Also catches a missing colon, the scheme is still empty then.
+	if (!isPsycScheme(&f[PSYC_UNIFORM_SCHEME]))
+		return PSYC_PARSE_UNIFORM_INVALID_SCHEME;
+
+	part = PSYC_UNIFORM_SLASHES;
+	startPart(f, part, str + pos);
+
+	while (pos < length) {
+		c = str[pos];
+		switch (part) {
+			case PSYC_UNIFORM_SLASHES:
+				if (c != '/')
+					return PSYC_PARSE_UNIFORM_INVALID_SLASHES;
+
+				if (++f[part].length == 2) {
+					part = PSYC_UNIFORM_HOST;
+					startPart(f, part, str + pos + 1);
+				}
+				break;
+
+			case PSYC_UNIFORM_HOST:
+				if (psyc_isHostChar(c)) {
+					f[part].length++;
+					break;
+				}
+
+				if (f[part].length == 0)
+					return PSYC_PARSE_UNIFORM_INVALID_HOST;
+
+				if (c == ':')
+					part = PSYC_UNIFORM_PORT;
+				else if (c == '/')
+					part = PSYC_UNIFORM_RESOURCE;
+				else
+					return PSYC_PARSE_UNIFORM_INVALID_HOST;
+
+				startPart(f, part, str + pos + 1);
+				break;
+
+			case PSYC_UNIFORM_PORT:
+				if (psyc_isNumeric(c)) {
+					f[part].length++;
+					break;
+				}
+
+				// Only the GNUnet transport goes without a port.
+				if (f[part].length == 0 && c != PSYC_TRANSPORT_GNUNET)
+					return PSYC_PARSE_UNIFORM_INVALID_PORT;
+
+				if (c == '/') {
+					part = PSYC_UNIFORM_RESOURCE;
+					startPart(f, part, str + pos + 1);
+					break;
+				}
+
+				// The transport starts at the current character.
+				part = PSYC_UNIFORM_TRANSPORT;
+				startPart(f, part, str + pos);
+				// fall thru
+
+			case PSYC_UNIFORM_TRANSPORT:
+				switch (c) {
+					case PSYC_TRANSPORT_GNUNET:
+						if (f[PSYC_UNIFORM_PORT].length > 0)
+							return PSYC_PARSE_UNIFORM_INVALID_TRANSPORT;
+						// fall thru
+					case PSYC_TRANSPORT_TCP:
+					case PSYC_TRANSPORT_UDP:
+					case PSYC_TRANSPORT_TLS:
+						if (f[part].length > 0)
+							return PSYC_PARSE_UNIFORM_INVALID_TRANSPORT;
+						f[part].length++;
+						break;
+					case '/':
+						part = PSYC_UNIFORM_RESOURCE;
+						startPart(f, part, str + pos + 1);
+						break;
+					default:
+						return PSYC_PARSE_UNIFORM_INVALID_TRANSPORT;
+				}
+				break;
+
+			case PSYC_UNIFORM_RESOURCE:
+				if (psyc_isNameChar(c)) {
+					f[part].length++;
+					break;
+				}
+
+				if (c != '#')
+					return PSYC_PARSE_UNIFORM_INVALID_RESOURCE;
+
+				part = PSYC_UNIFORM_CHANNEL;
+				startPart(f, part, str + pos + 1);
+				break;
+
+			case PSYC_UNIFORM_CHANNEL:
+				if (!psyc_isNameChar(c))
+					return PSYC_PARSE_UNIFORM_INVALID_CHANNEL;
+
+				f[part].length++;
+				break;
+		}
+		pos++;
+	}
+
+	if (f[PSYC_UNIFORM_HOST].length == 0)
+		return PSYC_PARSE_UNIFORM_INVALID_HOST;
+
+	f[PSYC_UNIFORM_ROOT].ptr = str;
+	f[PSYC_UNIFORM_ROOT].length = f[PSYC_UNIFORM_SCHEME].length + 1 +
+		f[PSYC_UNIFORM_SLASHES].length + f[PSYC_UNIFORM_HOST].length +
+		f[PSYC_UNIFORM_PORT].length + f[PSYC_UNIFORM_TRANSPORT].length;
+
+	if (f[PSYC_UNIFORM_PORT].length > 0 ||
+		f[PSYC_UNIFORM_TRANSPORT].length > 0)
+		f[PSYC_UNIFORM_ROOT].length++; // :
+
+	f[PSYC_UNIFORM_BODY].ptr = f[PSYC_UNIFORM_HOST].ptr;
+	f[PSYC_UNIFORM_BODY].length = length - f[PSYC_UNIFORM_SCHEME].length - 1 -
+		f[PSYC_UNIFORM_SLASHES].length;
+
+	if (f[PSYC_UNIFORM_RESOURCE].length) {
+		// The nick is the resource without its first character,
+		// so it is one character shorter than the resource.
+		f[PSYC_UNIFORM_NICK].ptr = f[PSYC_UNIFORM_RESOURCE].ptr + 1;
+		f[PSYC_UNIFORM_NICK].length = f[PSYC_UNIFORM_RESOURCE].length - 1;
+	}
+
+	return PSYC_SCHEME_PSYC;
+}
+
+/**
+ * Parse a uniform given as a psycString.
+ * @see psyc_parseUniform2()
+ */
+int psyc_parseUniform (psycUniform *uni, psycString *str)
+{
+	return psyc_parseUniform2(uni, str->ptr, str->length);
+}
diff --git a/test/Makefile b/test/Makefile
index b41e6bb..ed0de0b 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -3,7 +3,7 @@ DEBUG = 2
 CFLAGS = -I../include -I../src -Wall -std=c99 ${OPT}
 LDFLAGS = -L../lib
 LOADLIBES = -lpsyc -lm
-TARGETS = testPsyc testPsycSpeed testParser testMatch testRender testText isRoutingVar getVarType
+TARGETS = testPsyc testPsycSpeed testParser testMatch testRender testText isRoutingVar getVarType parseUniform
 O = test.o
 WRAPPER =
 DIET = diet
@@ -47,6 +47,7 @@ test: ${TARGETS}
 	./testText
 	./isRoutingVar
 	./getVarType
+	./parseUniform
 	x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -f $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x
 	x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -rf $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x
 
diff --git a/test/parseUniform.c b/test/parseUniform.c
new file mode 100644
index 0000000..384dd5a
--- /dev/null
+++ b/test/parseUniform.c
@@ -0,0 +1,84 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <lib.h>
+#include <psyc/uniform.h>
+
+/**
+ * Parses str, prints the parts found and terminates the test program
+ * unless the parser returned ret.
+ */
+static void
+testUniform (const char *str, int ret)
+{
+	psycUniform uni;
+	int r;
+
+	// The parts are printed even if parsing fails early.
+	memset(&uni, 0, sizeof(uni));
+	printf("%s\n", str);
+	r = psyc_parseUniform2(&uni, str, strlen(str));
+
+	PP(("[%.*s] : [%.*s] [%.*s] : [%.*s] [%.*s] / [%.*s] # [%.*s]\n[%.*s] [%.*s]\n[%.*s]\n\n",
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_SCHEME]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_SLASHES]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_HOST]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_PORT]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_TRANSPORT]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_RESOURCE]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_CHANNEL]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_ROOT]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_NICK]),
+		(int)PSYC_S2ARG2(uni[PSYC_UNIFORM_BODY])));
+
+	if (r != ret) {
+		fprintf(stderr, "ERROR: psyc_parseUniform returned %d instead of %d\n", r, ret);
+		exit(1);
+	}
+}
+
+/**
+ * Terminates the test program unless str parses as a psyc uniform
+ * with the given nick.
+ */
+static void
+testNick (const char *str, const char *nick)
+{
+	psycUniform uni;
+	size_t len = strlen(nick);
+
+	memset(&uni, 0, sizeof(uni));
+	if (psyc_parseUniform2(&uni, str, strlen(str)) != PSYC_SCHEME_PSYC ||
+		uni[PSYC_UNIFORM_NICK].length != len ||
+		memcmp(uni[PSYC_UNIFORM_NICK].ptr, nick, len) != 0) {
+		fprintf(stderr, "ERROR: expected nick %s in %s\n", nick, str);
+		exit(1);
+	}
+}
+
+int
+main (void)
+{
+	testUniform("psyc://foo.tld:4404d/@bar#baz", PSYC_SCHEME_PSYC);
+	testUniform("psyc://foo:4405/~bar", PSYC_SCHEME_PSYC);
+	testUniform("psyc://foo:1234", PSYC_SCHEME_PSYC);
+	testUniform("psyc://foo:1234d", PSYC_SCHEME_PSYC);
+	testUniform("psyc://foo/", PSYC_SCHEME_PSYC);
+	testUniform("psyc://foo", PSYC_SCHEME_PSYC);
+	testUniform("psyc://1234567890abcdef:g/~foo", PSYC_SCHEME_PSYC);
+
+	testUniform("xmpp:user@host", PSYC_PARSE_UNIFORM_INVALID_SCHEME);
+	testUniform("psyc", PSYC_PARSE_UNIFORM_INVALID_SCHEME);
+	testUniform("psyc:host", PSYC_PARSE_UNIFORM_INVALID_SLASHES);
+	testUniform("psyc://", PSYC_PARSE_UNIFORM_INVALID_HOST);
+	testUniform("psyc://:123/", PSYC_PARSE_UNIFORM_INVALID_HOST);
+	testUniform("psyc://host:/~foo", PSYC_PARSE_UNIFORM_INVALID_PORT);
+	testUniform("psyc://host:d/~foo", PSYC_PARSE_UNIFORM_INVALID_PORT);
+	testUniform("psyc://1234567890abcdef:1g/~foo", PSYC_PARSE_UNIFORM_INVALID_TRANSPORT);
+
+	testNick("psyc://foo.tld:4404d/@bar#baz", "bar");
+	testNick("psyc://foo:4405/~bar", "bar");
+
+	printf("SUCCESS: psyc_parseUniform passed all tests.\n");
+	return 0;
+}