// vim:foldmethod=marker:syntax=lpc:noexpandtab
// $Id: parse.c,v 1.30 2008/12/18 18:16:14 lynx Exp $
//
// Incremental packet parser. feed() appends raw input to `buffer`; step()
// consumes it according to `state` (greeting, routing header, content).
// Completed packets are handed to dispatch(), errors to croak(); both are
// stubs here.
//
#include "psyc.h"
// NOTE(review): the targets of the next two includes were lost in the copy
// this revision was prepared from (angle-bracket contents stripped).
// <net.h> and <input_to.h> are the presumed originals -- confirm before merging.
#include <net.h>
#include <input_to.h>

private string buffer;		// unparsed input accumulated by feed()
private string body_buffer;	// content of the packet currently being parsed
int state;			// one of the PSYCPARSE_STATE_* values
int body_len;			// value of the _length routing variable, 0 if none
int may_parse_more;		// set by parser_reset(); makes feed() call step() again

// temporarily holds the modifiers collected so far; parse_header() and
// parse_psyc() append entries of the form ({ vname, glyph, vvalue })
array(mixed) tvars;
// result of process_header() for the packet being parsed
mapping hvars;

// being faded out in favor of regular croak()
// note: expands to a block that returns 0 from the *enclosing* function
#define PARSEERROR(reason) { \
	croak("_error_syntax_broken", \
	      "Failed parsing: " reason); \
	return 0; }

#define DELIM S_GLYPH_PACKET_DELIMITER "\n"
#define C_LINEFEED '\n'

#define MVAR_GLYPH 0
#define MVAR_STATE 1
#define MVAR_VALUE 2

#ifndef PSYC_UDP
# define QUIT remove_interactive(ME); return 0;
#endif

step(); // prototype

// reset parser state for the next packet
// a BLOCKED state is kept, so an interrupt survives a dispatch()
void parser_reset() {
	if (state != PSYCPARSE_STATE_BLOCKED)
		state = PSYCPARSE_STATE_HEADER;
	body_len = 0;
	body_buffer = 0;
	may_parse_more = 1;
	tvars = ({ });
	hvars = ([ ]);
}

// initialize the parser: empty input buffer, waiting for the greeting
void parser_init() {
	buffer = "";
	parser_reset();
	state = PSYCPARSE_STATE_GREET; // AFTER reset
}

// it is sometimes useful to stop parsing
void interrupt_parse() {
	state = PSYCPARSE_STATE_BLOCKED;
}

// and resume after some blocking operation is done
// note: this only switches the state; buffered input is looked at again
// on the next step()
void resume_parse() {
	state = PSYCPARSE_STATE_HEADER;
}

// input data to the buffer and parse as much of it as possible
// step() is repeated for as long as a step reset the parser
// (parser_reset() sets may_parse_more)
void feed(string data) {
# ifdef _flag_log_sockets_SPYC
	log_file("RAW_SPYC", "ยป %O\n%s\n", ME, data);
# endif
	buffer += data;

	do {
		may_parse_more = 0;
		step();
	} while (may_parse_more);
}

// error reporting hook, overload this as needed
// this basic version ignores its arguments and returns 0
varargs mixed croak(string mc, string data, vamapping vars) { return 0; }

// called when a complete packet has arrived
// this basic version only resets the parser
void dispatch(mixed header_vars, mixed varops, mixed method, mixed body) {
	parser_reset();
}

// processes routing header variable assignments
// basic version does no state
//
// varops: array of ({ vname, glyph, vvalue }) entries
// returns a mapping vname -> vvalue, or 0 after having croaked
// only the set glyph is accepted; any other glyph croaks and QUITs
mapping process_header(mixed varops) {
	mapping vars = ([ ]);

	// apply mmp state
	foreach(mixed vop : varops) {
		string vname = vop[0];

		switch(vop[1]) {
		case C_GLYPH_MODIFIER_SET:
			vars[vname] = vop[2];
			break;
		case C_GLYPH_MODIFIER_AUGMENT:
		case C_GLYPH_MODIFIER_DIMINISH:
		case C_GLYPH_MODIFIER_QUERY:
		case C_GLYPH_MODIFIER_ASSIGN:
			croak("_failure_unavailable_state");
			QUIT
		default:
			croak("_failure_unknown_glyph");
			QUIT
		}
		// FIXME: not every legal varname is a routing varname
		// look at shared_memory("routing")
		if (!legal_keyword(vname) || abbrev("_INTERNAL", vname)) {
			croak("_error_illegal_protocol_variable", 0,
			      ([ "_variable_name": vname ]));
			return 0;
		}
	}
	return vars;
}

// parse the header part of the packet
// i.e. that is all mmp modifiers
// switch to content buffering mode afterwards
//
// each call handles one of: a bare packet delimiter (packet without
// content), the empty line ending the header, or one modifier line
void parse_header() {
	if (!strlen(buffer)) return;

	if (buffer[0] == C_GLYPH_PACKET_DELIMITER) {
		if (strlen(buffer) < 2) return;	// need the byte after the delimiter
		if (buffer[1] == C_LINEFEED) {
			buffer = buffer[2..];
			hvars = process_header(tvars);
			tvars = ({ });
			// process_header() returns 0 after croaking; do not hand
			// that to dispatch() as if it were a set of variables
			if (!mappingp(hvars)) {
				hvars = ([ ]);
				return;
			}
			dispatch(hvars, tvars, 0, 0);
		} else {
			// this one is sth like |whatever
			// actually this should be noglyph i think
			PARSEERROR("strange thing")
		}
	} else if (buffer[0] == C_LINEFEED) {
		// state transition from parsing header to buffering body
		mixed len;

		buffer = buffer[1..];
		hvars = process_header(tvars);
		tvars = ({ });
		// process_header() returns 0 after croaking; the error has been
		// reported, so give up on this header instead of indexing 0
		if (!mappingp(hvars)) {
			hvars = ([ ]);
			return;
		}
		// FIXME: validate source/context here
		len = hvars["_length"];
		if (len) {
			// process_header() above stores the plain value; an
			// overloaded version may store ({ glyph, state, value })
			if (pointerp(len)) len = len[MVAR_VALUE];
			// a negative length would later be used as a string index
			// in buffer_content()
			if (to_int(len) < 0) {
				PARSEERROR("negative _length")
			}
			body_len = to_int(len);
		}
		state = PSYCPARSE_STATE_CONTENT;
		step();
	} else {
		// parse mmp-header
		int fit;
		int glyph;
		string vname, vvalue;

		switch(buffer[0]) {
		case C_GLYPH_MODIFIER_SET:
		case C_GLYPH_MODIFIER_ASSIGN:
		case C_GLYPH_MODIFIER_AUGMENT:
		case C_GLYPH_MODIFIER_DIMINISH:
		case C_GLYPH_MODIFIER_QUERY:
			glyph = buffer[0];
			break;
		default:
			croak("_error_syntax_expected_routing");
			QUIT
		}
		// the line may have arrived only partially; consume nothing
		// until its linefeed is in the buffer, otherwise the next feed()
		// would continue in the middle of a half-eaten line
		if (strstr(buffer, "\n") == -1) return;
		buffer = buffer[1..];

		fit = sscanf(buffer, "%.1s%t", vname);
		if (fit != 1) {
			PARSEERROR("vname")
		}
		buffer = buffer[strlen(vname)..];

		switch(buffer[0]) {
		case '\t':
			fit = sscanf(buffer, "\t%s\n%.0s", vvalue, buffer);
			if (fit != 2) {
				PARSEERROR("simple-arg")
			}
			break;
		case '\n': // deletion
			// this is currently implemented as "vvalue is 0" internally
			// and must be handled in dispatch() when merging
			buffer = buffer[1..];
			break;
		default:
			PARSEERROR("arg")
		}
		tvars += ({ ({ vname, glyph, vvalue }) });
		step();
	}
}

// parse all psyc-modifiers
// this differs from the mmp modifiers in the sense that
// a) packet is known to be complete here
// b) psyc modifiers may have binary args
//
// consumes modifier lines from the front of body_buffer and appends them
// to tvars; returns at the first line not starting with a modifier glyph
// (the method line)
// NOTE(review): a PARSEERROR in here only leaves this function; the caller
// parse_content() cannot see the failure and carries on with whatever is
// left in body_buffer
void parse_psyc() {
	while (1) { // slurp in all psyc-modifiers
		int fit, len;
		int glyph;
		string vname, vvalue;

		// nothing left, so there is no modifier and no method either
		if (!strlen(body_buffer)) return;

		switch(body_buffer[0]) {
		case C_GLYPH_MODIFIER_SET:
		case C_GLYPH_MODIFIER_ASSIGN:
		case C_GLYPH_MODIFIER_AUGMENT:
		case C_GLYPH_MODIFIER_DIMINISH:
		case C_GLYPH_MODIFIER_QUERY:
			glyph = body_buffer[0];
			body_buffer = body_buffer[1..];
			break;
		default:
			// this is the method
			return;
		}
		fit = sscanf(body_buffer, "%.1s%t", vname);
		if (fit != 1) {
			PARSEERROR("vname")
		}
		body_buffer = body_buffer[strlen(vname)..];

		switch(body_buffer[0]) {
		case ' ':
			// binary arg: " <length>\t<length bytes>\n"
			fit = sscanf(body_buffer, " %d\t%.0s", len, body_buffer);
			if (fit != 2) {
				PARSEERROR("binary-arg length")
			}
			if (len < 0) {
				PARSEERROR("negative binary length")
			}
			if (strlen(body_buffer) < len) {
				// ouch, this is a problem here
				PARSEERROR("not enough to read binary arg, may not happen")
			}
			// avoid the [..-1] range for a zero-length value
			vvalue = len ? body_buffer[..len-1] : "";
			body_buffer = body_buffer[len..];
			// the value may have used up the buffer; check before
			// looking at the terminating linefeed
			if (!strlen(body_buffer)
			    || body_buffer[0] != C_LINEFEED) {
				PARSEERROR("binary terminal")
			}
			body_buffer = body_buffer[1..];
			break;
		case '\t':
			fit = sscanf(body_buffer, "\t%s\n%.0s", vvalue, body_buffer);
			if (fit != 2) {
				PARSEERROR("simple-arg")
			}
			break;
		case '\n':
			// modifier without argument: none of these operations is
			// implemented yet, every branch reports an error and returns
			switch(glyph) {
			case C_GLYPH_MODIFIER_ASSIGN:
				// delete this context's state
				PARSEERROR("tbd")
				// unfortunately the routing hasn't been processed at
				// this moment yet, so we don't know whose context this
				// is and if it is legitimate. we first have to fix the
				// processing of the routing layer before we can implement
				// anything of this
			case C_GLYPH_MODIFIER_SET:
				// remember to temporarily ignore state
				PARSEERROR("tbd")
			case C_GLYPH_MODIFIER_QUERY:
				// mark that the next reply packet should
				// contain a state sync
				PARSEERROR("tbd")
			default:
				PARSEERROR("undefined operation")
			}
			return;
		default:
			PARSEERROR("arg")
		}
		tvars += ({ ({ vname, glyph, vvalue }) });
	}
}

// parse completed content
// body_buffer holds the content including its final linefeed; after the
// modifiers the method line is read, whatever follows is the body
void parse_content() {
	int fit;
	string method, body;

	// at this point we should check for relaying, then potentially
	// route the data without parsing it.. TODO
	parse_psyc();

	// ASSERT strlen(buffer)
	// an empty body_buffer is left to the method check below
	if (strlen(body_buffer) && body_buffer[0] == C_LINEFEED) {
		PARSEERROR("empty method")
	}
	// FIXME: i am not sure if this is correct...
	if (body_buffer == S_GLYPH_PACKET_DELIMITER) {
		P0(("encountered packet delimiter in packet body? how's that?\n"))
		dispatch(hvars, tvars, 0, 0);
		return;
	}
	fit = sscanf(body_buffer, "%.1s\n%.0s", method, body_buffer);
	if (fit != 2 || !legal_keyword(method)) {
		croak("_error_illegal_method");
		//	"That's not a valid method name.");
		return;
	}

	// mhmm... why does body_buffer still contain the newline?
	// because the newline is by definition not part of the body!
	if (strlen(body_buffer)) body = body_buffer[..<2];
	dispatch(hvars, tvars, method, body);
}

// buffer content until complete
// then parse it
// note: you could overload this, if you just want to route
// 	the packet
//
// with a _length (body_len != 0) the content is taken by size and must be
// followed by linefeed + DELIM; otherwise the buffer is searched for
// linefeed + DELIM. body_buffer receives the content including that
// linefeed, the delimiter is dropped.
void buffer_content() {
	int t;

	if (body_len) {
		if (strlen(buffer) >= body_len + 3) {
			// make sure that the packet is properly terminated
			if (buffer[body_len..body_len+2] != "\n" DELIM) {
				PARSEERROR("packet delimiter after binary body not found")
			}
			body_buffer = buffer[..body_len];
			buffer = buffer[body_len+3..];
			parse_content();
		} else {
			P4(("buffer_content: waiting for more binary data\n"))
		}
	} else if ((t = strstr(buffer, "\n" DELIM)) != -1) {
		body_buffer = buffer[..t];
		buffer = buffer[t+3..];
		parse_content();
		P4(("buffer_content: packet complete\n"))
	} else {
		P4(("buffer_content: waiting for more plain data. buffer %O vs %O\n",
		    to_array(buffer), to_array("\n" DELIM)))
	}
}

// respond to the first empty packet
void first_response() {
	P0(("parser::first_response called. overload this!"))
}

// parser stepping function
// does nothing on an empty buffer, otherwise hands the buffer to the
// handler of the current state
void step() {
	P3(("%O step: state %O, buffer %O\n", ME, state, buffer))
	if (!strlen(buffer))
		return;
	switch(state) {
	case PSYCPARSE_STATE_HEADER:
		parse_header();
		break;
	case PSYCPARSE_STATE_CONTENT:
		buffer_content();
		break;
	case PSYCPARSE_STATE_BLOCKED:
		// someone requested to stop parsing - e.g. _request_features circuit
		// message
		break;
	case PSYCPARSE_STATE_GREET: // wait for greeting
		if (strlen(buffer) < 2)
			return;
		if (buffer[0..1] == DELIM) {
			state = PSYCPARSE_STATE_HEADER;
			buffer = buffer[2 ..];
			first_response();
			step();
		} else {
			croak("_error_syntax_initialization");
			//	"The new protocol begins with a pipe and a line feed.");
		}
		break;
	default: // uhm... if we ever get here this is the programmers fault
		break;
	}
}

// FIXME should be in a standalone module
//#define PARSEERROR(args) debug_message(sprintf("LIST PARSE ERROR: " args));
#define LISTSEP '|'
#define LISTPARSE_FAIL -1
#define LISTMODE_PLAIN 0
#define LISTMODE_BINARY 1

// list parsing function - val is assumed to be stripped of the final LF
//
// plain list: starts with LISTSEP, the elements are separated by LISTSEP
// binary list: "<length> <length bytes>" elements separated by LISTSEP
// returns an array of strings, or LISTPARSE_FAIL for a malformed binary list
mixed list_parse(string val) {
	mixed *lv = ({ });

	// nothing to parse; also keeps val[0] below inside the string
	if (!strlen(val))
		return lv;
	if (val[0] == LISTSEP)
		return explode(val[1..], "|");
	while (strlen(val)) {
		int fit, len;

		fit = sscanf(val, "%d %s", len, val);
		if (fit != 2) {
			// invalid binary fit
			return LISTPARSE_FAIL;
		}
		if (len < 0 || len > strlen(val)) {
			// invalid binary length
			return LISTPARSE_FAIL;
		}
		if (len != strlen(val) && val[len] != LISTSEP) {
			// listsep not found after binary
			return LISTPARSE_FAIL;
		}
		// avoid the [..-1] range for a zero-length element
		lv += ({ len ? val[..len-1] : "" });
		val = val[len+1..];
	}
	return lv;
}

#ifdef SELFTESTS
// runs list_parse() on one plain and one binary sample; results are discarded
// NOTE(review): the binary sample puts a tab after the length while
// list_parse() scans with "%d %s" -- confirm this input is meant to parse
test() {
	list_parse("|psyc://example.symlynX.com/~jim|psyc://example.org/~judy");
	list_parse("5\tabcde|4\tabcd");
}
#endif