// mirror of git://git.psyced.org/git/psyced -- synced 2024-08-15 03:25:10 +00:00 (350 lines, 10 KiB, C)
// $Id: parse.c,v 1.54 2008/05/17 07:26:53 lynx Exp $ // vim:syntax=lpc
|
|
//
|
|
// this code is employed to parse both XML and XMPP.
|
|
// if expat has been provided at compiling time, it will try to use it.
|
|
//
|
|
|
|
// local debug messages - turn them on by using psyclpc -DDxml=<level>
|
|
#ifdef Dxml
|
|
# undef DEBUG
|
|
# define DEBUG Dxml
|
|
#endif
|
|
|
|
// until you fix that TODO (please do!)
|
|
#undef __EXPAT__
|
|
|
|
#ifdef JABBER_PARSE
// In JABBER_PARSE mode xmlparse() handles a single tag per call, so the
// parser state has to live outside the function.

// node currently being filled in; reset to 0 after a complete top-level
// node has been handed to nodeHandler
volatile XMLNode currentnode = 0;
// open ancestors of currentnode, innermost last
volatile XMLNode *nodestack = ({ });
// sum of sizeof() of all chunks seen for the current top-level node;
// stored in the node as NodeLen and reset afterwards
volatile int length = 0;
// called via funcall() with each completed top-level node
volatile closure nodeHandler = #'jabberMsg;
# ifdef JABBER_TRANSPARENCY
// ixbuf:    collects the raw chunks between a top-level opening tag and
//           its closing tag
// lasta:    previous raw chunk, appended to ixbuf on the next call
// innerxml: receives ixbuf once the top-level node has been closed
volatile string innerxml, lasta, ixbuf;
# endif
#else
# include <net.h>
# include <xml.h>
inherit NET_PATH "xml/common";
// charset named by encoding="..." in the <?xml ...?> declaration,
// upper-cased by xmlparse()
volatile string charset;

// Report a parse problem. 'code' is not used by this definition, only the
// human readable 'long' text is logged.
# define XML_ERROR(code, long) \
    P0(("XML parse in %O: %s\n", ME, long))
#endif
|
|
|
|
#if !defined(__EXPAT__) || defined(JABBER_PARSE)
|
|
// DOM style XML parser
|
|
// Parse XML text into a tree of XMLNode mappings.
//
// a: the text to parse.
//
// The function is compiled in one of two shapes:
//
//  * without JABBER_PARSE it loops over every '<' found in 'a', keeps its
//    state in local variables and returns currentnode as it stands when
//    the loop ends (the loop is left as soon as the outermost element has
//    been closed).
//
//  * with JABBER_PARSE it handles exactly one tag per call. 'a' is
//    expected to be "character data<tag ...>" with the closing '>' as its
//    last character. State is kept in the global currentnode/nodestack.
//    A completed top-level node is passed to nodeHandler, and
//    <stream:stream> is passed to open_stream().
//
// In both shapes -1 is returned for an empty tag or an unterminated CDATA
// section. With JABBER_PARSE a mismatching closing tag makes the function
// return without a value.
//
// Node layout as written by this function:
//    node[Tag]        element name
//    node[Cdata]      concatenated character data
//    node["@" + key]  attribute value
//    node["/" + tag]  child node, or array of child nodes with that name
xmlparse(a) {
#ifndef JABBER_PARSE
    XMLNode currentnode = 0;
    XMLNode *nodestack = ({ });
#endif
    string t, tag, data = "", params = "";
    int pos, close;

#ifdef JABBER_PARSE
# ifdef _flag_log_sockets_XMPP
    log_file("RAW_XMPP", "\n» %O\t%s", ME, a);
# endif
    length += sizeof(a);
    // everything in front of the '<' is character data
    pos = index(a, '<', pos) + 1;
    data = xmlunquote(a[0..pos -2]);
    // the tag is taken to end with the last character of the chunk
    close = strlen(a) - 1;
# ifdef JABBER_TRANSPARENCY
    if (ixbuf) {
        // append one call late, so that the chunk holding the closing
        // tag of the top-level node never ends up in ixbuf
        if (lasta) ixbuf += lasta;
        lasta = a;
    }
# endif
#else
    pos = 0;
    close = -1;
    // jabber parser doesn't while, so it has one indent step less
    while(pos = index(a, '<', pos) + 1) {
    // text between the previous '>' and this '<'
    data += xmlunquote(a[close + 1..pos - 2]);
#endif
#if 1 //def HANDLE_CDATA
    //
    // http://www.techjamaica.com/forums/external.php?type=rss2
    // uses <![CDATA[<p>this is<br/>embedded html</p>]]> syntax
    // to embed potentially broken html into xml. in fact most
    // blogs produce this sort of rss code these days.
    //
    // do we want to support ![CDATA[ ]] for XMPP, too? ... then fix here!
    if (a[pos..pos+7] == "![CDATA[") {
        pos += 8;
        close = strstr(a, "]]>", pos);
        if (close < 0) {
            // Without this check pos would be rewound to 1 below and
            // the looping parser would find the same CDATA marker
            // over and over again.
            XML_ERROR("xml-not-well-formed",
                      "Unterminated CDATA section");
            return -1;
        }
        data += xmlunquote(a[pos..close-1]);
        close += 2;     // now the index of the '>' ending the section
        pos = close;    // this may seem optional.. but?
        P4(("%O unCDATAfied %O\n", ME, data));
# ifndef JABBER_PARSE
        continue;
# else
        // ok, so this doesn't hurt at least..
        // but should return here? and what?
# endif
    }
#endif
#ifndef JABBER_PARSE
    close = index(a, '>', pos);
#endif
    tag = a[pos..close-1];
    pos = close+1;  // do not reparse seen things (opt)
    // start every tag without parameters, so that a tag which has none
    // cannot inherit those of the previous tag when sscanf leaves its
    // second target unassigned
    params = "";
    sscanf(tag, "%s%t%s", tag, params);
    if (tag == "") return -1;
    if (strlen(tag) && (tag[0] == '!' || tag[0] == '?')) {
        // declaration, comment or processing instruction
#ifndef JABBER_PARSE
        // charset handling currently limited to news parsers
        if (lower_case(tag) == "?xml" &&
            (sscanf(params, "%sencoding=\"%s\"%s", t, charset, t) >= 2 ||
             sscanf(params, "%sencoding=\'%s\'%s", t, charset, t) >= 2)) {
            charset = upper_case(charset);
            if (charset != SYSTEM_CHARSET) {
                // ok, we believe it's working :)
                PT(("%O converting from charset %O\n", ME, charset))
                // NOTE(review): presumably converts 'a' in place,
                // confirm against the definition of iconv()
                iconv(a, charset, SYSTEM_CHARSET);
            }
        }
        else {
            PT(("%O skipping funny %O tag (%O)\n", ME, tag, params))
        }
#endif
#ifdef JABBER_PARSE
    } else if (strlen(tag) && tag == "/stream:stream"){
        // close_stream();
        // quit();
#endif
    // tag is a close tag
    } else if (strlen(tag) && tag[0] == '/') {
        // currentnode may be 0 here, so it must not be indexed blindly
        P4(("should be closing tag %O and am closing %O\n",
            currentnode ? currentnode[Tag] : 0, tag[1..]))
        if (!currentnode || currentnode[Tag] != tag[1..]) {
            XML_ERROR("xml-not-well-formed",
                      "Unbalanced XML encountered");
            PT(("%O closing %O instead of tag in %O\n", ME, tag,
                currentnode))
#ifdef JABBER_PARSE
            // this will trigger disconnect in calling object
            return;
#endif
        } else {
            // closing tag found, those carry no parameters
            if (strlen(data) && data != "\r\n" && data != "\n"){
                // we just concatenate the cdata!
                if (!stringp(currentnode[Cdata]))
                    currentnode[Cdata] = data;
                else
                    currentnode[Cdata] += data;
            }
            data = "";
#ifdef JABBER_PARSE
# ifdef JABBER_TRANSPARENCY
            // the two ifs can be optimized if we like this
            // approach better than three comparisons
            if (sizeof(nodestack) == 0) {
                // if (tag == "/iq"
                //  || tag == "/presence"
                //  || tag == "/message") {
                innerxml = ixbuf;
                ixbuf = lasta = 0;
                P4((" <%s>\n", tag))
                P4(("innerxml body %O\n", innerxml))
            }
# endif
#endif
            if (sizeof(nodestack) == 0) {
                // the outermost open element has just been closed
#ifdef JABBER_PARSE
                currentnode[NodeLen] = length;
                // handle stuff
                funcall(nodeHandler, currentnode);
                currentnode = 0;
                length = 0;
#else
                // we can probably break/return here
                break;
#endif
            } else {
                // step back up to the parent
                currentnode = nodestack[<1];
                nodestack = nodestack[..<2];
            }
        }
    } else { // opening tag
        int selfclosing;
        XMLNode newnode;
        string key, val;
        mixed *ptmp;

        // note: this tests 'data' itself, whereas the closing tag branch
        // above tests strlen(data)
        if (currentnode && data && data != "\r\n" && data != "\n") {
            // we just concatenate the cdata!
            // watch out, nearly identical code above
            if (!stringp(currentnode[Cdata]))
                currentnode[Cdata] = data;
            else
                currentnode[Cdata] += data;
        }
        data = "";

        // <tag .../> or <tag/>
        if (strlen(params) && params[<1] == '/') {
            params = params[..<2];
            selfclosing = 1;
        } else if (tag[<1] == '/') {
            tag = tag[..<2];
            selfclosing = 1;
        }
        newnode = new_XMLNode;

        if (currentnode) {
            // hook the new node into its parent as "/" + tag
            t = "/"+ tag;
            nodestack += ({ currentnode });
            if (mappingp(currentnode[t])) {
                // transform
                currentnode[t] = ({ currentnode[t], newnode });
                currentnode = currentnode[t][<1];
            } else if (pointerp(currentnode[t])) {
                // append
                currentnode[t] += ({ newnode });
                currentnode = currentnode[t][<1];
            } else {
                // create
                currentnode[t] = newnode;
                currentnode = currentnode[t];
            }
        } else {
            currentnode = newnode;
        }
        currentnode[Tag] = tag;
#if 1
# ifndef JABBER_PARSE
        // this will still not be able to handle something like
        // <img src='18072006.jpg' alt="5er & s'Weggli" />
        // but who sends something like that?
        ptmp = regexplode(params, "[a-zA-Z0-9]+=\"[^\"]*\"");
        // Fall back to single quotes only if no double quoted attribute
        // was found. regexplode() returns text, match, text, ..., text,
        // so testing for an odd size would always trigger the fallback.
        if (sizeof(ptmp) < 2)
            ptmp = regexplode(params, "[a-zA-Z0-9]+='[^']*'");
# else
        // this method breaks on something like
        // <img src="18072006.jpg" alt="5er & s'Weggli" />
        ptmp = regexplode(params, "[a-zA-Z0-9]+=(\"|')[^\"']*(\"|')");
# endif
        // the matches (key=<quote>value<quote>) sit at the odd indices
        for (int i = 1; i < sizeof(ptmp); i += 2) {
            int where = index(ptmp[i], '=');

            key = ptmp[i][..where-1];
            val = ptmp[i][where+1..];

            if (val[0] != val[<1]) {
                XML_ERROR("xml-not-well-formed", "Mismatching quotes")
                PT(("%O %O %O %O\n", ME, key, val, ptmp))
            }
            val = val[1..<2];   // strip the quotes
            currentnode["@"+ key] = val;
        }
#else
        // this approach cannot handle param="string with spaces"
        foreach(string pa: explode(params, " ")) {
            if(sscanf(pa, "%s=\"%s\"", key, val) == 2 ||
               sscanf(pa, "%s=\'%s\'", key, val) == 2 ) {
                currentnode["@"+ key] = val;

            }
        }
#endif
        if (selfclosing) {
            if (sizeof(nodestack) == 0){
#ifdef JABBER_PARSE
                currentnode[NodeLen] = length;
# ifdef JABBER_TRANSPARENCY
                ixbuf = lasta = 0;
                innerxml = ixbuf;
# endif
                // handle stuff
                funcall(nodeHandler, currentnode);
                currentnode = 0;
                length = 0;
#else
                PT(("nodestack empty\n"))
#endif
            } else {
                currentnode = nodestack[<1];
                nodestack = nodestack[..<2];
            }
#ifdef JABBER_PARSE
        } else if (currentnode[Tag] == "stream:stream") {
            open_stream(currentnode);
            nodestack = ({ }); // ?
            currentnode = 0;
# ifdef JABBER_TRANSPARENCY
        } else // if (currentnode[Tag] == "iq"
               //  || currentnode[Tag] == "presence"
               //  || currentnode[Tag] == "message") {
        if (sizeof(nodestack) == 0) {
            // a top-level node has been opened: start collecting
            ixbuf = ""; lasta = 0;
            P4((" <%s> ", currentnode[Tag]))
# endif
#endif
        }
    }
#ifndef JABBER_PARSE
    }
    return currentnode;
#endif
}
|
|
|
|
#else /* !defined(__EXPAT__) || defined(JABBER_PARSE) */
|
|
|
|
// Node the expat callbacks are currently working on: onStart() makes it
// point to the newly opened element, onEnd() moves it back to the parent,
// and xmlparse() returns it once expat_parse() is done.
volatile mixed node = 0;
|
|
// Open ancestors of 'node', innermost last: pushed by onStart(),
// popped by onEnd().
volatile mixed *nodestack = ({ });
|
|
|
|
// expat callback for an opening tag.
//
// elem:   name of the element
// params: attributes as delivered by expat, stored unchanged as node[Param]
//
// Creates a node for the element, files it below the currently open node
// as "/" + elem (single node, or array as soon as there are several
// children with that name) and makes it the current node.
void onStart(string elem, string *params) {
    string childkey = "/"+ elem;

    if (!node) {
        // first element: this is the root, start with a clean stack
        node = new_XMLNode;
        nodestack = ({ });
    } else {
        // remember the parent, onEnd() returns to it
        nodestack += ({ node });
        if (node[childkey]) {
            /* a single node with that name gets wrapped into an array
             * before the new sibling is appended
             */
            if (!nodelistp(node[childkey])) {
                node[childkey] = ({ node[childkey] });
            }
            node[childkey] += ({ new_XMLNode });
            node = node[childkey][<1];
        } else {
            /* no child with that name yet */
            node[childkey] = new_XMLNode;
            node = node[childkey];
        }
    }
    node[Tag] = elem;
    // TODO: this does not work like that with the new API
    node[Param] = params;
}
|
|
|
|
// expat callback for a closing tag: makes the parent of the current node
// the current node again. 'elem' is not looked at.
void onEnd(string elem) {
    /* nothing left to return to - are we finished? */
    if (!(sizeof(nodestack) > 0)) return;

    node = nodestack[<1];
    nodestack = nodestack[..<2];
}
|
|
|
|
// expat callback for character data: appends 'text' to the Cdata of the
// node that is currently open.
void onText(string text) {
    // Text reported while no element is open has no node to go to. The
    // DOM style parser checks for a current node before storing character
    // data, so do the same here instead of indexing 0.
    if (!node) return;

    if (node[Cdata])
        node[Cdata] += text;
    else
        node[Cdata] = text;
}
|
|
|
|
// expat based variant of xmlparse().
//
// a: the text to parse.
//
// Clears the callback state, lets expat_parse() drive onStart(), onEnd()
// and onText() and returns 'node' as those callbacks leave it. The value
// returned by expat_parse() is stored but not evaluated.
xmlparse(a) {
    int result;

    PT(("expat xmlparse\n"))
    nodestack = ({ });
    node = 0;
    result = expat_parse(a, #'onStart, #'onEnd, #'onText);
    return node;
}
|
|
|
|
#endif
|