mirror of
git://git.psyced.org/git/psyced
synced 2024-08-15 03:25:10 +00:00
let the past begone in cvs land. welcome to igit igit!
This commit is contained in:
commit
4e601cf1c7
509 changed files with 77963 additions and 0 deletions
37
world/net/xml/common.c
Normal file
37
world/net/xml/common.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
// $Id: common.c,v 1.10 2007/08/15 09:47:02 lynx Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// yes even we can't avoid having XML parsing classes
|
||||
// used by jabber and RSS (place/news)
|
||||
|
||||
// hmm.. why are you including interface.h directly?
|
||||
// it comes automatically with net.h
|
||||
#include <interface.h>
|
||||
|
||||
#include "xml.h"
|
||||
|
||||
// Return an XML-escaped copy of s.
//
// The five characters with predefined XML entities are replaced:
//   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;
//   "  ->  &quot;    '  ->  &apos;
string xmlquote(string s) {
    // '&' has to be escaped first: the replacements below introduce
    // new ampersands which must not be escaped a second time
    s = replace(s, "&", "&amp;");
    s = replace(s, "<", "&lt;");
    s = replace(s, ">", "&gt;");
    // looks like these only need to be quoted if
    // the string is to be used in an attribute/param
    // but it doesn't hurt anyway so...
    s = replace(s, "\"", "&quot;");
    s = replace(s, "'", "&apos;");
    return s;
}
|
||||
|
||||
// Return s with XML escaping removed (inverse of xmlquote).
//
// Decodes the predefined entities &lt; &gt; &quot; &apos; &amp; and
// decimal character references of exactly three digits (e.g. &#223;).
// Other references (hex, or a different number of digits) are left
// untouched.
string xmlunquote(string s) {
    s = replace(s, "&lt;", "<");
    s = replace(s, "&gt;", ">");
    s = replace(s, "&quot;", "\"");
    s = replace(s, "&apos;", "'");
    // should this take care of &#223;-style thingies
    //  s = regreplace(s, "&#223;", 223);
    // $1 is the matched reference; [2..<2] cuts off "&#" and ";"
    s = regreplace(s, "&#[0-9][0-9][0-9];",
                   (: return sprintf("%c", to_int($1[2..<2])); :), 1);
    // "&amp;" is decoded last so that an escaped ampersand cannot
    // combine with the following text into a second entity
    // ("&amp;lt;" must yield "&lt;", not "<")
    s = replace(s, "&amp;", "&");
    return s;
}
|
344
world/net/xml/parse.c
Normal file
344
world/net/xml/parse.c
Normal file
|
@ -0,0 +1,344 @@
|
|||
// $Id: parse.c,v 1.51 2008/03/29 20:36:44 lynx Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// this code is employed to parse both XML and XMPP.
|
||||
// if expat has been provided at compiling time, it will try to use it.
|
||||
//
|
||||
|
||||
// until you fix that TODO (please do!)
|
||||
// force the hand-written parser below: with __EXPAT__ undefined the
// "#if !defined(__EXPAT__) || defined(JABBER_PARSE)" branch is always taken
#undef __EXPAT__
|
||||
|
||||
// JABBER_PARSE build: xmlparse() does not loop, so the parser state
// lives in object variables between calls
#ifdef JABBER_PARSE
|
||||
// node currently being filled; reset to 0 after a top-level node was handled
volatile XMLNode currentnode = 0;
|
||||
// ancestors of currentnode, innermost last
volatile XMLNode *nodestack = ({ });
|
||||
// sizeof() of all input seen since the last handled top-level node;
// stored into the node as NodeLen
volatile int length = 0;
|
||||
// called via funcall() with every completed top-level node
closure nodeHandler = #'jabberMsg;
|
||||
# ifdef JABBER_TRANSPARENCY
|
||||
// ixbuf collects the raw input of the current top-level node (lasta is
// the most recent chunk, appended on the next call); innerxml receives
// the collected text when that node is closed
volatile string innerxml, lasta, ixbuf;
|
||||
# endif
|
||||
#else
|
||||
# include <net.h>
|
||||
# include <xml.h>
|
||||
inherit NET_PATH "xml/common";
|
||||
// upper-cased encoding taken from the <?xml ... encoding="..."?> declaration
volatile string charset;
|
||||
|
||||
// log a parse problem; the 'code' argument is not used by this definition
# define XML_ERROR(code, long) \
|
||||
P0(("XML parse in %O: %s\n", ME, long))
|
||||
#endif
|
||||
|
||||
#if !defined(__EXPAT__) || defined(JABBER_PARSE)
|
||||
// DOM style XML parser
|
||||
// DOM style XML parser, shared by the RSS/news code and (when compiled
// with JABBER_PARSE) the XMPP stream handling.
//
// Without JABBER_PARSE:
//   'a' is a whole document. The function loops over all tags and
//   returns the node that is current when the loop ends (the root node
//   once its closing tag has been seen), or -1 when an empty tag name,
//   an unterminated tag or an unterminated CDATA section is found.
//
// With JABBER_PARSE:
//   each call looks at a single tag; 'close' is taken to be the last
//   character of 'a'. State is kept in the object variables
//   currentnode/nodestack/length. Completed top-level nodes are passed
//   to nodeHandler, <stream:stream> is passed to open_stream(). No value
//   is returned explicitly; the bare return on unbalanced XML is meant
//   to make the caller disconnect.
//
// Node layout as built here: node[Tag] is the element name,
// node[Cdata] the concatenated character data, node["@"+name] an
// attribute value, node["/"+name] a child node or an array of child
// nodes when the name occurs more than once.
xmlparse(a) {
#ifndef JABBER_PARSE
    XMLNode currentnode = 0;
    XMLNode *nodestack = ({ });
#endif
    string t, tag, data = "", params = "";
    int pos, close;

#ifdef JABBER_PARSE
# ifdef _flag_log_sockets_XMPP
    D0( log_file("RAW_XMPP", "\n» %O\t%s", ME, a); )
# endif
    length += sizeof(a);
    pos = index(a, '<', pos) + 1;
    data = xmlunquote(a[0..pos -2]);
    close = strlen(a) - 1;
# ifdef JABBER_TRANSPARENCY
    if (ixbuf) {
        if (lasta) ixbuf += lasta;
        lasta = a;
    }
# endif
#else
    pos = 0;
    close = -1;
    // jabber parser doesn't while, so it has one indent step less
    while(pos = index(a, '<', pos) + 1) {
    // text between the previous '>' and this '<'
    data += xmlunquote(a[close + 1..pos - 2]);
#endif
#if 1 //def HANDLE_CDATA
    //
    // http://www.techjamaica.com/forums/external.php?type=rss2
    // uses <![CDATA[<p>this is<br/>embedded html</p>]]> syntax
    // to embed potentially broken html into xml. in fact most
    // blogs produce this sort of rss code these days.
    //
    // do we want to support ![CDATA[ ]] for XMPP, too? ... then fix here!
    if (a[pos..pos+7] == "![CDATA[") {
        pos += 8;
        close = strstr(a, "]]>", pos);
# ifndef JABBER_PARSE
        // without the terminator 'close' is -1; continuing would put
        // 'pos' back to 1 and may scan the same '<' over and over
        if (close < 0) {
            XML_ERROR("xml-not-well-formed", "Unterminated CDATA section");
            return -1;
        }
# endif
        // NOTE(review): the CDATA content is passed through xmlunquote()
        // although entities are literal inside CDATA - kept as it was
        data += xmlunquote(a[pos..close-1]);
        close += 2;
        pos = close; // this may seem optional.. but?
        P4(("%O unCDATAfied %O\n", ME, data));
# ifndef JABBER_PARSE
        continue;
# else
        // ok, so this doesn't hurt at least..
        // but should return here? and what?
# endif
    }
#endif
#ifndef JABBER_PARSE
    close = index(a, '>', pos);
    // truncated input: no '>' for this tag. bail out instead of slicing
    // with a bogus index and restarting the scan at position 0
    if (close < 0) {
        XML_ERROR("xml-not-well-formed", "Unterminated tag");
        return -1;
    }
#endif
    tag = a[pos..close-1];
    pos = close+1; // do not reparse seen things (opt)
    // when the tag contains no whitespace the sscanf assigns nothing,
    // so params must not keep the attributes of the previous tag
    params = "";
    sscanf(tag, "%s%t%s", tag, params); //|| (params = 0);
    if (tag == "") return -1;
    if (strlen(tag) && (tag[0] == '!' || tag[0] == '?')) {
#ifndef JABBER_PARSE
        // charset handling currently limited to news parsers
        if (lower_case(tag) == "?xml" &&
            (sscanf(params, "%sencoding=\"%s\"%s", t, charset, t) >= 2 ||
             sscanf(params, "%sencoding=\'%s\'%s", t, charset, t) >= 2)) {
            charset = upper_case(charset);
            if (charset != SYSTEM_CHARSET) {
                // ok, we believe it's working :)
                PT(("%O converting from charset %O\n", ME, charset))
                iconv(a, charset, SYSTEM_CHARSET);
            }
        }
        else {
            PT(("%O skipping funny %O tag (%O)\n", ME, tag, params))
        }
#endif
#ifdef JABBER_PARSE
    } else if (strlen(tag) && tag == "/stream:stream"){
        // close_stream();
        // quit();
#endif
    // tag is a close tag
    } else if (strlen(tag) && tag[0] == '/') {
        // currentnode may still be 0 here, so do not index it blindly
        P4(("should be closing tag %O and am closing %O\n",
            currentnode ? currentnode[Tag] : 0, tag[1..]))
        if (!currentnode || currentnode[Tag] != tag[1..]) {
            XML_ERROR("xml-not-well-formed",
                      "Unbalanced XML encountered");
            PT(("%O closing %O instead of tag in %O\n", ME, tag,
                currentnode))
#ifdef JABBER_PARSE
            // this will trigger disconnect in calling object
            return;
#endif
        } else {
            // closing tag found; closing tags carry no parameters
            if (strlen(data) && data != "\r\n" && data != "\n"){
                // we just concatenate the cdata!
                if (!stringp(currentnode[Cdata]))
                    currentnode[Cdata] = data;
                else
                    currentnode[Cdata] += data;
            }
            data = "";
#ifdef JABBER_PARSE
# ifdef JABBER_TRANSPARENCY
            // the two ifs can be optimized if we like this
            // approach better than three comparisons
            if (sizeof(nodestack) == 0) {
            // if (tag == "/iq"
            //        || tag == "/presence"
            //        || tag == "/message") {
                innerxml = ixbuf;
                ixbuf = lasta = 0;
                P4((" <%s>\n", tag))
                P4(("innerxml body %O\n", innerxml))
            }
# endif
#endif
            if (sizeof(nodestack) == 0) {
#ifdef JABBER_PARSE
                currentnode[NodeLen] = length;
                // handle stuff
                funcall(nodeHandler, currentnode);
                currentnode = 0;
                length = 0;
#else
                // root element closed: the document is done
                break;
#endif
            } else {
                // back to the parent node
                currentnode = nodestack[<1];
                nodestack = nodestack[..<2];
            }
        }
    } else { // opening tag
        int selfclosing = 0;
        XMLNode newnode;
        string key, val;
        mixed *ptmp;

        // NOTE(review): unlike the closing-tag branch this tests 'data'
        // instead of strlen(data), so an empty string is stored as well.
        // kept as is, callers may rely on it - confirm.
        if (currentnode && data && data != "\r\n" && data != "\n") {
            // we just concatenate the cdata!
            // watch out, nearly identical code above
            if (!stringp(currentnode[Cdata]))
                currentnode[Cdata] = data;
            else
                currentnode[Cdata] += data;
        }
        data = "";

        // <tag attr="x"/> leaves the slash in params, <tag/> in tag
        if (strlen(params) && params[<1] == '/') {
            params = params[..<2];
            selfclosing = 1;
        } else if (tag[<1] == '/') {
            tag = tag[..<2];
            selfclosing = 1;
        }
        newnode = new_XMLNode;

        if (currentnode) {
            t = "/"+ tag;
            nodestack += ({ currentnode });
            if (mappingp(currentnode[t])) {
                // transform: second child of that name, make it a list
                currentnode[t] = ({ currentnode[t], newnode });
                currentnode = currentnode[t][<1];
            } else if (pointerp(currentnode[t])) {
                // append
                currentnode[t] += ({ newnode });
                currentnode = currentnode[t][<1];
            } else {
                // create
                currentnode[t] = newnode;
                currentnode = currentnode[t];
            }
        } else {
            currentnode = newnode;
        }
        currentnode[Tag] = tag;
#if 1//def EXPERIMENTAL // yay, things change fast!
# ifndef JABBER_PARSE
        // one pattern for both quoting styles. the result of
        // regexplode alternates text, match, text, ... and therefore
        // always has an odd size; the former "sizeof(ptmp) % 2" test
        // was always true and threw the double-quoted matches away.
        // this also copes with
        //      <img src='18072006.jpg' alt="5er & s'Weggli" />
        ptmp = regexplode(params, "[a-zA-Z0-9]+=(\"[^\"]*\"|'[^']*')");
# else
        // this method breaks on something like
        //      <img src="18072006.jpg" alt="5er & s'Weggli" />
        ptmp = regexplode(params, "[a-zA-Z0-9]+=(\"|')[^\"']*(\"|')");
# endif
        // the odd elements are the name=value matches
        for (int i = 1; i < sizeof(ptmp); i += 2) {
            int where = index(ptmp[i], '=');

            key = ptmp[i][..where-1];
            val = ptmp[i][where+1..];

            if (val[0] != val[<1]) {
                XML_ERROR("xml-not-well-formed", "Mismatching quotes")
                PT(("%O %O %O %O\n", ME, key, val, ptmp))
            }
            // strip the quotes
            val = val[1..<2];
            currentnode["@"+ key] = val;
        }
#else
        // this approach cannot handle param="string with spaces"
        foreach(string pa: explode(params, " ")) {
            if(sscanf(pa, "%s=\"%s\"", key, val) == 2 ||
               sscanf(pa, "%s=\'%s\'", key, val) == 2 ) {
                currentnode["@"+ key] = val;

            }
        }
#endif
        if (selfclosing) {
            if (sizeof(nodestack) == 0){
#ifdef JABBER_PARSE
                currentnode[NodeLen] = length;
# ifdef JABBER_TRANSPARENCY
                // NOTE(review): ixbuf is cleared before it is copied, so
                // innerxml always ends up 0 here; the closing-tag branch
                // copies first. intended for self-closing stanzas? confirm
                ixbuf = lasta = 0;
                innerxml = ixbuf;
# endif
                // handle stuff
                funcall(nodeHandler, currentnode);
                currentnode = 0;
                length = 0;
#else
                PT(("nodestack empty\n"))
#endif
            } else {
                currentnode = nodestack[<1];
                nodestack = nodestack[..<2];
            }
#ifdef JABBER_PARSE
        } else if (currentnode[Tag] == "stream:stream") {
            open_stream(currentnode);
            nodestack = ({ }); // ?
            currentnode = 0;
# ifdef JABBER_TRANSPARENCY
        } else // if (currentnode[Tag] == "iq"
               //    || currentnode[Tag] == "presence"
               //    || currentnode[Tag] == "message") {
          if (sizeof(nodestack) == 0) {
            // a new top-level node starts: begin collecting its raw text
            ixbuf = ""; lasta = 0;
            P4((" <%s> ", currentnode[Tag]))
# endif
#endif
        }
    }
#ifndef JABBER_PARSE
    }
    return currentnode;
#endif
}
|
||||
|
||||
#else /* !defined(__EXPAT__) || defined(JABBER_PARSE) */
|
||||
|
||||
// node currently being filled by the expat callbacks; xmlparse() returns it
volatile mixed node = 0;
|
||||
// ancestors of 'node', innermost last
volatile mixed *nodestack = ({ });
|
||||
|
||||
// expat callback for an opening tag.
// Creates a node for 'elem', links it below the current node under the
// key "/"+elem (turning the entry into a list when that key is already
// taken) and makes it the current node.
void onStart(string elem, string *params) {
    string childkey = "/"+ elem;
    mixed fresh = new_XMLNode;

    if (!node) {
        // first element: it becomes the root
        nodestack = ({ });
    } else {
        nodestack += ({ node });
        if (node[childkey]) {
            /* a single node with that name gets converted to a list */
            if (!nodelistp(node[childkey]))
                node[childkey] = ({ node[childkey] });
            node[childkey] += ({ fresh });
        } else {
            /* no child with that name */
            node[childkey] = fresh;
        }
    }
    node = fresh;
    node[Tag] = elem;
    // TODO: this does not quite work with the new API
    node[Param] = params;
}
|
||||
|
||||
// expat callback for a closing tag: step back to the parent node.
// With an empty stack the current node is left in place.
void onEnd(string elem) {
    /* nothing left to pop - are we finished? */
    if (sizeof(nodestack) <= 0) return;

    node = nodestack[<1];
    nodestack = nodestack[..<2];
}
|
||||
|
||||
// expat callback for character data: append 'text' to the Cdata of the
// current node, creating the entry on first use.
void onText(string text) {
    if (!node[Cdata]) {
        node[Cdata] = text;
        return;
    }
    node[Cdata] += text;
}
|
||||
|
||||
// expat based variant of xmlparse(): resets the callback state, lets
// expat_parse() drive onStart/onEnd/onText over 'a' and returns
// whatever node is current afterwards. The result of expat_parse()
// itself is not looked at.
xmlparse(a) {
    PT(("expat xmlparse\n"))
    nodestack = ({ });
    node = 0;
    expat_parse(a, #'onStart, #'onEnd, #'onText);
    return node;
}
|
||||
|
||||
#endif
|
65
world/net/xml/parse.c-retro
Normal file
65
world/net/xml/parse.c-retro
Normal file
|
@ -0,0 +1,65 @@
|
|||
// $Id: parse.c,v 1.10 2005/03/14 10:23:28 lynx Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// the actual RSS parser
|
||||
// why the file is called "parse.c" i don't know
|
||||
// it certainly wouldn't be a good idea to have several parsers in one file
|
||||
// so you may as well rename it into rss.c? TODO
|
||||
//
|
||||
#include <net.h>
|
||||
inherit NET_PATH "xml/common";
|
||||
|
||||
// DOM-style XML parser for parsing RSS and RDF
// see CHANGESTODO for discussion on how to make it compliant
//
// Returns a flat mapping: the key is the path of open tags joined
// with "_" (e.g. "_channel_item_title"), the value is the text found
// directly before the matching closing tag, or an array of such texts
// when the same path occurs more than once. dom["type"] is set to
// "rdf" or "rss" when a tag starting with those letters is seen.
// Attributes are ignored and self-closing tags are skipped.
xmlparse(str) {
    mapping dom;
    string namespace;
    string tag, lasttag, data, params;
    int pos, close;

    pos = 0;
    close = -1;
    namespace = "";
    dom = ([ ]);
    while(pos = strstr(str, "<", pos) + 1){
        // D2(D("looping xmlparser...\n");)
        data = xmlunquote(str[close + 1..pos - 2]);
        close = strstr(str, ">", pos);
        // truncated input: stop instead of slicing and searching
        // with an index of -1
        if (close < 0) break;
        // forget the attributes of the previous tag; the sscanf
        // assigns nothing when the tag contains no whitespace
        params = 0;
        sscanf(str[pos..close - 1], "%s%t%s", tag, params)
            || (tag = str[pos..close - 1]);
        // "<>" or "< foo>": nothing to open or close
        if (!strlen(tag)) {
            pos = close;
            continue;
        }
        if (tag[0] == '!' || tag[0] == '?' || tag[0..2] == "rdf" || tag[0..2] == "rss") {
            if(tag[0..2] == "rdf" || tag[0..2] == "rss")
                dom["type"] = tag[0..2];
        } else if (tag[0] == '/') {
            // D2(D("closing " + tag + "\n");)
            // closing tag
            if (tag[1..] != lasttag){
                // D2(D("warning: XML may be malformed\n");)
                ;
            } else {
                // handle data
                if (data != "\n") {
                    if (stringp(dom[namespace]) )
                        dom[namespace] = ({ dom[namespace], data });
                    else if (pointerp(dom[namespace]))
                        dom[namespace] += ({ data });
                    else
                        dom[namespace] = data;
                }
                // drop "_" + lasttag from the end of the path
                namespace = namespace[..<strlen(lasttag) + 2];
                lasttag = explode(namespace, "_")[<1];
            }
        } else {
            // open tag
            if ((stringp(params) && strlen(params) && params[<1] == '/')
                || tag[<1] == '/') {
                // better than before, but not really
                // correct to simply skip it
                continue;
            }
            // D2(D("opening " + tag + "\n");)
            namespace += "_" + tag;
            lasttag = tag;
        }
        pos = close;
    }
    // P2(("DOM: %O\n", dom))
    return dom;
}
|
32
world/net/xml/t/common.c
Normal file
32
world/net/xml/t/common.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
// $Id: common.c,v 1.8 2005/03/14 10:23:28 lynx Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// yes even we can't avoid having XML parsing classes
|
||||
// used by jabber and RSS (place/news)
|
||||
//
|
||||
#include <interface.h>
|
||||
|
||||
#include "xml.h"
|
||||
|
||||
// Return an XML-escaped copy of s.
//
// The five characters with predefined XML entities are replaced:
//   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;
//   "  ->  &quot;    '  ->  &apos;
string xmlquote(string s) {
    // '&' has to be escaped first: the replacements below introduce
    // new ampersands which must not be escaped a second time
    s = replace(s, "&", "&amp;");
    s = replace(s, "<", "&lt;");
    s = replace(s, ">", "&gt;");
    s = replace(s, "\"", "&quot;");
    s = replace(s, "'", "&apos;");
    return s;
}
|
||||
|
||||
// Return s with XML escaping removed (inverse of xmlquote).
//
// Decodes the predefined entities &lt; &gt; &quot; &apos; &amp; and
// decimal character references of exactly three digits (e.g. &#223;).
// Other references (hex, or a different number of digits) are left
// untouched.
string xmlunquote(string s) {
    s = replace(s, "&lt;", "<");
    s = replace(s, "&gt;", ">");
    s = replace(s, "&quot;", "\"");
    s = replace(s, "&apos;", "'");
    // should this take care of &#223;-style thingies
    //  s = regreplace(s, "&#223;", 223);
    // $1 is the matched reference; [2..<2] cuts off "&#" and ";"
    s = regreplace(s, "&#[0-9][0-9][0-9];",
                   (: return sprintf("%c", to_int($1[2..<2])); :), 1);
    // "&amp;" is decoded last so that an escaped ampersand cannot
    // combine with the following text into a second entity
    // ("&amp;lt;" must yield "&lt;", not "<")
    s = replace(s, "&amp;", "&");
    return s;
}
|
65
world/net/xml/t/parse.c
Normal file
65
world/net/xml/t/parse.c
Normal file
|
@ -0,0 +1,65 @@
|
|||
// $Id: parse.c,v 1.10 2005/03/14 10:23:28 lynx Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// the actual RSS parser
|
||||
// why the file is called "parse.c" i don't know
|
||||
// it certainly wouldn't be a good idea to have several parsers in one file
|
||||
// so you may as well rename it into rss.c? TODO
|
||||
//
|
||||
#include <net.h>
|
||||
inherit NET_PATH "xml/common";
|
||||
|
||||
// DOM-style XML parser for parsing RSS and RDF
// see CHANGESTODO for discussion on how to make it compliant
//
// Returns a flat mapping: the key is the path of open tags joined
// with "_" (e.g. "_channel_item_title"), the value is the text found
// directly before the matching closing tag, or an array of such texts
// when the same path occurs more than once. dom["type"] is set to
// "rdf" or "rss" when a tag starting with those letters is seen.
// Attributes are ignored and self-closing tags are skipped.
rssparse(str) {
    mapping dom;
    string namespace;
    string tag, lasttag, data, params;
    int pos, close;

    pos = 0;
    close = -1;
    namespace = "";
    dom = ([ ]);
    while(pos = strstr(str, "<", pos) + 1){
        // D2(D("looping xmlparser...\n");)
        data = xmlunquote(str[close + 1..pos - 2]);
        close = strstr(str, ">", pos);
        // truncated input: stop instead of slicing and searching
        // with an index of -1
        if (close < 0) break;
        // forget the attributes of the previous tag; the sscanf
        // assigns nothing when the tag contains no whitespace
        params = 0;
        sscanf(str[pos..close - 1], "%s%t%s", tag, params)
            || (tag = str[pos..close - 1]);
        // "<>" or "< foo>": nothing to open or close
        if (!strlen(tag)) {
            pos = close;
            continue;
        }
        if (tag[0] == '!' || tag[0] == '?' || tag[0..2] == "rdf" || tag[0..2] == "rss") {
            if(tag[0..2] == "rdf" || tag[0..2] == "rss")
                dom["type"] = tag[0..2];
        } else if (tag[0] == '/') {
            // D2(D("closing " + tag + "\n");)
            // closing tag
            if (tag[1..] != lasttag){
                // D2(D("warning: XML may be malformed\n");)
                ;
            } else {
                // handle data
                if (data != "\n") {
                    if (stringp(dom[namespace]) )
                        dom[namespace] = ({ dom[namespace], data });
                    else if (pointerp(dom[namespace]))
                        dom[namespace] += ({ data });
                    else
                        dom[namespace] = data;
                }
                // drop "_" + lasttag from the end of the path
                namespace = namespace[..<strlen(lasttag) + 2];
                lasttag = explode(namespace, "_")[<1];
            }
        } else {
            // open tag
            if ((stringp(params) && strlen(params) && params[<1] == '/')
                || tag[<1] == '/') {
                // better than before, but not really
                // correct to simply skip it
                continue;
            }
            // D2(D("opening " + tag + "\n");)
            namespace += "_" + tag;
            lasttag = tag;
        }
        pos = close;
    }
    // P2(("DOM: %O\n", dom))
    return dom;
}
|
32
world/net/xml/t/t/common.c
Normal file
32
world/net/xml/t/t/common.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
// $Id: common.c,v 1.8 2005/03/14 10:23:28 lynx Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// yes even we can't avoid having XML parsing classes
|
||||
// used by jabber and RSS (place/news)
|
||||
//
|
||||
#include <interface.h>
|
||||
|
||||
#include "xml.h"
|
||||
|
||||
// Return an XML-escaped copy of s.
//
// The five characters with predefined XML entities are replaced:
//   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;
//   "  ->  &quot;    '  ->  &apos;
string xmlquote(string s) {
    // '&' has to be escaped first: the replacements below introduce
    // new ampersands which must not be escaped a second time
    s = replace(s, "&", "&amp;");
    s = replace(s, "<", "&lt;");
    s = replace(s, ">", "&gt;");
    s = replace(s, "\"", "&quot;");
    s = replace(s, "'", "&apos;");
    return s;
}
|
||||
|
||||
// Return s with XML escaping removed (inverse of xmlquote).
//
// Decodes the predefined entities &lt; &gt; &quot; &apos; &amp; and
// decimal character references of exactly three digits (e.g. &#223;).
// Other references (hex, or a different number of digits) are left
// untouched.
string xmlunquote(string s) {
    s = replace(s, "&lt;", "<");
    s = replace(s, "&gt;", ">");
    s = replace(s, "&quot;", "\"");
    s = replace(s, "&apos;", "'");
    // should this take care of &#223;-style thingies
    //  s = regreplace(s, "&#223;", 223);
    // $1 is the matched reference; [2..<2] cuts off "&#" and ";"
    s = regreplace(s, "&#[0-9][0-9][0-9];",
                   (: return sprintf("%c", to_int($1[2..<2])); :), 1);
    // "&amp;" is decoded last so that an escaped ampersand cannot
    // combine with the following text into a second entity
    // ("&amp;lt;" must yield "&lt;", not "<")
    s = replace(s, "&amp;", "&");
    return s;
}
|
110
world/net/xml/t/t/parse.c
Normal file
110
world/net/xml/t/t/parse.c
Normal file
|
@ -0,0 +1,110 @@
|
|||
// $Id: parse.c,v 1.12 2005/06/07 07:03:02 fippo Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// the actual RSS parser
|
||||
// why the file is called "parse.c" i don't know
|
||||
// it certainly wouldn't be a good idea to have several parsers in one file
|
||||
// so you may as well rename it into rss.c? TODO
|
||||
//
|
||||
#include <net.h>
|
||||
#include <xml.h>
|
||||
inherit NET_PATH "xml/common";
|
||||
|
||||
// DOM style XML parser
|
||||
// DOM style XML parser.
// this one is very similar to the jabber parser
// from a syntax point of view
//
// Walks over all tags of 'a' and builds a tree of XMLNodes:
// node[Tag] is the element name, node[Param] holds the attributes by
// name, node[Child] the child nodes by tag name (a list of nodes when
// a name occurs more than once) and node[Cdata] the text found in
// front of the closing tag.
// Returns the node that is current when the loop ends (the root node
// once its closing tag has been seen), or -1 for an empty tag name or
// a tag without closing '>'.
xmlparse(a) {
    string tag, data, params;
    int pos, close;

    XMLNode currentnode = 0;
    XMLNode nodestack = ({ });
    pos = 0;
    close = -1;

    while(pos = strstr(a, "<", pos) + 1) {
        data = xmlunquote(a[close + 1..pos - 2]);
        close = strstr(a, ">", pos);
        // truncated input: do not slice with an index of -1
        if (close < 0) return -1;

        // forget the attributes of the previous tag; the sscanf
        // assigns nothing when the tag contains no whitespace
        params = "";
        sscanf(a[pos..close - 1], "%s%t%s", tag, params)
            || (tag = a[pos..close-1]);
        if(tag == "") return -1;
        if (tag[0] == '!' || tag[0] == '?'){
            // P2(("skipping tag starting with ! or ?\n"))
        } else if (tag[0] == '/'){
            // currentnode may still be 0 here, so do not index it blindly
            P4(("should be closing tag %O and am closing %O\n",
                currentnode ? currentnode[Tag] : 0, tag[1..]))
            if (!currentnode || currentnode[Tag] != tag[1..]) {
                // unbalanced xml?
            } else {
                // closing tag found; closing tags carry no parameters
                if (strlen(data) && data != "\r\n" && data != "\n"){
                    // not sure if this works correct
                    // NOTE(review): the first text is stored as a plain
                    // string, so the append branch only runs when Cdata
                    // already is an array; a second text replaces the
                    // first one. kept as it was - confirm the intent.
                    if (!pointerp(currentnode[Cdata]))
                        currentnode[Cdata] = data;
                    else
                        currentnode[Cdata] += ({ data });
                }
                if (sizeof(nodestack) == 0) {
                    // root element closed: the document is done
                    break;
                } else {
                    currentnode = nodestack[<1];
                    nodestack = nodestack[..<2];
                }
            }
        } else { // opening tag
            int selfclosing = 0;
            mixed newnode;
            string key, val;

            // <tag attr="x"/> leaves the slash in params, <tag/> in tag
            if (strlen(params) && params[<1] == '/') {
                params = params[..<2];
                selfclosing = 1;
            } else if (tag[<1] == '/') {
                tag = tag[..<2];
                selfclosing = 1;
            }
            newnode = new_XMLNode;

            if(currentnode){
                nodestack += ({ currentnode });
                if (pointerp(currentnode[Child][tag])) {
                    if (!nodelistp(currentnode[Child][tag])) {
                        // transform
                        currentnode[Child][tag] = ({ currentnode[Child][tag], newnode });
                    } else {
                        // append
                        currentnode[Child][tag] += ({ newnode });
                    }
                    currentnode = currentnode[Child][tag][<1];
                } else {
                    currentnode[Child][tag] = newnode;
                    currentnode = currentnode[Child][tag];
                }
            } else {
                currentnode = newnode;
            }
            currentnode[Tag] = tag;
            // this approach cannot handle param="string with spaces"
            foreach(string pa: explode(params, " ")) {
                if(sscanf(pa, "%s=\"%s\"", key, val) == 2 ||
                   sscanf(pa, "%s=\'%s\'", key, val) == 2 ) {
                    currentnode[Param][key] = val;
                }
            }
            if (selfclosing) {
                if (sizeof(nodestack) == 0){
                    PT(("nodestack empty\n"))
                } else {
                    currentnode = nodestack[<1];
                    nodestack = nodestack[..<2];
                }
            }
        }
    }
    return currentnode;
}
|
||||
|
110
world/net/xml/t/tmp
Normal file
110
world/net/xml/t/tmp
Normal file
|
@ -0,0 +1,110 @@
|
|||
// $Id: parse.c,v 1.12 2005/06/07 07:03:02 fippo Exp $ // vim:syntax=lpc
|
||||
//
|
||||
// the actual RSS parser
|
||||
// why the file is called "parse.c" i don't know
|
||||
// it certainly wouldn't be a good idea to have several parsers in one file
|
||||
// so you may as well rename it into rss.c? TODO
|
||||
//
|
||||
#include <net.h>
|
||||
#include <xml.h>
|
||||
inherit NET_PATH "xml/common";
|
||||
|
||||
// DOM style XML parser
|
||||
// DOM style XML parser.
// this one is very similar to the jabber parser
// from a syntax point of view
//
// Walks over all tags of 'a' and builds a tree of XMLNodes:
// node[Tag] is the element name, node[Param] holds the attributes by
// name, node[Child] the child nodes by tag name (a list of nodes when
// a name occurs more than once) and node[Cdata] the text found in
// front of the closing tag.
// Returns the node that is current when the loop ends (the root node
// once its closing tag has been seen), or -1 for an empty tag name or
// a tag without closing '>'.
xmlparse(a) {
    string tag, data, params;
    int pos, close;

    XMLNode currentnode = 0;
    XMLNode nodestack = ({ });
    pos = 0;
    close = -1;

    while(pos = strstr(a, "<", pos) + 1) {
        data = xmlunquote(a[close + 1..pos - 2]);
        close = strstr(a, ">", pos);
        // truncated input: do not slice with an index of -1
        if (close < 0) return -1;

        // forget the attributes of the previous tag; the sscanf
        // assigns nothing when the tag contains no whitespace
        params = "";
        sscanf(a[pos..close - 1], "%s%t%s", tag, params)
            || (tag = a[pos..close-1]);
        if(tag == "") return -1;
        if (tag[0] == '!' || tag[0] == '?'){
            // P2(("skipping tag starting with ! or ?\n"))
        } else if (tag[0] == '/'){
            // currentnode may still be 0 here, so do not index it blindly
            P4(("should be closing tag %O and am closing %O\n",
                currentnode ? currentnode[Tag] : 0, tag[1..]))
            if (!currentnode || currentnode[Tag] != tag[1..]) {
                // unbalanced xml?
            } else {
                // closing tag found; closing tags carry no parameters
                if (strlen(data) && data != "\r\n" && data != "\n"){
                    // not sure if this works correct
                    // NOTE(review): the first text is stored as a plain
                    // string, so the append branch only runs when Cdata
                    // already is an array; a second text replaces the
                    // first one. kept as it was - confirm the intent.
                    if (!pointerp(currentnode[Cdata]))
                        currentnode[Cdata] = data;
                    else
                        currentnode[Cdata] += ({ data });
                }
                if (sizeof(nodestack) == 0) {
                    // root element closed: the document is done
                    break;
                } else {
                    currentnode = nodestack[<1];
                    nodestack = nodestack[..<2];
                }
            }
        } else { // opening tag
            int selfclosing = 0;
            mixed newnode;
            string key, val;

            // <tag attr="x"/> leaves the slash in params, <tag/> in tag
            if (strlen(params) && params[<1] == '/') {
                params = params[..<2];
                selfclosing = 1;
            } else if (tag[<1] == '/') {
                tag = tag[..<2];
                selfclosing = 1;
            }
            newnode = new_XMLNode;

            if(currentnode){
                nodestack += ({ currentnode });
                if (pointerp(currentnode[Child][tag])) {
                    if (!nodelistp(currentnode[Child][tag])) {
                        // transform
                        currentnode[Child][tag] = ({ currentnode[Child][tag], newnode });
                    } else {
                        // append
                        currentnode[Child][tag] += ({ newnode });
                    }
                    currentnode = currentnode[Child][tag][<1];
                } else {
                    currentnode[Child][tag] = newnode;
                    currentnode = currentnode[Child][tag];
                }
            } else {
                currentnode = newnode;
            }
            currentnode[Tag] = tag;
            // this approach cannot handle param="string with spaces"
            foreach(string pa: explode(params, " ")) {
                if(sscanf(pa, "%s=\"%s\"", key, val) == 2 ||
                   sscanf(pa, "%s=\'%s\'", key, val) == 2 ) {
                    currentnode[Param][key] = val;
                }
            }
            if (selfclosing) {
                if (sizeof(nodestack) == 0){
                    PT(("nodestack empty\n"))
                } else {
                    currentnode = nodestack[<1];
                    nodestack = nodestack[..<2];
                }
            }
        }
    }
    return currentnode;
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue