diff --git a/bench/.gitignore b/bench/.gitignore index bffe52f..fd2f6c3 100644 --- a/bench/.gitignore +++ b/bench/.gitignore @@ -1,3 +1,4 @@ *.html *.pdf results/ +packets/binary/[0-9]* diff --git a/bench/Makefile b/bench/Makefile index e263ac0..cecdfe3 100644 --- a/bench/Makefile +++ b/bench/Makefile @@ -7,7 +7,7 @@ INIT = (setq load-path (cons \"/usr/share/emacs/site-lisp/org-mode\" load-path) org-babel-tangle-pad-newline nil \ org-src-preserve-indentation t) \ (require 'org-install) - +#' ORG = benchmark.org html: diff --git a/bench/benchmark.org b/bench/benchmark.org index 3bcce3b..6d1c2a7 100644 --- a/bench/benchmark.org +++ b/bench/benchmark.org @@ -25,8 +25,9 @@ Here's a way to model this in PSYC: ** A message with JSON-unfriendly characters This message contains some characters which are -impractical to encode in JSON. Let's see how much -performance impact this has. +impractical to encode in JSON. We should probably +put a lot more inside to actually see an impact +on performance. #+INCLUDE: packets/json-unfriendly.xml src xml #+INCLUDE: packets/json-unfriendly.json src js @@ -34,7 +35,8 @@ performance impact this has. ** A message with XML-unfriendly characters Same test with characters which aren't practical -in the XML syntax. +in the XML syntax, yet we should put more of +them inside. #+INCLUDE: packets/xml-unfriendly.xml src xml #+INCLUDE: packets/xml-unfriendly.json src js @@ -129,26 +131,90 @@ We'll use the latter here: Parsing time of 1 000 000 packets, in milliseconds. a simple strlen scan of the respective message is provided for comparison. 
-| input: | PSYC | | JSON | | | XML | | -| parser: | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml | -|-----------+--------+---------+--------+-----------+------------+--------+----------| -| user prof | 55 | 608 | 4715 | 17468 | 7350 | 12377 | 2477 | -|-----------+--------+---------+--------+-----------+------------+--------+----------| -| / | < | > | < | > | < | | > | +| input: | PSYC | | JSON | | | XML | | +| parser: | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml | +|-----------------+--------+---------+--------+-----------+------------+--------+----------| +| user profile | 55 | 608 | 4715 | 16503 | 7350 | 12377 | 2477 | +| psyc-unfriendly | 70 | 286 | 2892 | 12567 | 5538 | 8659 | 1896 | +| json-unfriendly | 49 | 430 | 2328 | 10006 | 5141 | 7875 | 1751 | +| xml-unfriendly | 37 | 296 | 2156 | 9591 | 5571 | 8769 | 1765 | +|-----------------+--------+---------+--------+-----------+------------+--------+----------| +| / | < | > | < | > | < | | > | +| | | | | | | | | Pure syntax comparisons above, protocol performance comparisons below: -| input: | PSYC | | JSON | | | XMPP | | -| parser: | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml | -|-----------+--------+---------+--------+-----------+------------+--------+----------| -| presence | 30 | 246 | 2463 | 10197 | 4997 | 7557 | 1719 | -| chat msg | 41 | 320 | | | 5997 | 9777 | 1893 | -| activity | 42 | 366 | 4666 | 16846 | 13357 | 28858 | 4419 | -|-----------+--------+---------+--------+-----------+------------+--------+----------| -| / | < | > | < | > | < | | > | +| input: | PSYC | | JSON | | | XMPP | | +| parser: | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml | +|-----------------+--------+---------+--------+-----------+------------+--------+----------| +| presence | 30 | 236 | 2463 | 10016 | 4997 | 7557 | 1719 | +| chat msg | 40 | 295 | 2147 | 9526 | 5997 | 9777 | 1893 | +| activity | 42 | 353 | 4666 | 16327 | 
13357 | 28858 | 4356 | +|-----------------+--------+---------+--------+-----------+------------+--------+----------| +| / | < | > | < | > | < | | > | + +Parsing large amounts of binary data. For JSON & XML base64 encoding was used. +Note that the results below include only the parsing time; base64 decoding was +not performed. + +| input: | PSYC | | JSON | | | XML | | +| parser: | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml | +|---------+--------+---------+--------+------------+------------+-----------+----------| +| 7K | 92 | 77 | 14459 | 98000 | 11445 | 19299 | 8701 | +| 70K | 53 | 77 | 14509 | 1003900 | 96209 | 167738 | 74296 | +| 700K | 42 | 77 | 14551 | 10616000 | 842025 | 1909428 | 729419 | +| 7M | 258 | 78 | 14555 | 120810000 | 12466610 | 16751363 | 7581169 | +| 70M | 304 | 80 | 14534 | 1241000000 | 169622110 | 296017820 | 75308906 | +|---------+--------+---------+--------+------------+------------+-----------+----------| +| / | < | > | < | > | < | | > | +| | | | | | | | | + These tests were performed on a 2.53 GHz Intel(R) Core(TM)2 Duo P9500 CPU. +* Criticism + +Are we comparing apples and oranges? Yes and no; it depends on what you +need. XML is a syntax best suited for complex structured data in +well-defined formats - especially good for text mark-up. JSON is a syntax +intended to hold arbitrarily structured data suitable for immediate +inclusion in JavaScript source code. The PSYC syntax is an evolved +derivative of RFC 822, the syntax used by HTTP and E-Mail, and is therefore +limited in the kind and depth of data structures that can be represented +with it, but in exchange it is highly performant at doing just that. +In fact, we are looking into suitable syntax extensions to represent +generic structures and semantic signatures, but for now PSYC only +provides for simple typed values and lists of typed values. + +Another aspect is the availability of these formats for spontaneous +use.
You could generate and parse JSON yourself, but you have to be +careful about escaping. XML can be rendered manually if you know your +data will not break the syntax, but you can't really parse it without +a bulletproof parser. PSYC is easy to render and parse yourself for +simple tasks, as long as your body does not contain "\n|\n" and your +variables do not contain newlines. + +After all, it is up to you to find out which format fulfils your +requirements best. We use PSYC for the majority of messaging where +JSON and XMPP aren't efficient and opaque enough, but we employ XML and +JSON as payloads within PSYC for data that doesn't fit the PSYC model. +For some reason all three formats are being used for messaging, although +only PSYC was actually designed for that purpose. + +* Caveats + +In every case we'll compare performance of parsing and re-rendering +these messages, but consider also that the applicative processing +of an XML DOM tree is more complicated than just accessing +certain elements in a JSON data structure or PSYC variable +mapping. + +For a speed check in real-world conditions which also considers the +complexity of processing incoming messages we should compare +the performance of a chat client using the two protocols, +for instance by using libpurple with XMPP and PSYC accounts. +For this purpose we first need to integrate libpsyc into libpurple. + * Conclusions The Internet has developed two major breeds of protocol formats. @@ -166,46 +232,6 @@ combines the compactness and efficiency of binary protocols with the extensibility of text-based protocols and still provides for enough data structuring to rarely require the use of other data formats. -* Criticism - -Are we comparing apples and oranges? Yes and no, depends on what you -need. XML is a syntax best suited for complex structured data in -well-defined formats - especially good for text mark-up.
JSON is a syntax -intended to hold arbitrarily structured data suitable for immediate -inclusion in javascript source codes. The PSYC syntax is an evolved -derivate of RFC 822, the syntax used by HTTP and E-Mail, and is therefore -limited in the kind and depth of data structures that can be represented -with it, but in exchange it is highly performant at doing just that. - -So it is up to you to find out which format fulfils your -requirements the best. We use PSYC for the majority of messaging where -JSON and XMPP aren't efficient and opaque enough, but we employ XML and -JSON as payloads within PSYC for data that doesn't fit the PSYC model. -For some reason all three formats are being used for messaging, although -only PSYC was actually designed for that purpose. - -Another aspect is the availability of these formats for spontaneous -use. You could generate and parse JSON yourself but you have to be -careful about escaping. XML can be rendered manually if you know your -data will not break the syntax, but you can't really parse it without -a bullet proof parser. PSYC is easy to render and parse yourself for -simple tasks, as long as your body does not contain "\n|\n" and your -variables do not contain newlines. - -* Caveats - -In every case we'll compare performance of parsing and re-rendering -these messages, but consider also that the applicative processing -of an XML DOM tree is more complicated than just accessing -certain elements in a JSON data structure or PSYC variable -mapping. - -For a speed check in real world conditions which also consider the -complexity of processing incoming messages we should compare -the performance of a chat client using the two protocols, -for instance by using libpurple with XMPP and PSYC accounts. -To this purpose we first need to integrate libpsyc into libpurple. - * Futures After a month of development libpsyc is already performing pretty @@ -232,17 +258,19 @@ specialized parsers and renderers to be provided. 
* Appendix ** Tools used -libpsyc: +*** libpsyc + +: make bench + +which uses the following commands: : test/testStrlen -sc 1000000 -f $file : test/testPsycSpeed -sc 1000000 -f $file : test/testJson -snc 1000000 -f $file : test/testJsonGlib -snc 1000000 -f $file -xmlbench: +*** xmlbench : parse/libxml-sax 1000000 $file : parse/libxml 1000000 $file : parse/rapidxml 1000000 $file - -See also "make bench" diff --git a/bench/packets/binary/json-footer b/bench/packets/binary/json-footer new file mode 100644 index 0000000..6b822c2 --- /dev/null +++ b/bench/packets/binary/json-footer @@ -0,0 +1 @@ +"} \ No newline at end of file diff --git a/bench/packets/binary/json-header b/bench/packets/binary/json-header new file mode 100644 index 0000000..e045aed --- /dev/null +++ b/bench/packets/binary/json-header @@ -0,0 +1 @@ +{"data":" \ No newline at end of file diff --git a/bench/packets/binary/psyc-content b/bench/packets/binary/psyc-content new file mode 100644 index 0000000..7ab1318 --- /dev/null +++ b/bench/packets/binary/psyc-content @@ -0,0 +1,2 @@ +:_foo bar +_data diff --git a/bench/packets/binary/psyc-header b/bench/packets/binary/psyc-header new file mode 100644 index 0000000..e69de29 diff --git a/bench/packets/binary/xml-footer b/bench/packets/binary/xml-footer new file mode 100644 index 0000000..f09e8d4 --- /dev/null +++ b/bench/packets/binary/xml-footer @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/bench/packets/binary/xml-header b/bench/packets/binary/xml-header new file mode 100644 index 0000000..3f35021 --- /dev/null +++ b/bench/packets/binary/xml-header @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/Makefile b/test/Makefile index 66c2445..f7f49ea 100644 --- a/test/Makefile +++ b/test/Makefile @@ -50,23 +50,6 @@ test: ${TARGETS} x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -f $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -rf $$f | ${DIFF} -u $$f -; 
x=$$((x+$$?)); done; exit $$x -bench: bench-psyc bench-json bench-xml - -bench-dir: - @mkdir -p ../bench/results - -bench-psyc: bench-dir testStrlen testPsycSpeed - for f in ../bench/packets/*.psyc; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done - for f in ../bench/packets/*.psyc; do bf=`basename $$f`; echo libpsyc: $$f; ./testPsycSpeed -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done - -bench-json: bench-dir testStrlen testJson testJsonGlib - for f in ../bench/packets/*.json; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done - for f in ../bench/packets/*.json; do bf=`basename $$f`; echo json-c: $$bf; ./testJson -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done - for f in ../bench/packets/*.json; do bf=`basename $$f`; echo json-glib: $$bf; ./testJsonGlib -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf-glib; done - -bench-xml: bench-dir testStrlen - for f in ../bench/packets/*.xml; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done - .NOTPARALLEL: nettestrun nettest: nettestfull nettestsplit @@ -96,3 +79,60 @@ srvstart: srvkill: pkill -x testPsyc + + +bench: bench-genpkts bench-psyc bench-json bench-xml + +bench-dir: + @mkdir -p ../bench/results + +bench-psyc: bench-dir testStrlen testPsycSpeed + for f in ../bench/packets/*.psyc ../bench/packets/binary/*.psyc; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done + for f in ../bench/packets/*.psyc ../bench/packets/binary/*.psyc; do bf=`basename $$f`; echo libpsyc: $$f; ./testPsycSpeed -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done + +bench-json: bench-dir testStrlen testJson testJsonGlib + for f in ../bench/packets/*.json ../bench/packets/binary/*.json; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen 
-sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done + for f in ../bench/packets/*.json ../bench/packets/binary/*.json; do bf=`basename $$f`; echo json-c: $$bf; ./testJson -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done + for f in ../bench/packets/*.json; do bf=`basename $$f`; echo json-glib: $$bf; ./testJsonGlib -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf-glib; done + +bench-xml: bench-dir testStrlen + for f in ../bench/packets/*.xml ../bench/packets/binary/*.xml; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done + +bench-genpkts: + @${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000 of=../bench/packets/binary/7K.psyc + @${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=70000 of=../bench/packets/binary/70K.psyc + @${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=700000 of=../bench/packets/binary/700K.psyc + @${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000000 of=../bench/packets/binary/7000K.psyc + @${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000000 count=10 of=../bench/packets/binary/70000K.psyc +# @${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000000 count=100 of=../bench/packets/binary/700000K.psyc + + @${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000 of=../bench/packets/binary/7K.json + @${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=70000 of=../bench/packets/binary/70K.json + @${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=700000 
of=../bench/packets/binary/700K.json + @${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000000 of=../bench/packets/binary/7000K.json + @${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000000 count=10 of=../bench/packets/binary/70000K.json +# @${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000000 count=100 of=../bench/packets/binary/700000K.json + + @${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000 of=../bench/packets/binary/7K.xml + @${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=70000 of=../bench/packets/binary/70K.xml + @${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=700000 of=../bench/packets/binary/700K.xml + @${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000000 of=../bench/packets/binary/7000K.xml + @${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000000 count=10 of=../bench/packets/binary/70000K.xml +# @${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000000 count=100 of=../bench/packets/binary/700000K.xml + +bs = 1 +count = 1 +genpkt: + @[[ -n "${of}" ]] + [[ -f "${of}" ]] || ( \ + cat ${header} >${of}; \ + perl -le 'my @se=stat(q('${content}')); print $$se[7] + ${bs} * ${count} + 1' >>${of}; \ + cat ${content} >>${of}; \ + dd if=/dev/urandom of=${of} bs=${bs} count=${count} oflag=append conv=notrunc; \ + echo -ne "\n|\n" >>${of} ) + +bs = 1 +count = 1 +genb64: + @[[ -n "${of}" ]] + [[ -f "${of}" ]] || (cat ${header} >${of}; dd if=/dev/urandom bs=${bs} count=${count} | base64 -w0 >>${of}; cat ${footer} >>${of}) diff --git a/test/testJsonGlib.c 
b/test/testJsonGlib.c index 91fe9b5..2ef2e3e 100644 --- a/test/testJsonGlib.c +++ b/test/testJsonGlib.c @@ -32,7 +32,7 @@ void test_init (int i) { int test_input (int i, char *recvbuf, size_t nbytes) { JsonNode *root; - GError *error; + GError *error = NULL; char *str; size_t len; int r, ret; @@ -41,8 +41,7 @@ int test_input (int i, char *recvbuf, size_t nbytes) { if (!ret) { printf("Parse error\n"); - exit_code = 1; - return -1; + exit(1); } root = json_parser_get_root(parser);