Merge commit 'origin'

2024-08-15 03:19:02 +00:00 · 2011-05-24 20:15:52 +02:00 · 2011-05-24 20:15:52 +02:00 · c582d495b8
commit c582d495b8
parent e3af43deda 66bb15fc37
11 changed files with 156 additions and 82 deletions
--- a/bench/.gitignore
+++ b/bench/.gitignore
@ -1,3 +1,4 @@
 *.html
 *.pdf
 results/
+packets/binary/[0-9]*
--- a/bench/Makefile
+++ b/bench/Makefile
@ -7,7 +7,7 @@ INIT = (setq load-path (cons \"/usr/share/emacs/site-lisp/org-mode\" load-path)
             org-babel-tangle-pad-newline nil \
             org-src-preserve-indentation t) \
       (require 'org-install)
-
+#'
 ORG = benchmark.org

 html:
--- a/bench/benchmark.org
+++ b/bench/benchmark.org
@ -25,8 +25,9 @@ Here's a way to model this in PSYC:

 ** A message with JSON-unfriendly characters
 This message contains some characters which are
-impractical to encode in JSON. Let's see how much
-performance impact this has.
+impractical to encode in JSON. We should probably
+put a lot more inside to actually see an impact
+on performance.

 #+INCLUDE: packets/json-unfriendly.xml src xml
 #+INCLUDE: packets/json-unfriendly.json src js
@ -34,7 +35,8 @@ performance impact this has.

 ** A message with XML-unfriendly characters
 Same test with characters which aren't practical
-in the XML syntax.
+in the XML syntax, yet we should put more of
+them inside.

 #+INCLUDE: packets/xml-unfriendly.xml src xml
 #+INCLUDE: packets/xml-unfriendly.json src js
@ -129,26 +131,90 @@ We'll use the latter here:
 Parsing time of 1 000 000 packets, in milliseconds.
 a simple strlen scan of the respective message is provided for comparison.

-| input:    |   PSYC |         |   JSON |           |            |    XML |          |
-| parser:   | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml |
-|-----------+--------+---------+--------+-----------+------------+--------+----------|
-| user prof |     55 |     608 |   4715 |     17468 |       7350 |  12377 |     2477 |
-|-----------+--------+---------+--------+-----------+------------+--------+----------|
-| /         |      < |       > |      < |         > |          < |        |        > |
+| input:          |   PSYC |         |   JSON |           |            |    XML |          |
+| parser:         | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml |
+|-----------------+--------+---------+--------+-----------+------------+--------+----------|
+| user profile    |     55 |     608 |   4715 |     16503 |       7350 |  12377 |     2477 |
+| psyc-unfriendly |     70 |     286 |   2892 |     12567 |       5538 |   8659 |     1896 |
+| json-unfriendly |     49 |     430 |   2328 |     10006 |       5141 |   7875 |     1751 |
+| xml-unfriendly  |     37 |     296 |   2156 |      9591 |       5571 |   8769 |     1765 |
+|-----------------+--------+---------+--------+-----------+------------+--------+----------|
+| /               |      < |       > |      < |         > |          < |        |        > |
+|                 |    <r> |     <r> |    <r> |       <r> |        <r> |    <r> |      <r> |

 Pure syntax comparisons above, protocol performance comparisons below:

-| input:    |   PSYC |         |   JSON |           |            |   XMPP |          |
-| parser:   | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml |
-|-----------+--------+---------+--------+-----------+------------+--------+----------|
-| presence  |     30 |     246 |   2463 |     10197 |       4997 |   7557 |     1719 |
-| chat msg  |     41 |     320 |        |           |       5997 |   9777 |     1893 |
-| activity  |     42 |     366 |   4666 |     16846 |      13357 |  28858 |     4419 |
-|-----------+--------+---------+--------+-----------+------------+--------+----------|
-| /         |      < |       > |      < |         > |          < |        |        > |
+| input:          |   PSYC |         |   JSON |           |            |   XMPP |          |
+| parser:         | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml |
+|-----------------+--------+---------+--------+-----------+------------+--------+----------|
+| presence        |     30 |     236 |   2463 |     10016 |       4997 |   7557 |     1719 |
+| chat msg        |     40 |     295 |   2147 |      9526 |       5997 |   9777 |     1893 |
+| activity        |     42 |     353 |   4666 |     16327 |      13357 |  28858 |     4356 |
+|-----------------+--------+---------+--------+-----------+------------+--------+----------|
+| /               |      < |       > |      < |         > |          < |        |        > |
+
+Parsing large amounts of binary data. For JSON and XML, base64 encoding was used.
+Note that the results below include only the parsing time; base64 decoding was
+not performed.
+
+| input:  |   PSYC |         |   JSON |            |            |       XML |          |
+| parser: | strlen | libpsyc | json-c |  json-glib | libxml sax |    libxml | rapidxml |
+|---------+--------+---------+--------+------------+------------+-----------+----------|
+| 7K      |     92 |      77 |  14459 |      98000 |      11445 |     19299 |     8701 |
+| 70K     |     53 |      77 |  14509 |    1003900 |      96209 |    167738 |    74296 |
+| 700K    |     42 |      77 |  14551 |   10616000 |     842025 |   1909428 |   729419 |
+| 7M      |    258 |      78 |  14555 |  120810000 |   12466610 |  16751363 |  7581169 |
+| 70M     |    304 |      80 |  14534 | 1241000000 |  169622110 | 296017820 | 75308906 |
+|---------+--------+---------+--------+------------+------------+-----------+----------|
+| /       |      < |       > |      < |          > |          < |           |        > |
+| <r>     |        |         |        |            |            |           |          |
+

 These tests were performed on a 2.53 GHz Intel(R) Core(TM)2 Duo P9500 CPU.

+* Criticism
+
+Are we comparing apples and oranges? Yes and no; it depends on what you
+need. XML is a syntax best suited for complex structured data in
+well-defined formats - especially good for text mark-up. JSON is a syntax
+intended to hold arbitrarily structured data suitable for immediate
+inclusion in JavaScript source code. The PSYC syntax is an evolved
+derivative of RFC 822, the syntax used by HTTP and e-mail, and is therefore
+limited in the kind and depth of data structures that can be represented
+with it, but in exchange it is highly performant at doing just that.
+In fact we are looking into suitable syntax extensions to represent
+generic structures and semantic signatures, but for now PSYC only
+provides for simple typed values and lists of typed values.
+
+Another aspect is the availability of these formats for spontaneous
+use. You could generate and parse JSON yourself but you have to be
+careful about escaping. XML can be rendered manually if you know your
+data will not break the syntax, but you can't really parse it without
+a bulletproof parser. PSYC is easy to render and parse yourself for
+simple tasks, as long as your body does not contain "\n|\n" and your
+variables do not contain newlines.
+
+After all it is up to you to find out which format fulfils your
+requirements the best. We use PSYC for the majority of messaging where
+JSON and XMPP aren't efficient and opaque enough, but we employ XML and
+JSON as payloads within PSYC for data that doesn't fit the PSYC model.
+For some reason all three formats are being used for messaging, although
+only PSYC was actually designed for that purpose.
+
+* Caveats
+
+In every case we'll compare performance of parsing and re-rendering
+these messages, but consider also that the applicative processing
+of an XML DOM tree is more complicated than just accessing
+certain elements in a JSON data structure or PSYC variable
+mapping.
+
+For a speed check in real-world conditions which also considers the
+complexity of processing incoming messages we should compare
+the performance of a chat client using the two protocols,
+for instance by using libpurple with XMPP and PSYC accounts.
+For this purpose we first need to integrate libpsyc into libpurple.
+
 * Conclusions

 The Internet has developed two major breeds of protocol formats.
@ -166,46 +232,6 @@ combines the compactness and efficiency of binary protocols with the
 extensibility of text-based protocols and still provides for enough
 data structuring to rarely require the use of other data formats.

-* Criticism
-
-Are we comparing apples and oranges? Yes and no, depends on what you
-need. XML is a syntax best suited for complex structured data in
-well-defined formats - especially good for text mark-up. JSON is a syntax
-intended to hold arbitrarily structured data suitable for immediate
-inclusion in javascript source codes. The PSYC syntax is an evolved
-derivate of RFC 822, the syntax used by HTTP and E-Mail, and is therefore
-limited in the kind and depth of data structures that can be represented
-with it, but in exchange it is highly performant at doing just that.
-
-So it is up to you to find out which format fulfils your
-requirements the best. We use PSYC for the majority of messaging where
-JSON and XMPP aren't efficient and opaque enough, but we employ XML and
-JSON as payloads within PSYC for data that doesn't fit the PSYC model.
-For some reason all three formats are being used for messaging, although
-only PSYC was actually designed for that purpose.
-
-Another aspect is the availability of these formats for spontaneous
-use. You could generate and parse JSON yourself but you have to be
-careful about escaping. XML can be rendered manually if you know your
-data will not break the syntax, but you can't really parse it without
-a bullet proof parser. PSYC is easy to render and parse yourself for
-simple tasks, as long as your body does not contain "\n|\n" and your
-variables do not contain newlines.
-
-* Caveats
-
-In every case we'll compare performance of parsing and re-rendering
-these messages, but consider also that the applicative processing
-of an XML DOM tree is more complicated than just accessing
-certain elements in a JSON data structure or PSYC variable
-mapping.
-
-For a speed check in real world conditions which also consider the
-complexity of processing incoming messages we should compare
-the performance of a chat client using the two protocols,
-for instance by using libpurple with XMPP and PSYC accounts.
-To this purpose we first need to integrate libpsyc into libpurple.
-
 * Futures

 After a month of development libpsyc is already performing pretty
@ -232,17 +258,19 @@ specialized parsers and renderers to be provided.
 * Appendix
 ** Tools used

-libpsyc:
+*** libpsyc
+
+: make bench
+
+which uses the following commands:

 : test/testStrlen -sc 1000000 -f $file
 : test/testPsycSpeed -sc 1000000 -f $file
 : test/testJson -snc 1000000 -f $file
 : test/testJsonGlib -snc 1000000 -f $file

-xmlbench:
+*** xmlbench

 : parse/libxml-sax 1000000 $file
 : parse/libxml 1000000 $file
 : parse/rapidxml 1000000 $file
-
-See also "make bench"
--- a/bench/packets/binary/json-footer
+++ b/bench/packets/binary/json-footer
@ -0,0 +1 @@
+"}
--- a/bench/packets/binary/json-header
+++ b/bench/packets/binary/json-header
@ -0,0 +1 @@
+{"data":"
--- a/bench/packets/binary/psyc-content
+++ b/bench/packets/binary/psyc-content
@ -0,0 +1,2 @@
+:_foo	bar
+_data
--- a/bench/packets/binary/psyc-header
+++ b/bench/packets/binary/psyc-header
--- a/bench/packets/binary/xml-footer
+++ b/bench/packets/binary/xml-footer
@ -0,0 +1 @@
+</data>
--- a/bench/packets/binary/xml-header
+++ b/bench/packets/binary/xml-header
@ -0,0 +1 @@
+<data>
--- a/test/Makefile
+++ b/test/Makefile
@ -50,23 +50,6 @@ test: ${TARGETS}
 	x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -f $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x
 	x=0; for f in packets/[0-9]*; do echo ">> $$f"; ./testPsyc -rf $$f | ${DIFF} -u $$f -; x=$$((x+$$?)); done; exit $$x

-bench: bench-psyc bench-json bench-xml
-
-bench-dir:
-	@mkdir -p ../bench/results
-
-bench-psyc: bench-dir testStrlen testPsycSpeed
-	for f in ../bench/packets/*.psyc; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done
-	for f in ../bench/packets/*.psyc; do bf=`basename $$f`; echo libpsyc: $$f; ./testPsycSpeed -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done
-
-bench-json: bench-dir testStrlen testJson testJsonGlib
-	for f in ../bench/packets/*.json; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done
-	for f in ../bench/packets/*.json; do bf=`basename $$f`; echo json-c: $$bf; ./testJson -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done
-	for f in ../bench/packets/*.json; do bf=`basename $$f`; echo json-glib: $$bf; ./testJsonGlib -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf-glib; done
-
-bench-xml: bench-dir testStrlen
-	for f in ../bench/packets/*.xml; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done
-
 .NOTPARALLEL: nettestrun

 nettest: nettestfull nettestsplit
@ -96,3 +79,60 @@ srvstart:

 srvkill:
 	pkill -x testPsyc
+
+
+bench: bench-genpkts bench-psyc bench-json bench-xml
+
+bench-dir:
+	@mkdir -p ../bench/results
+
+bench-psyc: bench-dir testStrlen testPsycSpeed
+	for f in ../bench/packets/*.psyc ../bench/packets/binary/*.psyc; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done
+	for f in ../bench/packets/*.psyc ../bench/packets/binary/*.psyc; do bf=`basename $$f`; echo libpsyc: $$f; ./testPsycSpeed -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done
+
+bench-json: bench-dir testStrlen testJson testJsonGlib
+	for f in ../bench/packets/*.json ../bench/packets/binary/*.json; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done
+	for f in ../bench/packets/*.json ../bench/packets/binary/*.json; do bf=`basename $$f`; echo json-c: $$bf; ./testJson -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf; done
+	for f in ../bench/packets/*.json; do bf=`basename $$f`; echo json-glib: $$bf; ./testJsonGlib -snc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf-glib; done
+
+bench-xml: bench-dir testStrlen
+	for f in ../bench/packets/*.xml ../bench/packets/binary/*.xml; do bf=`basename $$f`; echo strlen: $$bf; ./testStrlen -sc 1000000 -f $$f | ${TEE} -a ../bench/results/$$bf.strlen; done
+
+bench-genpkts:
+	@${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000 of=../bench/packets/binary/7K.psyc
+	@${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=70000 of=../bench/packets/binary/70K.psyc
+	@${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=700000 of=../bench/packets/binary/700K.psyc
+	@${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000000 of=../bench/packets/binary/7000K.psyc
+	@${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000000 count=10 of=../bench/packets/binary/70000K.psyc
+#	@${MAKE} genpkt header=../bench/packets/binary/psyc-header content=../bench/packets/binary/psyc-content bs=7000000 count=100 of=../bench/packets/binary/700000K.psyc
+
+	@${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000 of=../bench/packets/binary/7K.json
+	@${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=70000 of=../bench/packets/binary/70K.json
+	@${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=700000 of=../bench/packets/binary/700K.json
+	@${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000000 of=../bench/packets/binary/7000K.json
+	@${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000000 count=10 of=../bench/packets/binary/70000K.json
+#	@${MAKE} genb64 header=../bench/packets/binary/json-header footer=../bench/packets/binary/json-footer bs=7000000 count=100 of=../bench/packets/binary/700000K.json
+
+	@${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000 of=../bench/packets/binary/7K.xml
+	@${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=70000 of=../bench/packets/binary/70K.xml
+	@${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=700000 of=../bench/packets/binary/700K.xml
+	@${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000000 of=../bench/packets/binary/7000K.xml
+	@${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000000 count=10 of=../bench/packets/binary/70000K.xml
+#	@${MAKE} genb64 header=../bench/packets/binary/xml-header footer=../bench/packets/binary/xml-footer bs=7000000 count=100 of=../bench/packets/binary/700000K.xml
+
+# Defaults for genpkt/genb64; bench-genpkts overrides bs/count on the
+# sub-make command line.
+bs = 1
+count = 1
+# genpkt: create ${of} unless it already exists. The file is ${header}, a line
+# holding (size of ${content} + bs*count + 1), ${content}, bs*count bytes read
+# from /dev/urandom, and a trailing "\n|\n".
+# POSIX `[` and printf are used instead of `[[` and `echo -ne` so the recipe
+# also works when make runs it with a non-bash /bin/sh.
+genpkt:
+	@[ -n "${of}" ]
+	[ -f "${of}" ] || ( \
+		cat ${header} >${of}; \
+		perl -le 'my @se=stat(q('${content}')); print $$se[7] + ${bs} * ${count} + 1' >>${of}; \
+		cat ${content} >>${of}; \
+		dd if=/dev/urandom of=${of} bs=${bs} count=${count} oflag=append conv=notrunc; \
+		printf '\n|\n' >>${of} )
+
+bs = 1
+count = 1
+# genb64: create ${of} unless it already exists. The file is ${header}, the
+# unwrapped (-w0) base64 encoding of bs*count bytes read from /dev/urandom,
+# and ${footer}.
+genb64:
+	@[ -n "${of}" ]
+	[ -f "${of}" ] || (cat ${header} >${of}; dd if=/dev/urandom bs=${bs} count=${count} | base64 -w0 >>${of}; cat ${footer} >>${of})
--- a/test/testJsonGlib.c
+++ b/test/testJsonGlib.c
@ -32,7 +32,7 @@ void test_init (int i) {

 int test_input (int i, char *recvbuf, size_t nbytes) {
 	JsonNode *root;
-	GError *error;
+	GError *error = NULL;
 	char *str;
 	size_t len;
 	int r, ret;
@ -41,8 +41,7 @@ int test_input (int i, char *recvbuf, size_t nbytes) {

 	if (!ret) {
 		printf("Parse error\n");
-		exit_code = 1;
-		return -1;
+		exit(1);
 	}

 	root = json_parser_get_root(parser);