diff --git a/bench/.gitignore b/bench/.gitignore index f93db6e..23f832b 100644 --- a/bench/.gitignore +++ b/bench/.gitignore @@ -1,3 +1,2 @@ *.html *.pdf -packets/ diff --git a/bench/Makefile b/bench/Makefile index 2aa0ca3..e263ac0 100644 --- a/bench/Makefile +++ b/bench/Makefile @@ -8,22 +8,7 @@ INIT = (setq load-path (cons \"/usr/share/emacs/site-lisp/org-mode\" load-path) org-src-preserve-indentation t) \ (require 'org-install) -ORG = benchmark.org results.org - -wiki2org: - perl -pe '\ - s/^= (.*) =\s*$$/#+TITLE: $$1\n/; \ - s/^== (.*) ==\s*$$/* $$1/; \ - s/^=== (.*) ===\s*$$/** $$1/; \ - s/^{{{/#+BEGIN_SRC/; \ - s/^}}}/#+END_SRC/ \ - ' benchmark.wiki >benchmark.org - -packets: - emacs -Q --batch --eval \ - "(progn ${INIT} (find-file \"benchmark.org\") \ - (org-babel-tangle) (kill-buffer))" - perl -pi -e 'print "\n" unless $$p; $$p=1' packets/user_profile.psyc +ORG = benchmark.org html: for f in ${ORG}; do \ diff --git a/bench/benchmark.org b/bench/benchmark.org index 7532afa..20d1c9c 100644 --- a/bench/benchmark.org +++ b/bench/benchmark.org @@ -19,30 +19,17 @@ Since presence packets are by far the dominant messaging content in the XMPP network, we'll start with one of them. Here's an example from paragraph 4.4.2 of RFC 6121. 
-#+BEGIN_SRC xml :tangle packets/presence.xml - - away - -#+END_SRC +#+INCLUDE: packets/presence.xml src xml And here's the same information in a JSON rendition: -#+BEGIN_SRC js :tangle packets/presence.json -["presence",{"from":"juliet@example.com/balcony","to":"benvolio@example.net"},{"show":"away"}] -#+END_SRC +#+INCLUDE: packets/presence.json src js Here's the equivalent PSYC packet in verbose form (since it is a multicast, the single recipients do not need to be mentioned): -#+BEGIN_SRC psyc :tangle packets/presence.psyc -:_context psyc://example.com/~juliet - -=_degree_availability 4 -_notice_presence -| -#+END_SRC +#+INCLUDE: packets/presence.psyc src psyc And the same in compact form: @@ -58,133 +45,38 @@ np XML: -#+BEGIN_SRC xml :tangle packets/chat_msg.xml - - Art thou not Romeo, and a Montague? - -#+END_SRC +#+INCLUDE: packets/chat_msg.xml src xml PSYC: -#+BEGIN_SRC psyc :tangle packets/chat_msg.psyc -:_source psyc://example.com/~juliet -:_target psyc://example.net/~romeo - -_message_private -Art thou not Romeo, and a Montague? -| -#+END_SRC +#+INCLUDE: packets/chat_msg.psyc src psyc ** A new status updated activity Example taken from http://onesocialweb.org/spec/1.0/osw-activities.html You could call this XML namespace hell: -#+BEGIN_SRC xml :tangle packets/activity.xml - - - - - - to be or not to be ? - http://activitystrea.ms/schema/1.0/post - - http://onesocialweb.org/spec/1.0/object/status - to be or not to be ? - - - - http://onesocialweb.org/spec/1.0/acl/action/view - - - - - - - - -#+END_SRC +#+INCLUDE: packets/activity.xml src xml http://activitystrea.ms/head/json-activity.html proposes a JSON encoding of this. We'll have to add a routing header to it. 
-#+BEGIN_SRC js :tangle packets/activity.json -["activity",{"from":"hamlet@denmark.lit/snsclient"},{"verb":"post", -"title":"to be or not to be ?","object":{"type":"status", -"content":"to be or not to be ?","contentType":"text/plain"}}] -#+END_SRC +#+INCLUDE: packets/activity.json src js http://about.psyc.eu/Activity suggests a PSYC mapping for activity streams. Should a "status post" be considered equivalent to a presence description announcement or just a message in the "microblogging" channel? We'll use the latter here: -#+BEGIN_SRC psyc :tangle packets/activity.psyc -:_context psyc://denmark.lit/~hamlet#_follow - -:_subject to be or not to be ? -:_type_content text/plain -_message -to be or not to be ? -| -#+END_SRC +#+INCLUDE: packets/activity.psyc src psyc ** A message with JSON-unfriendly characters -#+BEGIN_SRC xml :tangle packets/json-unfriendly.xml - - "Neither, fair saint, if either thee dislike.", he said. -And -the -rest -is -history. - -#+END_SRC +#+INCLUDE: packets/json-unfriendly.xml src xml ** A message with XML-unfriendly characters -#+BEGIN_SRC xml :tangle packets/xml-unfriendly.xml - - Wherefore art thou, Romeo? - - PročeŽ jsi ty, Romeo? - - -#+END_SRC +#+INCLUDE: packets/xml-unfriendly.xml src xml ** A message with PSYC-unfriendly strings -#+BEGIN_SRC xml :tangle packets/psyc-unfriendly.xml - - I implore you with a pointless -newline in a header variable - Wherefore art thou, Romeo? -| -And for practicing purposes we added a PSYC packet delimiter. - -#+END_SRC +#+INCLUDE: packets/psyc-unfriendly.xml src xml ** A packet containing a JPEG photograph ... TBD ... @@ -194,41 +86,31 @@ In this test we'll not consider XMPP at all and simply compare the efficiency of the three syntaxes at serializing a typical user data base storage information. We'll again start with XML: -#+BEGIN_SRC xml :tangle packets/user_profile.xml - - Silvio Berlusconi - Premier - I -
- Via del Colosseo, 1 - 00100 - Roma -
- http://example.org -
-#+END_SRC +#+INCLUDE: packets/user_profile.xml src xml In JSON this would look like this: -#+BEGIN_SRC js :tangle packets/user_profile.json -["UserProfile",{"Name":"Silvio Berlusconi","JobTitle":"Premier","Country":"I","Address": -{"Street":"Via del Colosseo, 1","PostalCode":"00100","City":"Roma"},"Page":"http://example.org"}] -#+END_SRC +#+INCLUDE: packets/user_profile.json src js Here's a way to model this in PSYC: -#+BEGIN_SRC psyc :tangle packets/user_profile.psyc +#+INCLUDE: packets/user_profile.psyc src psyc -:_name Silvio Berlusconi -:_title_job Premier -:_country I -:_address_street Via del Colosseo, 1 -:_address_code_postal 00100 -:_address_city Roma -:_page http://example.org -_profile_user -| -#+END_SRC +* Results + +Parsing time of 1 000 000 packets in milliseconds: + +| input: | PSYC | | JSON | | | XML | | +| parser: | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml | +|-----------+--------+---------+--------+-----------+------------+--------+----------| +| presence | 30 | 246 | 2463 | 10197 | 4997 | 7557 | 1719 | +| chat msg | 41 | 320 | | | 5997 | 9777 | 1893 | +| activity | 42 | 366 | 4666 | 16846 | 13357 | 28858 | 4419 | +| user prof | 55 | 608 | 4715 | 17468 | 7350 | 12377 | 2477 | +|-----------+--------+---------+--------+-----------+------------+--------+----------| +| / | < | > | < | > | < | | > | + +These tests were performed on a 2.53 GHz Intel(R) Core(TM)2 Duo P9500 CPU. * Conclusions ... TBD ... @@ -268,3 +150,19 @@ After a month of development libpsyc is already performing pretty well, but we presume various optimizations, like rewriting parts in assembler, are possible. 
+ +* Appendix +** Tools used + +libpsyc: + +: test/testStrlen -sc 1000000 -f $file +: test/testPsycSpeed -sc 1000000 -f $file +: test/testJson -snc 1000000 -f $file +: test/testJsonGlib -snc 1000000 -f $file + +xmlbench: + +: parse/libxml-sax 1000000 $file +: parse/libxml 1000000 $file +: parse/rapidxml 1000000 $file diff --git a/bench/benchmark.wiki b/bench/benchmark.wiki deleted file mode 100644 index 498b76d..0000000 --- a/bench/benchmark.wiki +++ /dev/null @@ -1,269 +0,0 @@ -= libpsyc Performance Benchmarks = - -In this document we present the results of performance benchmarks -of libpsyc compared with libjson-glib and libxml2. - -== Procedure == - -We'll use typical messages from the XMPP ("stanzas" in Jabber -lingo) and compare them with equivalent JSON encodings, -verbose and compact PSYC formats. - -In some cases we will additionally compare PSYC packets to -a more efficient XML encoding based on PSYC methods, to have -a more accurate comparison of the actual PSYC and XML -syntaxes, rather than the protocol structures of PSYC and XMPP. - -== The Benchmarks == - -=== A presence packet === - -Since presence packets are by far the dominant messaging content -in the XMPP network, we'll start with one of them. -Here's an example from paragraph 4.4.2 of RFC 6121. - -{{{ - - away - -}}} - -And here's the same information in a JSON rendition: - -{{{ -["presence",{"from":"juliet@example.com/balcony","to":"benvolio@example.net"},{"show":"away"}] -}}} - -Here's the equivalent PSYC packet in verbose form -(since it is a multicast, the single recipients do not -need to be mentioned): - -{{{ -:_context psyc://example.com/~juliet - -=_degree_availability 4 -_notice_presence -| -}}} - -And the same in compact form: - -{{{ -:c psyc://example.com/~juliet - -=da 4 -np -| -}}} - -=== An average chat message === - -{{{ - - Art thou not Romeo, and a Montague? 
- -}}} - -=== A new status updated activity === - -Example taken from http://onesocialweb.org/spec/1.0/osw-activities.html -You could call this XML namespace hell: - -{{{ - - - - - - to be or not to be ? - http://activitystrea.ms/schema/1.0/post - - http://onesocialweb.org/spec/1.0/object/status - to be or not to be ? - - - - http://onesocialweb.org/spec/1.0/acl/action/view - - - - - - - - -}}} - -http://activitystrea.ms/head/json-activity.html proposes a JSON encoding -of this. We'll have to add a routing header to it. - -{{{ -["activity",{"from":"hamlet@denmark.lit/snsclient"},{"verb":"post", -"title":"to be or not to be ?","object":{"type":"status", -"content":"to be or not to be ?","contentType":"text/plain"}] -}}} - -http://about.psyc.eu/Activity suggests a PSYC mapping for activity -streams. Should a "status post" be considered equivalent to a presence -description announcement or just a message in the "microblogging" channel? -We'll use the latter here: - -{{{ -:_context psyc://denmark.lit/~hamlet#_follow - -:_subject to be or not to be ? -:_type_content text/plain -_message -to be or not to be ? -| -}}} - -=== A message with JSON-unfriendly characters === - -{{{ - - "Neither, fair saint, if either thee dislike.", he said. -And -the -rest -is -history. - -}}} - -=== A message with XML-unfriendly characters === - -{{{ - - Wherefore art thou, Romeo? - - PročeŽ jsi ty, Romeo? - - -}}} - -=== A message with PSYC-unfriendly strings === - -{{{ - - I implore you with a pointless -newline in a header variable - Wherefore art thou, Romeo? -| -And for practicing purposes we added a PSYC packet delimiter. - -}}} - -=== A packet containing a JPEG photograph === - -... TBD ... - -=== A random data structure === - -In this test we'll not consider XMPP at all and simply compare the -efficiency of the three syntaxes at serializing a typical user data base -storage information. We'll again start with XML: - -{{{ - - Silvio Berlusconi - Premier - I -
- Via del Colosseo, 1 - 00100 - Roma -
- http://example.org -
-}}} - -In JSON this would look like this: - -{{{ -["UserProfile",{"Name":"Silvio Berlusconi","JobTitle":"Premier","Country":"I","Address": -{"Street":"Via del Colosseo, 1","PostalCode":"00100","City":"Roma"},"Page":"http://example.org"}] -}}} - -Here's a way to model this in PSYC: - -{{{ -:_name Silvio Berlusconi -:_title_job Premier -:_country I -:_address_street Via del Colosseo, 1 -:_address_code_postal 00100 -:_address_city Roma -:_page http://example.org -_profile_user -| -}}} - -== Conclusions == - -... TBD ... - -== Criticism == - -Are we comparing apples and oranges? Yes and no, depends on what you -need. XML is a syntax best suited for complex structured data in -well-defined formats - especially good for text mark-up. JSON is a syntax -intended to hold arbitrarily structured data suitable for immediate -inclusion in javascript source codes. The PSYC syntax is an evolved -derivate of RFC 822, the syntax used by HTTP and E-Mail, and is therefore -limited in the kind and depth of data structures that can be represented -with it, but in exchange it is highly performant at doing just that. - -So it is up to you to find out which of the three formats fulfils your -requirements the best. We use PSYC for the majority of messaging where -JSON and XMPP aren't efficient and opaque enough, but we employ XML and -JSON as payloads within PSYC for data that doesn't fit the PSYC model. -For some reason all three formats are being used for messaging, although -only PSYC was actually designed for that purpose. - -== Caveats == - -In every case we'll compare performance of parsing and re-rendering -these messages, but consider also that the applicative processing -of an XML DOM tree is more complicated than just accessing -certain elements in a JSON data structure or PSYC variable -mapping. 
- -For a speed check in real world conditions which also consider the -complexity of processing incoming messages we should compare -the performance of a chat client using the two protocols, -for instance by using libpurple with XMPP and PSYC accounts. -To this purpose we first need to integrate libpsyc into libpurple. - -== Futures == - -After a month of development libpsyc is already performing pretty -well, but we presume various optimizations, like rewriting parts -in assembler, are possible. - diff --git a/bench/packets/activity.json b/bench/packets/activity.json new file mode 100644 index 0000000..d6e9abc --- /dev/null +++ b/bench/packets/activity.json @@ -0,0 +1,3 @@ +["activity",{"from":"hamlet@denmark.lit/snsclient"},{"verb":"post", +"title":"to be or not to be ?","object":{"type":"status", +"content":"to be or not to be ?","contentType":"text/plain"}}] diff --git a/bench/packets/activity.psyc b/bench/packets/activity.psyc new file mode 100644 index 0000000..bb61c79 --- /dev/null +++ b/bench/packets/activity.psyc @@ -0,0 +1,7 @@ +:_context psyc://denmark.lit/~hamlet#_follow + +:_subject to be or not to be ? +:_type_content text/plain +_message +to be or not to be ? +| diff --git a/bench/packets/activity.xml b/bench/packets/activity.xml new file mode 100644 index 0000000..babdc17 --- /dev/null +++ b/bench/packets/activity.xml @@ -0,0 +1,27 @@ + + + + + + to be or not to be ? + http://activitystrea.ms/schema/1.0/post + + http://onesocialweb.org/spec/1.0/object/status + to be or not to be ? + + + + http://onesocialweb.org/spec/1.0/acl/action/view + + + + + + + + diff --git a/bench/packets/chat_msg.psyc b/bench/packets/chat_msg.psyc new file mode 100644 index 0000000..b39ad33 --- /dev/null +++ b/bench/packets/chat_msg.psyc @@ -0,0 +1,6 @@ +:_source psyc://example.com/~juliet +:_target psyc://example.net/~romeo + +_message_private +Art thou not Romeo, and a Montague? 
+| diff --git a/bench/packets/chat_msg.xml b/bench/packets/chat_msg.xml new file mode 100644 index 0000000..f7a6b83 --- /dev/null +++ b/bench/packets/chat_msg.xml @@ -0,0 +1,8 @@ + + Art thou not Romeo, and a Montague? + diff --git a/bench/packets/json-unfriendly.xml b/bench/packets/json-unfriendly.xml new file mode 100644 index 0000000..86465cc --- /dev/null +++ b/bench/packets/json-unfriendly.xml @@ -0,0 +1,13 @@ + + "Neither, fair saint, if either thee dislike.", he said. +And +the +rest +is +history. + diff --git a/bench/packets/presence.json b/bench/packets/presence.json new file mode 100644 index 0000000..3260aa3 --- /dev/null +++ b/bench/packets/presence.json @@ -0,0 +1 @@ +["presence",{"from":"juliet@example.com/balcony","to":"benvolio@example.net"},{"show":"away"}] diff --git a/bench/packets/presence.psyc b/bench/packets/presence.psyc new file mode 100644 index 0000000..3eb0bfe --- /dev/null +++ b/bench/packets/presence.psyc @@ -0,0 +1,5 @@ +:_context psyc://example.com/~juliet + +=_degree_availability 4 +_notice_presence +| diff --git a/bench/packets/presence.xml b/bench/packets/presence.xml new file mode 100644 index 0000000..8d074d8 --- /dev/null +++ b/bench/packets/presence.xml @@ -0,0 +1,4 @@ + + away + diff --git a/bench/packets/psyc-unfriendly.xml b/bench/packets/psyc-unfriendly.xml new file mode 100644 index 0000000..3b5d1a6 --- /dev/null +++ b/bench/packets/psyc-unfriendly.xml @@ -0,0 +1,12 @@ + + I implore you with a pointless +newline in a header variable + Wherefore art thou, Romeo? +| +And for practicing purposes we added a PSYC packet delimiter. 
+ diff --git a/bench/packets/user_profile.json b/bench/packets/user_profile.json new file mode 100644 index 0000000..fd69439 --- /dev/null +++ b/bench/packets/user_profile.json @@ -0,0 +1,2 @@ +["UserProfile",{"Name":"Silvio Berlusconi","JobTitle":"Premier","Country":"I","Address": +{"Street":"Via del Colosseo, 1","PostalCode":"00100","City":"Roma"},"Page":"http://example.org"}] diff --git a/bench/packets/user_profile.psyc b/bench/packets/user_profile.psyc new file mode 100644 index 0000000..5b926d0 --- /dev/null +++ b/bench/packets/user_profile.psyc @@ -0,0 +1,10 @@ + +:_name Silvio Berlusconi +:_title_job Premier +:_country I +:_address_street Via del Colosseo, 1 +:_address_code_postal 00100 +:_address_city Roma +:_page http://example.org +_profile_user +| diff --git a/bench/packets/user_profile.xml b/bench/packets/user_profile.xml new file mode 100644 index 0000000..c2d5c18 --- /dev/null +++ b/bench/packets/user_profile.xml @@ -0,0 +1,11 @@ + + Silvio Berlusconi + Premier + I +
+ Via del Colosseo, 1 + 00100 + Roma +
+ http://example.org +
diff --git a/bench/packets/xml-unfriendly.xml b/bench/packets/xml-unfriendly.xml new file mode 100644 index 0000000..e70cb72 --- /dev/null +++ b/bench/packets/xml-unfriendly.xml @@ -0,0 +1,11 @@ + + Wherefore art thou, Romeo? + + PročeŽ jsi ty, Romeo? + + diff --git a/bench/results.org b/bench/results.org deleted file mode 100644 index c393254..0000000 --- a/bench/results.org +++ /dev/null @@ -1,26 +0,0 @@ -#+TITLE: Benchmark results -#+OPTIONS: ^:{} toc:nil - -* Results - -Parsing time of 1 000 000 packets in milliseconds: - -| input | PSYC | | JSON | | | XML | | -| parser | strlen | libpsyc | json-c | json-glib | libxml sax | libxml | rapidxml | -|-----------+--------+---------+--------+-----------+------------+--------+----------| -| presence | 30 | 246 | 2463 | 10197 | 4997 | 7557 | 1719 | -| chat msg | 41 | 320 | | | 5997 | 9777 | 1893 | -| activity | 42 | 366 | 4666 | 16846 | 13357 | 28858 | 4419 | -| user prof | 55 | 608 | 4715 | 17468 | 7350 | 12377 | 2477 | -|-----------+--------+---------+--------+-----------+------------+--------+----------| -| / | < | > | < | > | < | | > | - -* Commands used - -: ./testPsycSpeed -sc 1000000 -f $file -: ./testJson -snc 1000000 -f $file -: ./testJsonGlib -snc 1000000 -f $file -: ./testStrlen -sc 1000000 -f $file -: ./rapidxml 1000000 $file -: ./libxml 1000000 $file -: ./libxml-sax 1000000 $file