mirror of
git://git.psyc.eu/libpsyc
synced 2024-08-15 03:19:02 +00:00
1104 lines
32 KiB
HTML
1104 lines
32 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
|
<head>
|
|
<title>libpsyc Performance Benchmarks</title>
|
|
<!-- 2015-08-14 Fri 10:43 -->
|
|
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
|
<meta name="generator" content="Org-mode" />
|
|
<style type="text/css">
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
.title { text-align: center; }
|
|
.todo { font-family: monospace; color: red; }
|
|
.done { color: green; }
|
|
.tag { background-color: #eee; font-family: monospace;
|
|
padding: 2px; font-size: 80%; font-weight: normal; }
|
|
.timestamp { color: #bebebe; }
|
|
.timestamp-kwd { color: #5f9ea0; }
|
|
.right { margin-left: auto; margin-right: 0px; text-align: right; }
|
|
.left { margin-left: 0px; margin-right: auto; text-align: left; }
|
|
.center { margin-left: auto; margin-right: auto; text-align: center; }
|
|
.underline { text-decoration: underline; }
|
|
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
|
|
p.verse { margin-left: 3%; }
|
|
pre {
|
|
border: 1px solid #ccc;
|
|
box-shadow: 3px 3px 3px #eee;
|
|
padding: 8pt;
|
|
font-family: monospace;
|
|
overflow: auto;
|
|
margin: 1.2em;
|
|
}
|
|
pre.src {
|
|
position: relative;
|
|
overflow: visible;
|
|
padding-top: 1.2em;
|
|
}
|
|
pre.src:before {
|
|
display: none;
|
|
position: absolute;
|
|
background-color: white;
|
|
top: -10px;
|
|
right: 10px;
|
|
padding: 3px;
|
|
border: 1px solid black;
|
|
}
|
|
pre.src:hover:before { display: inline;}
|
|
pre.src-sh:before { content: 'sh'; }
|
|
pre.src-bash:before { content: 'sh'; }
|
|
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
|
|
pre.src-R:before { content: 'R'; }
|
|
pre.src-perl:before { content: 'Perl'; }
|
|
pre.src-java:before { content: 'Java'; }
|
|
pre.src-sql:before { content: 'SQL'; }
|
|
|
|
table { border-collapse:collapse; }
|
|
caption.t-above { caption-side: top; }
|
|
caption.t-bottom { caption-side: bottom; }
|
|
td, th { vertical-align:top; }
|
|
th.right { text-align: center; }
|
|
th.left { text-align: center; }
|
|
th.center { text-align: center; }
|
|
td.right { text-align: right; }
|
|
td.left { text-align: left; }
|
|
td.center { text-align: center; }
|
|
dt { font-weight: bold; }
|
|
.footpara:nth-child(2) { display: inline; }
|
|
.footpara { display: block; }
|
|
.footdef { margin-bottom: 1em; }
|
|
.figure { padding: 1em; }
|
|
.figure p { text-align: center; }
|
|
.inlinetask {
|
|
padding: 10px;
|
|
border: 2px solid gray;
|
|
margin: 10px;
|
|
background: #ffffcc;
|
|
}
|
|
#org-div-home-and-up
|
|
{ text-align: right; font-size: 70%; white-space: nowrap; }
|
|
textarea { overflow-x: auto; }
|
|
.linenr { font-size: smaller }
|
|
.code-highlighted { background-color: #ffff00; }
|
|
.org-info-js_info-navigation { border-style: none; }
|
|
#org-info-js_console-label
|
|
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
|
|
.org-info-js_search-highlight
|
|
{ background-color: #ffff00; color: #000000; font-weight: bold; }
|
|
/*]]>*/-->
|
|
</style>
|
|
<script type="text/javascript">
|
|
/*
|
|
@licstart The following is the entire license notice for the
|
|
JavaScript code in this tag.
|
|
|
|
Copyright (C) 2012-2013 Free Software Foundation, Inc.
|
|
|
|
The JavaScript code in this tag is free software: you can
|
|
redistribute it and/or modify it under the terms of the GNU
|
|
General Public License (GNU GPL) as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option)
|
|
any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
|
|
As additional permission under GNU GPL version 3 section 7, you
|
|
may distribute non-source (e.g., minimized or compacted) forms of
|
|
that code without the copy of the GNU GPL normally required by
|
|
section 4, provided you include this license notice and a URL
|
|
through which recipients can access the Corresponding Source.
|
|
|
|
|
|
@licend The above is the entire license notice
|
|
for the JavaScript code in this tag.
|
|
*/
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
/**
 * Highlight `elem` together with the element it refers to.
 *
 * Looks up the element whose id is `id`. When it exists, the current class
 * names of `elem` and of that element are stored on `elem` (as
 * `cacheClassElem` and `cacheClassTarget`) and both class names are then set
 * to "code-highlighted". When no element has that id, nothing is changed.
 *
 * @param {Object} elem - element requesting the highlight; also receives the cached class names
 * @param {string} id - id of the element to highlight together with `elem`
 */
function CodeHighlightOn(elem, id)
{
  var linked = document.getElementById(id);
  if (linked == null) {
    return;
  }
  // Remember both class names before overwriting them; CodeHighlightOff reads these.
  elem.cacheClassElem = elem.className;
  elem.cacheClassTarget = linked.className;
  linked.className = "code-highlighted";
  elem.className = "code-highlighted";
}
|
|
/**
 * Undo a highlight by restoring the class names cached on `elem`.
 *
 * Reads `elem.cacheClassElem` and `elem.cacheClassTarget` (the properties
 * CodeHighlightOn stores) and writes them back to `elem` and to the element
 * whose id is `id`.
 *
 * @param {Object} elem - element carrying the cached class names
 * @param {string} id - id of the element that was highlighted together with `elem`
 */
function CodeHighlightOff(elem, id)
{
  var target = document.getElementById(id);
  // Test for "a value was cached" rather than truthiness: an element that had
  // no class caches "", which is falsy, and would otherwise never be restored
  // and stay stuck on "code-highlighted".
  if (elem.cacheClassElem != null)
    elem.className = elem.cacheClassElem;
  // The target may be missing (unknown id, or removed since the highlight);
  // skip it instead of throwing on null.className.
  if (target != null && elem.cacheClassTarget != null)
    target.className = elem.cacheClassTarget;
}
|
|
/*]]>*///-->
|
|
</script>
|
|
<script type="text/javascript" src="http://orgmode.org/mathjax/MathJax.js"></script>
|
|
<script type="text/javascript">
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
// Page-wide MathJax setup: TeX input rendered through the HTML-CSS output jax.
// Object literals below deliberately have no comma after their last member,
// because old Internet Explorer versions reject a trailing comma.
MathJax.Hub.Config({
    // Only one of the two following lines, depending on user settings
    // First allows browser-native MathML display, second forces HTML/CSS
    //  config: ["MMLorHTML.js"], jax: ["input/TeX"],
    jax: ["input/TeX", "output/HTML-CSS"],
    extensions: ["tex2jax.js","TeX/AMSmath.js","TeX/AMSsymbols.js",
                 "TeX/noUndefined.js"],
    // Delimiters and elements the tex2jax preprocessor looks at or skips.
    tex2jax: {
        inlineMath: [ ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"], ["\\begin{displaymath}","\\end{displaymath}"] ],
        skipTags: ["script","noscript","style","textarea","pre","code"],
        ignoreClass: "tex2jax_ignore",
        processEscapes: false,
        processEnvironments: true,
        preview: "TeX"
    },
    showProcessingMessages: true,
    displayAlign: "center",
    displayIndent: "2em",

    // Options for the HTML-CSS output jax selected in `jax` above.
    "HTML-CSS": {
        scale: 100,
        availableFonts: ["STIX","TeX"],
        preferredFont: "TeX",
        webFont: "TeX",
        imageFont: "TeX",
        showMathMenu: true
    },
    // NOTE(review): presumably only consulted when the commented-out
    // config: ["MMLorHTML.js"] line above is enabled; confirm.
    MMLorHTML: {
        prefer: {
            MSIE:    "MML",
            Firefox: "MML",
            Opera:   "HTML",
            other:   "HTML"
        }
    }
});
|
|
/*]]>*///-->
|
|
</script>
|
|
</head>
|
|
<body>
|
|
<div id="content">
|
|
<h1 class="title">libpsyc Performance Benchmarks</h1>
|
|
<div id="table-of-contents">
|
|
<h2>Table of Contents</h2>
|
|
<div id="text-table-of-contents">
|
|
<ul>
|
|
<li><a href="#sec-1">1. PSYC, JSON, XML Syntax Benchmarks</a>
|
|
<ul>
|
|
<li><a href="#sec-1-1">1.1. User Profile</a></li>
|
|
<li><a href="#sec-1-2">1.2. A message with JSON-unfriendly characters</a></li>
|
|
<li><a href="#sec-1-3">1.3. A message with XML-unfriendly characters</a></li>
|
|
<li><a href="#sec-1-4">1.4. A message with PSYC-unfriendly strings</a></li>
|
|
<li><a href="#sec-1-5">1.5. Packets containing binary data</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#sec-2">2. PSYC vs XMPP Protocol Benchmarks</a>
|
|
<ul>
|
|
<li><a href="#sec-2-1">2.1. A presence packet</a></li>
|
|
<li><a href="#sec-2-2">2.2. An average chat message</a></li>
|
|
<li><a href="#sec-2-3">2.3. A new status updated activity</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#sec-3">3. Results</a></li>
|
|
<li><a href="#sec-4">4. Explanations</a></li>
|
|
<li><a href="#sec-5">5. Criticism</a></li>
|
|
<li><a href="#sec-6">6. Ease of Implementation</a></li>
|
|
<li><a href="#sec-7">7. Conclusions</a></li>
|
|
<li><a href="#sec-8">8. Futures</a></li>
|
|
<li><a href="#sec-9">9. Related Work</a></li>
|
|
<li><a href="#sec-10">10. Further Reading</a></li>
|
|
<li><a href="#sec-11">11. Appendix</a>
|
|
<ul>
|
|
<li><a href="#sec-11-1">11.1. Tools used</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
<style type="text/css"> body { padding: 44px; } </style>
|
|
|
|
<p>
|
|
In this document we present the results of performance benchmarks
|
|
of libpsyc compared to json-c, libjson-glib, rapidxml and libxml2.
|
|
</p>
|
|
|
|
<div id="outline-container-sec-1" class="outline-2">
|
|
<h2 id="sec-1"><span class="section-number-2">1</span> PSYC, JSON, XML Syntax Benchmarks</h2>
|
|
<div class="outline-text-2" id="text-1">
|
|
<p>
|
|
First we look at the mere performance of the PSYC syntax
|
|
compared to equivalent XML and JSON encodings. We'll
|
|
look at actual XMPP messaging later.
|
|
</p>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-1-1" class="outline-3">
|
|
<h3 id="sec-1-1"><span class="section-number-3">1.1</span> User Profile</h3>
|
|
<div class="outline-text-3" id="text-1-1">
|
|
<p>
|
|
In this test we'll compare the efficiency of the three
syntaxes at serializing a typical user profile as it
might be stored in a user database. Let's start with XML:
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-xml">&lt;UserProfile&gt;
&lt;Name&gt;Silvio Berlusconi&lt;/Name&gt;
&lt;JobTitle&gt;Premier&lt;/JobTitle&gt;
&lt;Country&gt;I&lt;/Country&gt;
&lt;Address&gt;
&lt;Street&gt;Via del Colosseo, 1&lt;/Street&gt;
&lt;PostalCode&gt;00100&lt;/PostalCode&gt;
&lt;City&gt;Roma&lt;/City&gt;
&lt;/Address&gt;
&lt;Page&gt;http://example.org&lt;/Page&gt;
&lt;/UserProfile&gt;
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
In JSON this could look like this:
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-js">["UserProfile",{"Name":"Silvio Berlusconi","JobTitle":"Premier","Country":"I","Address":
|
|
{"Street":"Via del Colosseo, 1","PostalCode":"00100","City":"Roma"},"Page":"http://example.org"}]
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
Here's a way to model this in PSYC (verbose mode):
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:_name Silvio Berlusconi
|
|
:_title_job Premier
|
|
:_country I
|
|
:_address_street Via del Colosseo, 1
|
|
:_address_code_postal 00100
|
|
:_address_city Roma
|
|
:_page http://example.org
|
|
_profile_user
|
|
|
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-1-2" class="outline-3">
|
|
<h3 id="sec-1-2"><span class="section-number-3">1.2</span> A message with JSON-unfriendly characters</h3>
|
|
<div class="outline-text-3" id="text-1-2">
|
|
<p>
|
|
This message contains some characters which are
|
|
impractical to encode in JSON. We should probably
|
|
put a lot more inside to actually see an impact
|
|
on performance. <b>TODO</b>
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-xml">&lt;message from='romeo@example.net/orchard' to='juliet@example.com/balcony'&gt;
&lt;body&gt;"Neither, fair saint, if either thee dislike.", he said.
And
the
rest
is
history.&lt;/body&gt;
&lt;/message&gt;
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-js">["message",{"from":"romeo@example.net/orchard","to":"juliet@example.com/balcony"},
|
|
"\"Neither, fair saint, if either thee dislike.\", he said.\nAnd\nthe\nrest\nis\nhistory."]
|
|
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:_source psyc://example.com/~romeo
|
|
:_target psyc://example.net/~juliet
|
|
|
|
_message
|
|
"Neither, fair saint, if either thee dislike.", he said.
|
|
And
|
|
the
|
|
rest
|
|
is
|
|
history.
|
|
|
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-1-3" class="outline-3">
|
|
<h3 id="sec-1-3"><span class="section-number-3">1.3</span> A message with XML-unfriendly characters</h3>
|
|
<div class="outline-text-3" id="text-1-3">
|
|
<p>
|
|
Same test with characters which aren't practical
|
|
in the XML syntax, yet we should put more of
|
|
them inside. <b>TODO</b>
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-xml"><message from='juliet@example.com/balcony' to='romeo@example.net'>
|
|
<body>Pro&#x010D;e&#x017D; jsi ty, Romeo?</body>
|
|
</message>
|
|
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-js">["message",{"from":"juliet@example.com/balcony","to":"romeo@example.net"},
|
|
"Pro\u010de\u017d jsi ty, Romeo?"]
|
|
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:_source psyc://example.com/~juliet
|
|
:_target psyc://example.net/~romeo
|
|
|
|
_message
|
|
Pro&#x010D;e&#x017D; jsi ty, Romeo?
|
|
|
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-1-4" class="outline-3">
|
|
<h3 id="sec-1-4"><span class="section-number-3">1.4</span> A message with PSYC-unfriendly strings</h3>
|
|
<div class="outline-text-3" id="text-1-4">
|
|
<p>
|
|
PSYC prefixes data with length as soon as it
|
|
exceeds certain sizes or contains certain strings.
|
|
In the case of short messages this is less
|
|
efficient than scanning the values without lengths.
|
|
Also, lengths are harder to edit by hand.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-xml">&lt;message from='juliet@example.com/balcony' to='romeo@example.net'&gt;
&lt;subject&gt;I implore you with a pointless
newline in a header variable&lt;/subject&gt;
&lt;body&gt;Wherefore art thou, Romeo?
|
And for practicing purposes we added a PSYC packet delimiter.&lt;/body&gt;
&lt;/message&gt;
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-js">["message",{"from":"juliet@example.com/balcony","to":"romeo@example.net",
|
|
"subject":"I implore you with a pointless\nnewline in a header variable"},
|
|
"Wherefore art thou, Romeo?\n|\nAnd for practicing purposes we added a PSYC packet delimiter."]
|
|
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:_source psyc://example.com/~juliet
|
|
:_target psyc://example.net/~romeo
|
|
173
|
|
:_subject 59 I implore you with a pointless
|
|
newline in a header variable
|
|
_message
|
|
Wherefore art thou, Romeo?
|
|
|
|
|
And for practicing purposes we added a PSYC packet delimiter.
|
|
|
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-1-5" class="outline-3">
|
|
<h3 id="sec-1-5"><span class="section-number-3">1.5</span> Packets containing binary data</h3>
|
|
<div class="outline-text-3" id="text-1-5">
|
|
<p>
|
|
We'll use a generator of random binary data to
|
|
see how well the formats behave with different
|
|
sizes of data. We'll consider 7000 as a possible
|
|
size of an icon, 70000 for an avatar, 700000
|
|
for a photograph, 7000000 for a piece of music,
|
|
70000000 for a large project and
|
|
700000000 for the contents of a CD.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-2" class="outline-2">
|
|
<h2 id="sec-2"><span class="section-number-2">2</span> PSYC vs XMPP Protocol Benchmarks</h2>
|
|
<div class="outline-text-2" id="text-2">
|
|
<p>
|
|
These tests use typical XMPP messages ("stanzas" in
|
|
Jabber lingo) and compare them with equivalent JSON encodings
|
|
and PSYC formats.
|
|
</p>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-2-1" class="outline-3">
|
|
<h3 id="sec-2-1"><span class="section-number-3">2.1</span> A presence packet</h3>
|
|
<div class="outline-text-3" id="text-2-1">
|
|
<p>
|
|
Since presence packets are by far the dominant messaging content
|
|
in the XMPP network, we'll start with one of them.
|
|
Here's an example from paragraph 4.4.2 of RFC 6121.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-xml">&lt;presence from='juliet@example.com/balcony'
to='benvolio@example.net'&gt;
&lt;show&gt;away&lt;/show&gt;
&lt;/presence&gt;
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
And here's the same information in a JSON rendition:
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-js">["presence",{"from":"juliet@example.com/balcony","to":"benvolio@example.net"},{"show":"away"}]
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
Here's the equivalent PSYC packet in verbose mode
|
|
(since it is a multicast, the single recipients do not
|
|
need to be mentioned):
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:_context psyc://example.com/~juliet
|
|
|
|
=_degree_availability 4
|
|
_notice_presence
|
|
|
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
And this is the same message in PSYC's compact form, but since compact mode
|
|
has been neither implemented nor deployed yet, you should only consider this
|
|
for future projects:
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:c psyc://example.com/~juliet
|
|
|
|
=da 4
|
|
np
|
|
|
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-2-2" class="outline-3">
|
|
<h3 id="sec-2-2"><span class="section-number-3">2.2</span> An average chat message</h3>
|
|
<div class="outline-text-3" id="text-2-2">
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-xml">&lt;message from='juliet@example.com/balcony' to='romeo@example.net' type='chat'&gt;
&lt;body&gt;Art thou not Romeo, and a Montague?&lt;/body&gt;
&lt;/message&gt;
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-js">["message",{"from":"juliet@example.com/balcony","to":"romeo@example.net"},
|
|
"Art thou not Romeo, and a Montague?"]
|
|
</pre>
|
|
</div>
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:_source psyc://example.com/~juliet
|
|
:_target xmpp:romeo@example.net
|
|
|
|
_message
|
|
Art thou not Romeo, and a Montague?
|
|
|
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
Little difference: PSYC by default doesn't mention a "resource" in XMPP terms,
|
|
instead it allows for more addressing schemes than just PSYC.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-2-3" class="outline-3">
|
|
<h3 id="sec-2-3"><span class="section-number-3">2.3</span> A new status updated activity</h3>
|
|
<div class="outline-text-3" id="text-2-3">
|
|
<p>
|
|
Example taken from <a href="http://onesocialweb.org/spec/1.0/osw-activities.html">http://onesocialweb.org/spec/1.0/osw-activities.html</a>.
|
|
You could call this XML namespace hell.. :-)
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-xml">&lt;iq type='set'
from='hamlet@denmark.lit/snsclient'
to='hamlet@denmark.lit'
id='osw1'&gt;
&lt;pubsub xmlns='http://jabber.org/protocol/pubsub'&gt;
&lt;publish node='urn:xmpp:microblog:0'&gt;
&lt;item&gt;
&lt;entry xmlns="http://www.w3.org/2005/Atom"
xmlns:activity="http://activitystrea.ms/spec/1.0/"
xmlns:osw="http://onesocialweb.org/spec/1.0/"&gt;
&lt;title&gt;to be or not to be ?&lt;/title&gt;
&lt;activity:verb&gt;http://activitystrea.ms/schema/1.0/post&lt;/activity:verb&gt;
&lt;activity:object&gt;
&lt;activity:object-type&gt;http://onesocialweb.org/spec/1.0/object/status&lt;/activity:object-type&gt;
&lt;content type="text/plain"&gt;to be or not to be ?&lt;/content&gt;
&lt;/activity:object&gt;
&lt;osw:acl-rule&gt;
&lt;osw:acl-action permission="http://onesocialweb.org/spec/1.0/acl/permission/grant"&gt;
http://onesocialweb.org/spec/1.0/acl/action/view
&lt;/osw:acl-action&gt;
&lt;osw:acl-subject type="http://onesocialweb.org/spec/1.0/acl/subject/everyone"/&gt;
&lt;/osw:acl-rule&gt;
&lt;/entry&gt;
&lt;/item&gt;
&lt;/publish&gt;
&lt;/pubsub&gt;
&lt;/iq&gt;
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
<a href="http://activitystrea.ms/head/json-activity.html">http://activitystrea.ms/head/json-activity.html</a> proposes a JSON encoding
|
|
of this. We'll have to add a routing header to it.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-js">["activity",{"from":"hamlet@denmark.lit/snsclient"},{"verb":"post",
|
|
"title":"to be or not to be ?","object":{"type":"status",
|
|
"content":"to be or not to be ?","contentType":"text/plain"}}]
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
<a href="http://about.psyc.eu/Activity">http://about.psyc.eu/Activity</a> suggests a PSYC mapping for activity
|
|
streams. Should a "status post" be considered equivalent to a presence
|
|
description announcement or just a message in the "microblogging" channel?
|
|
We'll use the latter here:
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
|
|
<pre class="src src-psyc">:_context psyc://denmark.lit/~hamlet#_follow
|
|
|
|
:_subject to be or not to be ?
|
|
:_type_content text/plain
|
|
_message
|
|
to be or not to be ?
|
|
|
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
The nice thing about XML namespaces is that, by definition, they can never collide,
|
|
but this degree of engineering perfection causes us a lot of overhead.
|
|
The PSYC approach is to just extend the name of the method - as long as
|
|
people use differing method names, protocol extensions can exist next
|
|
to each other happily. Method name uniqueness cannot mathematically be ensured,
|
|
but it's enough to append your company name to make it unlikely for anyone
|
|
else on earth to have the same name. How this kind of safety is delivered
|
|
when using the JSON syntax of ActivityStreams is unclear. Apparently it was
|
|
no longer an important design criterion.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-3" class="outline-2">
|
|
<h2 id="sec-3"><span class="section-number-2">3</span> Results</h2>
|
|
<div class="outline-text-2" id="text-3">
|
|
<p>
|
|
Parsing time of 1 000 000 packets, in milliseconds.
|
|
A simple strlen() scan of the respective message is provided for comparison.
|
|
These tests were performed on a 2.53 GHz Intel(R) Core(TM)2 Duo P9500 CPU.
|
|
</p>
|
|
|
|
<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="left" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="left"> </th>
|
|
<th scope="col" class="right">strlen</th>
|
|
<th scope="col" class="right">libpsyc</th>
|
|
<th scope="col" class="right">json-c</th>
|
|
<th scope="col" class="right">json-glib</th>
|
|
<th scope="col" class="right">libxml sax</th>
|
|
<th scope="col" class="right">libxml</th>
|
|
<th scope="col" class="right">rapidxml</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="left">user profile</td>
|
|
<td class="right">55</td>
|
|
<td class="right">608</td>
|
|
<td class="right">4715</td>
|
|
<td class="right">16503</td>
|
|
<td class="right">7350</td>
|
|
<td class="right">12377</td>
|
|
<td class="right">2477</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="left">psyc-unfriendly</td>
|
|
<td class="right">70</td>
|
|
<td class="right">286</td>
|
|
<td class="right">2892</td>
|
|
<td class="right">12567</td>
|
|
<td class="right">5538</td>
|
|
<td class="right">8659</td>
|
|
<td class="right">1896</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="left">json-unfriendly</td>
|
|
<td class="right">49</td>
|
|
<td class="right">430</td>
|
|
<td class="right">2328</td>
|
|
<td class="right">10006</td>
|
|
<td class="right">5141</td>
|
|
<td class="right">7875</td>
|
|
<td class="right">1751</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="left">xml-unfriendly</td>
|
|
<td class="right">37</td>
|
|
<td class="right">296</td>
|
|
<td class="right">2156</td>
|
|
<td class="right">9591</td>
|
|
<td class="right">5571</td>
|
|
<td class="right">8769</td>
|
|
<td class="right">1765</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
|
|
<p>
|
|
Pure syntax comparisons above, protocol performance comparisons below:
|
|
</p>
|
|
|
|
<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="left" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
|
|
<col class="center" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="left"> </th>
|
|
<th scope="col" class="right">strlen</th>
|
|
<th scope="col" class="right">libpsyc</th>
|
|
<th scope="col" class="center">libpsyc compact</th>
|
|
<th scope="col" class="right">json-c</th>
|
|
<th scope="col" class="right">json-glib</th>
|
|
<th scope="col" class="right">libxml sax</th>
|
|
<th scope="col" class="right">libxml</th>
|
|
<th scope="col" class="right">rapidxml</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="left">presence</td>
|
|
<td class="right">30</td>
|
|
<td class="right">236</td>
|
|
<td class="center">122</td>
|
|
<td class="right">2463</td>
|
|
<td class="right">10016</td>
|
|
<td class="right">4997</td>
|
|
<td class="right">7557</td>
|
|
<td class="right">1719</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="left">chat msg</td>
|
|
<td class="right">40</td>
|
|
<td class="right">295</td>
|
|
<td class="center">258</td>
|
|
<td class="right">2147</td>
|
|
<td class="right">9526</td>
|
|
<td class="right">5911</td>
|
|
<td class="right">8999</td>
|
|
<td class="right">1850</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="left">activity</td>
|
|
<td class="right">42</td>
|
|
<td class="right">353</td>
|
|
<td class="center">279</td>
|
|
<td class="right">4666</td>
|
|
<td class="right">16327</td>
|
|
<td class="right">13357</td>
|
|
<td class="right">28858</td>
|
|
<td class="right">4356</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
|
|
<p>
|
|
Parsing large amounts of binary data. For JSON &amp; XML, base64 encoding was used.
|
|
Note that the results below include only the parsing time, base64 decoding was
|
|
not performed.
|
|
</p>
|
|
|
|
<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
|
|
<colgroup>
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
|
|
<col class="right" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="right"> </th>
|
|
<th scope="col" class="right">strlen</th>
|
|
<th scope="col" class="right">libpsyc</th>
|
|
<th scope="col" class="right">json-c</th>
|
|
<th scope="col" class="right">json-glib</th>
|
|
<th scope="col" class="right">libxml sax</th>
|
|
<th scope="col" class="right">libxml</th>
|
|
<th scope="col" class="right">rapidxml</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="right">7K</td>
|
|
<td class="right">978</td>
|
|
<td class="right">77</td>
|
|
<td class="right">18609</td>
|
|
<td class="right">98000</td>
|
|
<td class="right">11445</td>
|
|
<td class="right">19299</td>
|
|
<td class="right">8701</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="right">70K</td>
|
|
<td class="right">9613</td>
|
|
<td class="right">77</td>
|
|
<td class="right">187540</td>
|
|
<td class="right">1003900</td>
|
|
<td class="right">96209</td>
|
|
<td class="right">167738</td>
|
|
<td class="right">74296</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="right">700K</td>
|
|
<td class="right">95888</td>
|
|
<td class="right">77</td>
|
|
<td class="right">1883500</td>
|
|
<td class="right">10616000</td>
|
|
<td class="right">842025</td>
|
|
<td class="right">1909428</td>
|
|
<td class="right">729419</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="right">7M</td>
|
|
<td class="right">1347300</td>
|
|
<td class="right">78</td>
|
|
<td class="right">26359000</td>
|
|
<td class="right">120810000</td>
|
|
<td class="right">12466610</td>
|
|
<td class="right">16751363</td>
|
|
<td class="right">7581169</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="right">70M</td>
|
|
<td class="right">14414000</td>
|
|
<td class="right">80</td>
|
|
<td class="right">357010000</td>
|
|
<td class="right">1241000000</td>
|
|
<td class="right">169622110</td>
|
|
<td class="right">296017820</td>
|
|
<td class="right">75308906</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
|
|
<p>
|
|
In each case we compared performance of parsing and re-rendering
|
|
these messages, but consider also that the applicative processing
|
|
of an XML DOM tree is more complicated than just accessing
|
|
certain elements in a JSON data structure or PSYC variable mapping.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-4" class="outline-2">
|
|
<h2 id="sec-4"><span class="section-number-2">4</span> Explanations</h2>
|
|
<div class="outline-text-2" id="text-4">
|
|
<p>
|
|
As you can tell the PSYC data format outpaces its rivals in all circumstances.
|
|
Extremely so when delivering binary data as PSYC simply returns the starting
|
|
point and the length of the given buffer while the other parsers have to scan
|
|
for the end of the transmission, but also with many simpler operations, when
|
|
PSYC quickly figures out where the data starts and ends and passes such
|
|
information back to the application while the other formats are forced to
|
|
generate a copy of the data in order to process possibly embedded special
|
|
character sequences. PSYC essentially operates like a binary data protocol
|
|
even though it is actually text-based.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-5" class="outline-2">
|
|
<h2 id="sec-5"><span class="section-number-2">5</span> Criticism</h2>
|
|
<div class="outline-text-2" id="text-5">
|
|
<p>
|
|
Are we comparing apples and oranges? Yes and no, depends on what you
|
|
need. XML is a syntax best suited for complex structured data in
|
|
well-defined formats - especially good for text mark-up. JSON is a syntax
|
|
intended to hold arbitrarily structured data suitable for immediate
|
|
inclusion in JavaScript source code. The PSYC syntax is an evolved
|
|
derivative of RFC 822, the syntax used by HTTP and E-Mail. It is currently
|
|
limited in the kind and depth of data structures that can be represented
|
|
with it, but it is highly efficient in exchange.
|
|
</p>
|
|
|
|
<p>
|
|
In fact we are currently looking into suitable syntax extensions to represent
|
|
generic structures and semantic signatures, but for now PSYC only
|
|
provides for simple typed values and lists of typed values.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-6" class="outline-2">
|
|
<h2 id="sec-6"><span class="section-number-2">6</span> Ease of Implementation</h2>
|
|
<div class="outline-text-2" id="text-6">
|
|
<p>
|
|
Another aspect is the availability of these formats for spontaneous
|
|
use. You could generate and parse JSON yourself but you have to be
|
|
careful about escaping. XML can be rendered manually if you know your
|
|
data will not break the syntax, but you shouldn't dare to parse it without
|
|
a bulletproof parser. PSYC is easy to render and parse yourself for
|
|
simple tasks, as long as the body does not contain "\n|\n" and your
|
|
variables do not contain newlines.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-7" class="outline-2">
|
|
<h2 id="sec-7"><span class="section-number-2">7</span> Conclusions</h2>
|
|
<div class="outline-text-2" id="text-7">
|
|
<p>
|
|
After all it is up to you to find out which format fulfils your
|
|
requirements the best. We use PSYC for the majority of messaging where
|
|
JSON and XMPP aren't efficient and opaque enough, but we employ XML and
|
|
JSON as payloads within PSYC for data that doesn't fit the PSYC model.
|
|
For some reason all three formats are being used for messaging, although
|
|
only PSYC was actually designed for that purpose.
|
|
</p>
|
|
|
|
<p>
|
|
The Internet has developed two major breeds of protocol formats.
|
|
The binary ones are extremely efficient but in most cases you have
|
|
to recompile all instances each time you change something
|
|
while the plain-text ones are reaching out for achieving perfection
|
|
in data representation while leaving the path of efficiency. Some
|
|
protocols such as HTTP and SIP are in-between these two schools,
|
|
offering both a text-based extensible syntax (it's actually easier to
|
|
add a header to HTTP than to come up with a namespace for XMPP…)
|
|
and the ability to deliver binary data. But these protocols do not
|
|
come with native data structure support. PSYC is a protocol that
|
|
combines the compactness and efficiency of binary protocols with the
|
|
extensibility of text-based protocols and still provides for enough
|
|
data structuring to rarely require the use of other data formats.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-8" class="outline-2">
|
|
<h2 id="sec-8"><span class="section-number-2">8</span> Futures</h2>
|
|
<div class="outline-text-2" id="text-8">
|
|
<p>
|
|
After a month of development libpsyc is already performing pretty
|
|
well, but we presume various optimizations, like rewriting parts
|
|
in assembler, are possible.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-9" class="outline-2">
|
|
<h2 id="sec-9"><span class="section-number-2">9</span> Related Work</h2>
|
|
<div class="outline-text-2" id="text-9">
|
|
<p>
|
|
If this didn't help, you can also look into:
|
|
</p>
|
|
|
|
<ul class="org-ul">
|
|
<li>Adobe AMF
|
|
</li>
|
|
<li>ASN.1
|
|
</li>
|
|
<li>BSON
|
|
</li>
|
|
<li>Cisco Etch
|
|
</li>
|
|
<li>Efficient XML
|
|
</li>
|
|
<li>Facebook Thrift
|
|
</li>
|
|
<li>Google Protocol Buffers
|
|
</li>
|
|
</ul>
|
|
|
|
<p>
|
|
The drawback of these binary formats is that, unlike PSYC, JSON and XML,
|
|
you can't edit them manually and you can't produce valid messages
|
|
by replacing variables in a simple text template. You depend on
|
|
specialized parsers and renderers to be provided.
|
|
</p>
|
|
|
|
<p>
|
|
There's also
|
|
</p>
|
|
|
|
<ul class="org-ul">
|
|
<li>BitTorrent's bencode
|
|
</li>
|
|
</ul>
|
|
|
|
<p>
|
|
This format is formally text-based, but not easy to read as it doesn't
|
|
have any visual separators and isn't easy to edit as everything is
|
|
prefixed by lengths even for very short items.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-10" class="outline-2">
|
|
<h2 id="sec-10"><span class="section-number-2">10</span> Further Reading</h2>
|
|
<div class="outline-text-2" id="text-10">
|
|
<p>
|
|
<a href="http://about.psyc.eu/Spec:Syntax">http://about.psyc.eu/Spec:Syntax</a> provides you with the ABNF grammar
|
|
of the PSYC 1.0 syntax. You may also be interested in PSYC's decentralized
|
|
state mechanism provided by the +/-/= operators.
|
|
</p>
|
|
|
|
<p>
|
|
See <a href="http://about.psyc.eu/XML">http://about.psyc.eu/XML</a> and <a href="http://about.psyc.eu/JSON">http://about.psyc.eu/JSON</a> for more
|
|
biased information on the respective formats.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-sec-11" class="outline-2">
|
|
<h2 id="sec-11"><span class="section-number-2">11</span> Appendix</h2>
|
|
<div class="outline-text-2" id="text-11">
|
|
</div><div id="outline-container-sec-11-1" class="outline-3">
|
|
<h3 id="sec-11-1"><span class="section-number-3">11.1</span> Tools used</h3>
|
|
<div class="outline-text-3" id="text-11-1">
|
|
<p>
|
|
This document and its benchmarks are distributed with libpsyc.
|
|
See <a href="http://about.psyc.eu/libpsyc">http://about.psyc.eu/libpsyc</a> on how to obtain it.
|
|
</p>
|
|
|
|
<p>
|
|
The benchmarks can be run with the following command
|
|
(xmlbench is needed for the xml tests):
|
|
</p>
|
|
|
|
<pre class="example">
|
|
make bench
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="postamble" class="status">
|
|
<p class="date">Created: 2015-08-14 Fri 10:43</p>
|
|
<p class="creator"><a href="http://www.gnu.org/software/emacs/">Emacs</a> 24.4.1 (<a href="http://orgmode.org">Org</a> mode 8.2.6)</p>
|
|
<p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p>
|
|
</div>
|
|
</body>
|
|
</html>
|