#!/usr/bin/env node /* note: it can crash a net while running on kernel-space */ /*jshint es5:false, asi:true, quotmark:false, eqeqeq:false, forin: false */ /* * (c) 2011-14 Tim Becker, see file LICENSE for details */ /* * Provides functionality for bencoding and decoding as used in * bittorrent and described in: http://www.bittorrent.org/beps/bep_0003.html * * Encoding is as follows: * * var benc = require('bncode'), * exmp = {} * * exmp.bla = "blup" * exmp.foo = "bar" * exmp.one = 1 * exmp.woah = {} * exmp.woah.arr = [] * exmp.woah.arr.push(1) * exmp.woah.arr.push(2) * exmp.woah.arr.push(3) * exmp.str = Buffer.from("Buffers work too") * * var bencBuffer = benc.encode(exmp) * * // dictionary keys are written in sorted order: * // d3:bla4:blup3:foo3:bar3:onei1e3:str16:Buffers work too \ * // 4:woahd3:arrli1ei2ei3eeee * * * Decoding works progressively, e.g. if you're receiving partial * bencoded strings on the network: * * var benc = require("bncode"), * buf = null * * decoder = new benc.decoder() * while (buf = receiveData()) { * decoder.decode(buf) * } * * log(decoder.result()) * * * Or "all in one" * * var benc = require("bncode"), * buf = getBuffer(), * dec = benc.decode(buf) * * log(dec.bla) * * * There are some subtleties concerning bencoded strings. These are * decoded as Buffer objects because they are just strings of raw bytes * and as such would wreak havoc with multi byte strings in javascript. * * The exception to this is strings that appear as keys in bencoded * dicts. These are decoded as Javascript Strings, as they should always * be strings of (ascii) characters and if they weren't decoded as JS * Strings, dicts could not be mapped to Javascript objects with properties. 
* */ exports.encode = Bencode exports.decoder = Bdecode exports.decode = decode exports.Stream = Stream var inherits = require('util').inherits var Transform = require('stream').Transform var I = 'i'.charCodeAt(0) var L = 'l'.charCodeAt(0) var E = 'e'.charCodeAt(0) var D = 'd'.charCodeAt(0) var COLON = ':'.charCodeAt(0) var DASH = '-'.charCodeAt(0) var STATE_INITIAL = 0 var STATE_STATE_STRING_LEN = STATE_INITIAL + 1 var STATE_STRING = STATE_STATE_STRING_LEN + 1 var STATE_COLON = STATE_STRING + 1 var STATE_STATE_INTEGER = STATE_COLON + 1 var STATE_INTEGER = STATE_STATE_INTEGER + 1 /* * This is the internal state machine for taking apart bencoded strings, * it's not exposed in the eports. It's constructed with four callbacks * that get fired when: * * cb: a value (string or number) is encountered * cb_list: a begin list element is encountered * cb_dict: a beginning of dictionary is encountered. * cd_end: an end element, wheter dict or list is encountered * * Once constructed, the machine may be fed with buffers containing * partial bencoded string. Call `consistent` to check whether the * current state is consistent, e.g. not smack-dap in the middle of * a string or a number and if the dict, list and end calls balance * * * The functionality being so rudimentary requires some more state and * logic in the code executing the machine, for this see Context, below. 
* */ function BdecodeSMachine (cb, cb_list, cb_dict, cb_end) { var depth = 0 var state = STATE_INITIAL this.consistent = function () { return state === STATE_INITIAL && depth === 0 } var strLen = 0 var str = '' var _int = 0 var neg = false this.parse = function (buffer, encoding) { if (typeof buffer === 'string') { buffer = new Buffer(buffer, encoding || 'utf8') } for (var pos = 0; pos !== buffer.length; ++pos) { switch (state) { case STATE_INITIAL: switch (buffer[pos]) { case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: state = STATE_STATE_STRING_LEN strLen = 0 strLen += buffer[pos] - 0x30 break case I: state = STATE_STATE_INTEGER _int = 0 neg = false break case L: state = STATE_INITIAL depth += 1 cb_list() break case D: state = STATE_INITIAL depth += 1 cb_dict() break case E: state = STATE_INITIAL depth -= 1 if (depth < 0) { throw new Error('end with no beginning: ' + pos) } else { cb_end() } break } break case STATE_STATE_STRING_LEN: if (integer(buffer[pos])) { strLen *= 10 strLen += buffer[pos] - 0x30 } else { str = new Buffer(strLen) pos -=1 state = STATE_COLON } break case STATE_COLON: if (buffer[pos] !== COLON) { throw new Error('not a colon at: ' + pos.toString(16)) } state = STATE_STRING // in case this is a zero length string, there's // no bytes to be collected. if (0 === strLen) { cb(new Buffer(0)) state = STATE_INITIAL } break case STATE_STRING: if (0 === strLen) { cb(str) state = STATE_INITIAL } else { //str += String.fromCharCode(buffer[pos]) // not unicode safe.. str[str.length-strLen] = buffer[pos] strLen -= 1 if (0 === strLen) { cb(str) state = STATE_INITIAL } } break case STATE_STATE_INTEGER: state = STATE_INTEGER if (buffer[pos] === DASH) { neg = true // handle neg and zero within value. break } // else fall through case STATE_INTEGER: if (integer(buffer[pos])) { _int *= 10 _int += buffer[pos] - 0x30 } else if (buffer[pos] === E) { var ret = neg ? 
0 - _int : _int cb(ret) state = STATE_INITIAL } else { throw new Error('not part of int at:'+pos.toString(16)) } break } // switch state } // for buffer } // function parse function integer (value) { // check that value is a number and that // its value is ascii integer. if (typeof value !== 'number') { return false } return between(value, 0x30, 0x39) } function between (val, min, max) { return (min <= val && val <= max) } } // end BdecodeSMachine /* * The exported decode functionality. */ function Bdecode () { // markers var DICTIONARY_START = {} var LIST_START = {} var Context = function () { var self = this var stack = [] this.cb = function (o) { stack.push(o) } this.cb_list = function () { self.cb(LIST_START) } this.cb_dict = function () { self.cb(DICTIONARY_START) } this.cb_end = function () { // unwind the stack until either a DICTIONARY_START or LIST_START is // found, create arr or hash, stick unwound stack on, push arr or hash // back onto stack var obj = null var tmp_stack = [] while ((obj = stack.pop()) !== undefined) { if (LIST_START === obj) { var obj2 = null var list = [] while((obj2 = tmp_stack.pop()) !== undefined) { list.push(obj2) } self.cb(list) break } else if (DICTIONARY_START === obj) { var key = null var val = null var dic = {} while ((key = tmp_stack.pop()) !== undefined && (val = tmp_stack.pop()) !== undefined) { dic[key.toString()] = val } if (key !== undefined && dic[key] === undefined) { throw new Error('uneven number of keys and values A') } self.cb(dic) break } else { tmp_stack.push(obj) } } if (tmp_stack.length > 0) { // could this case even occur? throw new Error('uneven number of keys and values B') } } this.result = function () { return stack } } var self = this var ctx = new Context() var smachine = new BdecodeSMachine(ctx.cb, ctx.cb_list, ctx.cb_dict, ctx.cb_end) this.result = function () { if (!smachine.consistent()) { throw new Error('not in consistent state. 
More bytes coming?') } return ctx.result() } this.decode = function (buf, encoding) { smachine.parse(buf, encoding) } } function Bencode (obj) { var self = this var to_encode = obj var buffer = null switch (typeof obj) { case 'string': return encodeString(obj) case 'number': return encodeNumber(obj) case 'object': if (obj instanceof Array) { return encodeList(obj) } else if (Buffer.isBuffer(obj)) { return encodeBuffer(obj) } else { // assume it's a hash return encodeDict(obj) } } function encodeString (obj) { var blen = Buffer.byteLength(obj) var len = blen.toString(10) var buf = new Buffer(len.length + 1 + blen) buf.write(len, 0, 'ascii') buf.write(':', len.length, 'ascii') buf.write(obj, len.length + 1, 'utf8') return buf } function encodeNumber (num) { var n = num.toString(10) var buf = new Buffer(n.length + 2) buf.write('i', 0) buf.write(n, 1) buf.write('e', n.length + 1) return buf } function encodeDict (obj) { var func = function (obj, pos) { var keys = Object.keys(obj).sort() keys.forEach(function (key) { var val = new Bencode(obj[key]) key = new Bencode(key) ensure(key.length + val.length, pos) key.copy(buffer, pos, 0) pos += key.length val.copy(buffer, pos, 0) pos += val.length }) return pos } return assemble(obj, 'd', func) } function encodeList (obj) { var func = function(obj, pos) { obj.forEach(function (o) { var elem = new Bencode(o) ensure(elem.length, pos) elem.copy(buffer, pos, 0) pos += elem.length }) return pos } return assemble(obj, 'l', func) } function encodeBuffer (obj) { var len = obj.length.toString(10) var buf = new Buffer(len.length + 1 + obj.length) buf.write(len, 0, 'ascii') buf.write(':', len.length, 'ascii') obj.copy(buf, len.length + 1, 0) return buf } function assemble (obj, prefix, func) { var pos = 0 ensure(1024, 0) buffer.write(prefix, pos++) pos = func(obj, pos) ensure(1, pos) buffer.write('e', pos++) return buffer.slice(0, pos) } function ensure (num, pos) { if (!buffer) { buffer = new Buffer(num) } else { if (buffer.length > 
num + pos + 1) { return } else { var buf2 = new Buffer(buffer.length + num) buffer.copy(buf2, 0, 0) buffer = buf2 } } } } function decode (buffer, encoding) { var decoder = new Bdecode() decoder.decode(buffer, encoding) return decoder.result()[0] } function Stream (options) { options = options || {} options.objectMode = true Transform.call(this, options) this._decoder = new Bdecode() } inherits(Stream, Transform) Stream.prototype._transform = function (chunk, encoding, callback) { try { this._decoder.decode(chunk, encoding) callback(null) } catch(err) { callback(err) } } Stream.prototype._flush = function (callback) { this.push(this._decoder.result()[0]) callback(null) }