[6a3a178] | 1 | // Copyright Joyent, Inc. and other Node contributors.
|
---|
| 2 | //
|
---|
| 3 | // Permission is hereby granted, free of charge, to any person obtaining a
|
---|
| 4 | // copy of this software and associated documentation files (the
|
---|
| 5 | // "Software"), to deal in the Software without restriction, including
|
---|
| 6 | // without limitation the rights to use, copy, modify, merge, publish,
|
---|
| 7 | // distribute, sublicense, and/or sell copies of the Software, and to permit
|
---|
| 8 | // persons to whom the Software is furnished to do so, subject to the
|
---|
| 9 | // following conditions:
|
---|
| 10 | //
|
---|
| 11 | // The above copyright notice and this permission notice shall be included
|
---|
| 12 | // in all copies or substantial portions of the Software.
|
---|
| 13 | //
|
---|
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
---|
| 15 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
---|
| 16 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
|
---|
| 17 | // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
---|
| 18 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
---|
| 19 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
---|
| 20 | // USE OR OTHER DEALINGS IN THE SOFTWARE.
|
---|
| 21 |
|
---|
| 22 | 'use strict';
|
---|
| 23 |
|
---|
| 24 | /*<replacement>*/
|
---|
| 25 |
|
---|
| 26 | var Buffer = require('safe-buffer').Buffer;
|
---|
| 27 | /*</replacement>*/
|
---|
| 28 |
|
---|
// Reference to the encoding checker captured at load time. When the Buffer
// implementation provides no `isEncoding`, a local fallback is used instead:
// it coerces `encoding` to a string, lower-cases it and reports whether it is
// one of the names listed below.
var isEncoding = Buffer.isEncoding || function (encoding) {
  encoding = '' + encoding;
  switch (encoding && encoding.toLowerCase()) {
    case 'hex':
    case 'utf8':
    case 'utf-8':
    case 'ascii':
    case 'latin1': // accepted by _normalizeEncoding, so the fallback must agree
    case 'binary':
    case 'base64':
    case 'ucs2':
    case 'ucs-2':
    case 'utf16le':
    case 'utf-16le':
    case 'raw':
      return true;
    default:
      return false;
  }
};
|
---|
| 38 |
|
---|
// Maps an encoding name onto its canonical spelling.
//
// A falsy `enc` means 'utf8'. The name is first compared as given; if that
// fails it is coerced to a string, lower-cased and compared once more.
// Returns 'utf8', 'utf16le', 'latin1', 'base64', 'ascii' or 'hex', or
// undefined when the name is not recognised on either pass.
function _normalizeEncoding(enc) {
  if (!enc) return 'utf8';
  var name = enc;
  for (var pass = 0; pass < 2; pass++) {
    if (name === 'utf8' || name === 'utf-8') return 'utf8';
    if (name === 'ucs2' || name === 'ucs-2' || name === 'utf16le' || name === 'utf-16le') {
      return 'utf16le';
    }
    if (name === 'latin1' || name === 'binary') return 'latin1';
    if (name === 'base64' || name === 'ascii' || name === 'hex') return name;
    // Only the first miss triggers the coerce-and-lower-case retry.
    if (pass === 0) name = ('' + name).toLowerCase();
  }
  return undefined;
}
|
---|
| 66 |
|
---|
// Resolves `enc` to the encoding name stored on the decoder.
//
// Do not use the cached `isEncoding` when checking encoding names: some
// modules monkey-patch `Buffer.isEncoding` to support additional encodings,
// so the live function has to be consulted on every call.
//
// Returns the canonical name produced by `_normalizeEncoding` when there is
// one; otherwise returns `enc` unchanged, provided a patched
// `Buffer.isEncoding` accepts it.
// Throws Error('Unknown encoding: ' + enc) when the name is not recognised.
function normalizeEncoding(enc) {
  var nenc = _normalizeEncoding(enc);
  if (typeof nenc === 'string') return nenc;

  // With the unpatched checker only the names `_normalizeEncoding` knows are
  // supported, so reaching this point means the encoding is unknown.
  var unknown = Buffer.isEncoding === isEncoding;
  if (!unknown) {
    // Ask the live checker; fall back to the cached one only when the Buffer
    // implementation exposes no `isEncoding` at all.
    unknown = typeof Buffer.isEncoding === 'function' ? !Buffer.isEncoding(enc) : !isEncoding(enc);
  }
  if (unknown) throw new Error('Unknown encoding: ' + enc);
  return enc;
}
|
---|
| 74 |
|
---|
// StringDecoder turns a sequence of buffers into a sequence of JS strings
// without splitting a multi-byte character across two results.
exports.StringDecoder = StringDecoder;

// Builds a decoder for `encoding` (normalised through `normalizeEncoding`,
// which throws for unknown names).
//
// utf16le, utf8 and base64 decoders carry bytes over between writes: they get
// the encoding-specific methods plus `lastNeed`, `lastTotal` and a `lastChar`
// scratch buffer (4 bytes for utf16le/utf8, 3 for base64). Every other
// encoding gets the pass-through `simpleWrite`/`simpleEnd` pair and no
// carry-over state.
function StringDecoder(encoding) {
  var name = normalizeEncoding(encoding);
  this.encoding = name;

  var carrySize;
  if (name === 'utf16le') {
    this.text = utf16Text;
    this.end = utf16End;
    carrySize = 4;
  } else if (name === 'utf8') {
    this.fillLast = utf8FillLast;
    carrySize = 4;
  } else if (name === 'base64') {
    this.text = base64Text;
    this.end = base64End;
    carrySize = 3;
  } else {
    this.write = simpleWrite;
    this.end = simpleEnd;
    return;
  }

  this.lastNeed = 0;
  this.lastTotal = 0;
  this.lastChar = Buffer.allocUnsafe(carrySize);
}
|
---|
| 106 |
|
---|
// Decodes `buf` and returns the characters that are complete so far.
//
// If an earlier chunk ended inside a character (`lastNeed` is non-zero),
// `fillLast` is asked to finish it first; an undefined result means the
// character is still incomplete and nothing is returned yet. The rest of
// `buf` is then handed to `text`. An empty `buf` yields ''.
StringDecoder.prototype.write = function (buf) {
  if (buf.length === 0) return '';

  var head;
  var start = 0;
  if (this.lastNeed) {
    head = this.fillLast(buf);
    if (head === undefined) return '';
    // lastNeed is read only after fillLast has run; decoding resumes at that
    // offset within `buf`.
    start = this.lastNeed;
    this.lastNeed = 0;
  }

  if (start < buf.length) {
    var tail = this.text(buf, start);
    return head ? head + tail : tail;
  }
  return head || '';
};
|
---|
| 122 |
|
---|
// Default `end`: the UTF-8 implementation. The constructor installs a
// different one per instance for the other encodings.
StringDecoder.prototype.end = utf8End;

// Default `text`: returns only the complete characters in a Buffer.
StringDecoder.prototype.text = utf8Text;

// Default `fillLast`: tries to finish a partial non-UTF-8 character with bytes
// from `buf`. When `buf` holds at least `lastNeed` bytes they are copied into
// `lastChar` and the completed character is returned as a string; otherwise
// all of `buf` is stored, `lastNeed` shrinks accordingly and undefined is
// returned.
StringDecoder.prototype.fillLast = function (buf) {
  var offset = this.lastTotal - this.lastNeed;
  if (this.lastNeed <= buf.length) {
    buf.copy(this.lastChar, offset, 0, this.lastNeed);
    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
  }
  buf.copy(this.lastChar, offset, 0, buf.length);
  this.lastNeed -= buf.length;
  return undefined;
};
|
---|
| 137 |
|
---|
// Classifies a single UTF-8 byte.
// Returns 0 for ASCII, 2/3/4 for the leading byte of a sequence of that
// length, -1 for a continuation byte and -2 for an invalid byte.
function utf8CheckByte(byte) {
  if (byte <= 0x7F) return 0; // 0xxxxxxx
  if (byte >> 6 === 0x02) return -1; // 10xxxxxx
  if (byte >> 5 === 0x06) return 2; // 110xxxxx
  if (byte >> 4 === 0x0E) return 3; // 1110xxxx
  if (byte >> 3 === 0x1E) return 4; // 11110xxx
  return -2;
}
|
---|
| 144 |
|
---|
// Inspects at most the last 3 bytes of `buf` (never going below index `i`) to
// detect an incomplete multi-byte UTF-8 character at the end.
//
// Walking backwards, the first non-continuation byte decides the outcome:
// for a leading byte, `self.lastNeed` is set to the number of bytes still
// missing and the character's total length (2, 3 or 4) is returned. An ASCII
// byte, an invalid byte, a 2-byte lead found three bytes back, or running out
// of bytes all return 0.
function utf8CheckIncomplete(self, buf, i) {
  var last = buf.length - 1;
  for (var back = 0; back < 3; back++) {
    var pos = last - back;
    if (pos < i) return 0;
    var kind = utf8CheckByte(buf[pos]);
    if (kind >= 0) {
      if (kind > 0) {
        // A 2-byte lead followed by two more bytes cannot be pending.
        if (back === 2 && kind === 2) return 0;
        self.lastNeed = kind - (back + 1);
      }
      return kind;
    }
    if (kind === -2) return 0;
  }
  return 0;
}
|
---|
| 172 |
|
---|
// Validates as many continuation bytes of a pending multi-byte UTF-8
// character as are needed and available at the start of `buf`.
//
// The first byte is always checked; the second and third only when
// `self.lastNeed` and `buf.length` both reach that far. On the first byte that
// is not a continuation byte, `self.lastNeed` is set to the number of bytes
// validated before it and a single replacement character ('\ufffd') is
// returned, to match v8's UTF-8 decoding behavior. Returns undefined when
// every checked byte is a continuation byte. `p` is unused.
function utf8CheckExtraBytes(self, buf, p) {
  for (var k = 0; k < 3; k++) {
    if (k > 0 && !(self.lastNeed > k && buf.length > k)) return undefined;
    if ((buf[k] & 0xC0) !== 0x80) {
      self.lastNeed = k;
      return '\ufffd';
    }
  }
  return undefined;
}
|
---|
| 199 |
|
---|
// Tries to finish a pending multi-byte UTF-8 character with bytes from `buf`.
//
// The incoming bytes are validated first; if `utf8CheckExtraBytes` reports a
// problem its replacement string is returned as is. Otherwise, when `buf`
// holds at least `lastNeed` bytes, they are copied into `lastChar` and the
// completed character is returned; if not, all of `buf` is stored,
// `lastNeed` is reduced and undefined is returned.
function utf8FillLast(buf) {
  var offset = this.lastTotal - this.lastNeed;
  var replacement = utf8CheckExtraBytes(this, buf, offset);
  if (replacement !== undefined) return replacement;

  var available = buf.length;
  if (this.lastNeed <= available) {
    buf.copy(this.lastChar, offset, 0, this.lastNeed);
    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
  }
  buf.copy(this.lastChar, offset, 0, available);
  this.lastNeed -= available;
  return undefined;
}
|
---|
| 212 |
|
---|
// Returns all complete UTF-8 characters in `buf` starting at index `i`.
// If `buf` ends on a partial character, that character's bytes are copied
// into `lastChar` and `lastTotal` records its full length, so the character
// can be completed by a later chunk.
function utf8Text(buf, i) {
  var charLength = utf8CheckIncomplete(this, buf, i);
  if (!this.lastNeed) return buf.toString('utf8', i);

  this.lastTotal = charLength;
  var alreadyHave = charLength - this.lastNeed;
  var cut = buf.length - alreadyHave;
  buf.copy(this.lastChar, 0, cut);
  return buf.toString('utf8', i, cut);
}
|
---|
| 224 |
|
---|
// Finishes decoding for UTF-8. `buf`, when given and non-empty, is written
// first. If the input ended on a partial character, a replacement character
// ('\ufffd') is appended and the pending state is cleared, so the decoder can
// be reused for new input without treating the next chunk as a continuation
// of the discarded character.
// Returns the remaining decoded text ('' when there is none).
function utf8End(buf) {
  var r = buf && buf.length ? this.write(buf) : '';
  if (this.lastNeed) {
    this.lastNeed = 0;
    return r + '\ufffd';
  }
  return r;
}
|
---|
| 232 |
|
---|
// Decodes the UTF-16LE bytes of `buf` from index `i`.
//
// With an odd number of bytes, the last byte is kept back in `lastChar`
// (one more byte needed out of two). With an even number, the text is decoded
// in full, but if it ends on a leading/high surrogate (0xD800-0xDBFF) the last
// two bytes are kept back instead (two more needed out of four) and that
// final code unit is dropped from the result.
function utf16Text(buf, i) {
  var size = buf.length;

  if ((size - i) % 2 !== 0) {
    this.lastNeed = 1;
    this.lastTotal = 2;
    this.lastChar[0] = buf[size - 1];
    return buf.toString('utf16le', i, size - 1);
  }

  var decoded = buf.toString('utf16le', i);
  if (!decoded) return decoded;

  var tail = decoded.charCodeAt(decoded.length - 1);
  var endsOnHighSurrogate = tail >= 0xD800 && tail <= 0xDBFF;
  if (!endsOnHighSurrogate) return decoded;

  this.lastNeed = 2;
  this.lastTotal = 4;
  this.lastChar[0] = buf[size - 2];
  this.lastChar[1] = buf[size - 1];
  return decoded.slice(0, -1);
}
|
---|
| 257 |
|
---|
// Finishes decoding for UTF-16LE. `buf`, when given and non-empty, is written
// first. No replacement character is appended explicitly for a partial
// character: whatever bytes are buffered in `lastChar` are decoded as they
// are. The pending state is then cleared so the decoder can be reused for new
// input.
// Returns the remaining decoded text ('' when there is none).
function utf16End(buf) {
  var r = buf && buf.length ? this.write(buf) : '';
  if (this.lastNeed) {
    var end = this.lastTotal - this.lastNeed;
    this.lastNeed = 0;
    return r + this.lastChar.toString('utf16le', 0, end);
  }
  return r;
}
|
---|
| 268 |
|
---|
// Base64-encodes the bytes of `buf` from index `i` in whole 3-byte groups.
// When the byte count is not a multiple of 3, the one or two leftover bytes
// are kept in `lastChar` (with `lastNeed` set to the bytes still missing from
// the group) and only the complete groups are encoded.
function base64Text(buf, i) {
  var size = buf.length;
  var leftover = (size - i) % 3;
  if (leftover === 0) return buf.toString('base64', i);

  this.lastNeed = 3 - leftover;
  this.lastTotal = 3;
  if (leftover === 1) {
    this.lastChar[0] = buf[size - 1];
  } else {
    this.lastChar[0] = buf[size - 2];
    this.lastChar[1] = buf[size - 1];
  }
  return buf.toString('base64', i, size - leftover);
}
|
---|
| 282 |
|
---|
// Finishes decoding for base64. `buf`, when given and non-empty, is written
// first. Any leftover bytes buffered in `lastChar` are then encoded, and the
// pending state is cleared so the decoder can be reused for new input.
// Returns the remaining encoded text ('' when there is none).
function base64End(buf) {
  var r = buf && buf.length ? this.write(buf) : '';
  if (this.lastNeed) {
    var end = 3 - this.lastNeed;
    this.lastNeed = 0;
    return r + this.lastChar.toString('base64', 0, end);
  }
  return r;
}
|
---|
| 288 |
|
---|
// `write` for single-byte encodings (e.g. ascii, latin1, hex): no state is
// kept between chunks, so `buf` is simply converted with the decoder's
// encoding.
function simpleWrite(buf) {
  var encoding = this.encoding;
  return buf.toString(encoding);
}
|
---|
| 293 |
|
---|
// `end` for encodings without carry-over state: writes `buf` when it is given
// and non-empty, otherwise returns ''.
function simpleEnd(buf) {
  if (buf && buf.length) return this.write(buf);
  return '';
}
---|