1 | /*
|
---|
2 | * utrie 1.0.2 <https://github.com/niklasvh/utrie>
|
---|
3 | * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
|
---|
4 | * Released under MIT License
|
---|
5 | */
|
---|
6 | var chars$1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
|
---|
7 | // Use a lookup table to find the index.
|
---|
8 | var lookup$1 = typeof Uint8Array === 'undefined' ? [] : new Uint8Array(256);
|
---|
9 | for (var i$1 = 0; i$1 < chars$1.length; i$1++) {
|
---|
10 | lookup$1[chars$1.charCodeAt(i$1)] = i$1;
|
---|
11 | }
|
---|
12 | var decode = function (base64) {
|
---|
13 | var bufferLength = base64.length * 0.75, len = base64.length, i, p = 0, encoded1, encoded2, encoded3, encoded4;
|
---|
14 | if (base64[base64.length - 1] === '=') {
|
---|
15 | bufferLength--;
|
---|
16 | if (base64[base64.length - 2] === '=') {
|
---|
17 | bufferLength--;
|
---|
18 | }
|
---|
19 | }
|
---|
20 | var buffer = typeof ArrayBuffer !== 'undefined' &&
|
---|
21 | typeof Uint8Array !== 'undefined' &&
|
---|
22 | typeof Uint8Array.prototype.slice !== 'undefined'
|
---|
23 | ? new ArrayBuffer(bufferLength)
|
---|
24 | : new Array(bufferLength);
|
---|
25 | var bytes = Array.isArray(buffer) ? buffer : new Uint8Array(buffer);
|
---|
26 | for (i = 0; i < len; i += 4) {
|
---|
27 | encoded1 = lookup$1[base64.charCodeAt(i)];
|
---|
28 | encoded2 = lookup$1[base64.charCodeAt(i + 1)];
|
---|
29 | encoded3 = lookup$1[base64.charCodeAt(i + 2)];
|
---|
30 | encoded4 = lookup$1[base64.charCodeAt(i + 3)];
|
---|
31 | bytes[p++] = (encoded1 << 2) | (encoded2 >> 4);
|
---|
32 | bytes[p++] = ((encoded2 & 15) << 4) | (encoded3 >> 2);
|
---|
33 | bytes[p++] = ((encoded3 & 3) << 6) | (encoded4 & 63);
|
---|
34 | }
|
---|
35 | return buffer;
|
---|
36 | };
|
---|
37 | var polyUint16Array = function (buffer) {
|
---|
38 | var length = buffer.length;
|
---|
39 | var bytes = [];
|
---|
40 | for (var i = 0; i < length; i += 2) {
|
---|
41 | bytes.push((buffer[i + 1] << 8) | buffer[i]);
|
---|
42 | }
|
---|
43 | return bytes;
|
---|
44 | };
|
---|
45 | var polyUint32Array = function (buffer) {
|
---|
46 | var length = buffer.length;
|
---|
47 | var bytes = [];
|
---|
48 | for (var i = 0; i < length; i += 4) {
|
---|
49 | bytes.push((buffer[i + 3] << 24) | (buffer[i + 2] << 16) | (buffer[i + 1] << 8) | buffer[i]);
|
---|
50 | }
|
---|
51 | return bytes;
|
---|
52 | };
|
---|
53 |
|
---|
54 | /** Shift size for getting the index-2 table offset. */
|
---|
55 | var UTRIE2_SHIFT_2 = 5;
|
---|
56 | /** Shift size for getting the index-1 table offset. */
|
---|
57 | var UTRIE2_SHIFT_1 = 6 + 5;
|
---|
58 | /**
|
---|
59 | * Shift size for shifting left the index array values.
|
---|
60 | * Increases possible data size with 16-bit index values at the cost
|
---|
61 | * of compactability.
|
---|
62 | * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
|
---|
63 | */
|
---|
64 | var UTRIE2_INDEX_SHIFT = 2;
|
---|
65 | /**
|
---|
66 | * Difference between the two shift sizes,
|
---|
67 | * for getting an index-1 offset from an index-2 offset. 6=11-5
|
---|
68 | */
|
---|
69 | var UTRIE2_SHIFT_1_2 = UTRIE2_SHIFT_1 - UTRIE2_SHIFT_2;
|
---|
70 | /**
|
---|
71 | * The part of the index-2 table for U+D800..U+DBFF stores values for
|
---|
72 | * lead surrogate code _units_ not code _points_.
|
---|
73 | * Values for lead surrogate code _points_ are indexed with this portion of the table.
|
---|
74 | * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
|
---|
75 | */
|
---|
76 | var UTRIE2_LSCP_INDEX_2_OFFSET = 0x10000 >> UTRIE2_SHIFT_2;
|
---|
77 | /** Number of entries in a data block. 32=0x20 */
|
---|
78 | var UTRIE2_DATA_BLOCK_LENGTH = 1 << UTRIE2_SHIFT_2;
|
---|
79 | /** Mask for getting the lower bits for the in-data-block offset. */
|
---|
80 | var UTRIE2_DATA_MASK = UTRIE2_DATA_BLOCK_LENGTH - 1;
|
---|
81 | var UTRIE2_LSCP_INDEX_2_LENGTH = 0x400 >> UTRIE2_SHIFT_2;
|
---|
82 | /** Count the lengths of both BMP pieces. 2080=0x820 */
|
---|
83 | var UTRIE2_INDEX_2_BMP_LENGTH = UTRIE2_LSCP_INDEX_2_OFFSET + UTRIE2_LSCP_INDEX_2_LENGTH;
|
---|
84 | /**
|
---|
85 | * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
|
---|
86 | * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
|
---|
87 | */
|
---|
88 | var UTRIE2_UTF8_2B_INDEX_2_OFFSET = UTRIE2_INDEX_2_BMP_LENGTH;
|
---|
89 | var UTRIE2_UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6; /* U+0800 is the first code point after 2-byte UTF-8 */
|
---|
90 | /**
|
---|
91 | * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
|
---|
92 | * Variable length, for code points up to highStart, where the last single-value range starts.
|
---|
93 | * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
|
---|
94 | * (For 0x100000 supplementary code points U+10000..U+10ffff.)
|
---|
95 | *
|
---|
96 | * The part of the index-2 table for supplementary code points starts
|
---|
97 | * after this index-1 table.
|
---|
98 | *
|
---|
99 | * Both the index-1 table and the following part of the index-2 table
|
---|
100 | * are omitted completely if there is only BMP data.
|
---|
101 | */
|
---|
102 | var UTRIE2_INDEX_1_OFFSET = UTRIE2_UTF8_2B_INDEX_2_OFFSET + UTRIE2_UTF8_2B_INDEX_2_LENGTH;
|
---|
103 | /**
|
---|
104 | * Number of index-1 entries for the BMP. 32=0x20
|
---|
105 | * This part of the index-1 table is omitted from the serialized form.
|
---|
106 | */
|
---|
107 | var UTRIE2_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UTRIE2_SHIFT_1;
|
---|
108 | /** Number of entries in an index-2 block. 64=0x40 */
|
---|
109 | var UTRIE2_INDEX_2_BLOCK_LENGTH = 1 << UTRIE2_SHIFT_1_2;
|
---|
110 | /** Mask for getting the lower bits for the in-index-2-block offset. */
|
---|
111 | var UTRIE2_INDEX_2_MASK = UTRIE2_INDEX_2_BLOCK_LENGTH - 1;
|
---|
112 | var slice16 = function (view, start, end) {
|
---|
113 | if (view.slice) {
|
---|
114 | return view.slice(start, end);
|
---|
115 | }
|
---|
116 | return new Uint16Array(Array.prototype.slice.call(view, start, end));
|
---|
117 | };
|
---|
118 | var slice32 = function (view, start, end) {
|
---|
119 | if (view.slice) {
|
---|
120 | return view.slice(start, end);
|
---|
121 | }
|
---|
122 | return new Uint32Array(Array.prototype.slice.call(view, start, end));
|
---|
123 | };
|
---|
124 | var createTrieFromBase64 = function (base64, _byteLength) {
|
---|
125 | var buffer = decode(base64);
|
---|
126 | var view32 = Array.isArray(buffer) ? polyUint32Array(buffer) : new Uint32Array(buffer);
|
---|
127 | var view16 = Array.isArray(buffer) ? polyUint16Array(buffer) : new Uint16Array(buffer);
|
---|
128 | var headerLength = 24;
|
---|
129 | var index = slice16(view16, headerLength / 2, view32[4] / 2);
|
---|
130 | var data = view32[5] === 2
|
---|
131 | ? slice16(view16, (headerLength + view32[4]) / 2)
|
---|
132 | : slice32(view32, Math.ceil((headerLength + view32[4]) / 4));
|
---|
133 | return new Trie(view32[0], view32[1], view32[2], view32[3], index, data);
|
---|
134 | };
|
---|
135 | var Trie = /** @class */ (function () {
|
---|
136 | function Trie(initialValue, errorValue, highStart, highValueIndex, index, data) {
|
---|
137 | this.initialValue = initialValue;
|
---|
138 | this.errorValue = errorValue;
|
---|
139 | this.highStart = highStart;
|
---|
140 | this.highValueIndex = highValueIndex;
|
---|
141 | this.index = index;
|
---|
142 | this.data = data;
|
---|
143 | }
|
---|
144 | /**
|
---|
145 | * Get the value for a code point as stored in the Trie.
|
---|
146 | *
|
---|
147 | * @param codePoint the code point
|
---|
148 | * @return the value
|
---|
149 | */
|
---|
150 | Trie.prototype.get = function (codePoint) {
|
---|
151 | var ix;
|
---|
152 | if (codePoint >= 0) {
|
---|
153 | if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
|
---|
154 | // Ordinary BMP code point, excluding leading surrogates.
|
---|
155 | // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index.
|
---|
156 | // 16 bit data is stored in the index array itself.
|
---|
157 | ix = this.index[codePoint >> UTRIE2_SHIFT_2];
|
---|
158 | ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
|
---|
159 | return this.data[ix];
|
---|
160 | }
|
---|
161 | if (codePoint <= 0xffff) {
|
---|
162 | // Lead Surrogate Code Point. A Separate index section is stored for
|
---|
163 | // lead surrogate code units and code points.
|
---|
164 | // The main index has the code unit data.
|
---|
165 | // For this function, we need the code point data.
|
---|
166 | // Note: this expression could be refactored for slightly improved efficiency, but
|
---|
167 | // surrogate code points will be so rare in practice that it's not worth it.
|
---|
168 | ix = this.index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
|
---|
169 | ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
|
---|
170 | return this.data[ix];
|
---|
171 | }
|
---|
172 | if (codePoint < this.highStart) {
|
---|
173 | // Supplemental code point, use two-level lookup.
|
---|
174 | ix = UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH + (codePoint >> UTRIE2_SHIFT_1);
|
---|
175 | ix = this.index[ix];
|
---|
176 | ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
|
---|
177 | ix = this.index[ix];
|
---|
178 | ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
|
---|
179 | return this.data[ix];
|
---|
180 | }
|
---|
181 | if (codePoint <= 0x10ffff) {
|
---|
182 | return this.data[this.highValueIndex];
|
---|
183 | }
|
---|
184 | }
|
---|
185 | // Fall through. The code point is outside of the legal range of 0..0x10ffff.
|
---|
186 | return this.errorValue;
|
---|
187 | };
|
---|
188 | return Trie;
|
---|
189 | }());
|
---|
190 |
|
---|
191 | /*
|
---|
192 | * base64-arraybuffer 1.0.2 <https://github.com/niklasvh/base64-arraybuffer>
|
---|
193 | * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
|
---|
194 | * Released under MIT License
|
---|
195 | */
|
---|
196 | var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
|
---|
197 | // Use a lookup table to find the index.
|
---|
198 | var lookup = typeof Uint8Array === 'undefined' ? [] : new Uint8Array(256);
|
---|
199 | for (var i = 0; i < chars.length; i++) {
|
---|
200 | lookup[chars.charCodeAt(i)] = i;
|
---|
201 | }
|
---|
202 | var encode = function (arraybuffer) {
|
---|
203 | var bytes = new Uint8Array(arraybuffer), i, len = bytes.length, base64 = '';
|
---|
204 | for (i = 0; i < len; i += 3) {
|
---|
205 | base64 += chars[bytes[i] >> 2];
|
---|
206 | base64 += chars[((bytes[i] & 3) << 4) | (bytes[i + 1] >> 4)];
|
---|
207 | base64 += chars[((bytes[i + 1] & 15) << 2) | (bytes[i + 2] >> 6)];
|
---|
208 | base64 += chars[bytes[i + 2] & 63];
|
---|
209 | }
|
---|
210 | if (len % 3 === 2) {
|
---|
211 | base64 = base64.substring(0, base64.length - 1) + '=';
|
---|
212 | }
|
---|
213 | else if (len % 3 === 1) {
|
---|
214 | base64 = base64.substring(0, base64.length - 2) + '==';
|
---|
215 | }
|
---|
216 | return base64;
|
---|
217 | };
|
---|
218 |
|
---|
219 | /**
|
---|
220 | * Trie2 constants, defining shift widths, index array lengths, etc.
|
---|
221 | *
|
---|
222 | * These are needed for the runtime macros but users can treat these as
|
---|
223 | * implementation details and skip to the actual public API further below.
|
---|
224 | */
|
---|
225 | // const UTRIE2_OPTIONS_VALUE_BITS_MASK = 0x000f;
|
---|
226 | /** Number of code points per index-1 table entry. 2048=0x800 */
|
---|
227 | var UTRIE2_CP_PER_INDEX_1_ENTRY = 1 << UTRIE2_SHIFT_1;
|
---|
228 | /** The alignment size of a data block. Also the granularity for compaction. */
|
---|
229 | var UTRIE2_DATA_GRANULARITY = 1 << UTRIE2_INDEX_SHIFT;
|
---|
230 | /* Fixed layout of the first part of the index array. ------------------- */
|
---|
231 | /**
|
---|
232 | * The BMP part of the index-2 table is fixed and linear and starts at offset 0.
|
---|
233 | * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
|
---|
234 | */
|
---|
235 | var UTRIE2_INDEX_2_OFFSET = 0;
|
---|
236 | var UTRIE2_MAX_INDEX_1_LENGTH = 0x100000 >> UTRIE2_SHIFT_1;
|
---|
237 | /*
|
---|
238 | * Fixed layout of the first part of the data array. -----------------------
|
---|
239 | * Starts with 4 blocks (128=0x80 entries) for ASCII.
|
---|
240 | */
|
---|
241 | /**
|
---|
242 | * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
|
---|
243 | * Used with linear access for single bytes 0..0xbf for simple error handling.
|
---|
244 | * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
|
---|
245 | */
|
---|
246 | var UTRIE2_BAD_UTF8_DATA_OFFSET = 0x80;
|
---|
247 | /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */
|
---|
248 | var UTRIE2_DATA_START_OFFSET = 0xc0;
|
---|
249 | /* Building a Trie2 ---------------------------------------------------------- */
|
---|
250 | /*
|
---|
251 | * These definitions are mostly needed by utrie2_builder.c, but also by
|
---|
252 | * utrie2_get32() and utrie2_enum().
|
---|
253 | */
|
---|
254 | /*
|
---|
255 | * At build time, leave a gap in the index-2 table,
|
---|
256 | * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
|
---|
257 | * and the supplementary index-1 table.
|
---|
258 | * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
|
---|
259 | */
|
---|
260 | var UNEWTRIE2_INDEX_GAP_OFFSET = UTRIE2_INDEX_2_BMP_LENGTH;
|
---|
261 | var UNEWTRIE2_INDEX_GAP_LENGTH = (UTRIE2_UTF8_2B_INDEX_2_LENGTH + UTRIE2_MAX_INDEX_1_LENGTH + UTRIE2_INDEX_2_MASK) & ~UTRIE2_INDEX_2_MASK;
|
---|
262 | /**
|
---|
263 | * Maximum length of the build-time index-2 array.
|
---|
264 | * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
|
---|
265 | * plus the part of the index-2 table for lead surrogate code points,
|
---|
266 | * plus the build-time index gap,
|
---|
267 | * plus the null index-2 block.
|
---|
268 | */
|
---|
269 | var UNEWTRIE2_MAX_INDEX_2_LENGTH = (0x110000 >> UTRIE2_SHIFT_2) +
|
---|
270 | UTRIE2_LSCP_INDEX_2_LENGTH +
|
---|
271 | UNEWTRIE2_INDEX_GAP_LENGTH +
|
---|
272 | UTRIE2_INDEX_2_BLOCK_LENGTH;
|
---|
273 | var UNEWTRIE2_INDEX_1_LENGTH = 0x110000 >> UTRIE2_SHIFT_1;
|
---|
274 | /**
|
---|
275 | * Maximum length of the build-time data array.
|
---|
276 | * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block,
|
---|
277 | * plus values for the 0x400 surrogate code units.
|
---|
278 | */
|
---|
279 | var UNEWTRIE2_MAX_DATA_LENGTH = 0x110000 + 0x40 + 0x40 + 0x400;
|
---|
280 | /* Start with allocation of 16k data entries. */
|
---|
281 | var UNEWTRIE2_INITIAL_DATA_LENGTH = 1 << 14;
|
---|
282 | /* Grow about 8x each time. */
|
---|
283 | var UNEWTRIE2_MEDIUM_DATA_LENGTH = 1 << 17;
|
---|
284 | /** The null index-2 block, following the gap in the index-2 table. */
|
---|
285 | var UNEWTRIE2_INDEX_2_NULL_OFFSET = UNEWTRIE2_INDEX_GAP_OFFSET + UNEWTRIE2_INDEX_GAP_LENGTH;
|
---|
286 | /** The start of allocated index-2 blocks. */
|
---|
287 | var UNEWTRIE2_INDEX_2_START_OFFSET = UNEWTRIE2_INDEX_2_NULL_OFFSET + UTRIE2_INDEX_2_BLOCK_LENGTH;
|
---|
288 | /**
|
---|
289 | * The null data block.
|
---|
290 | * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
|
---|
291 | * to work with 6-bit trail bytes from 2-byte UTF-8.
|
---|
292 | */
|
---|
293 | var UNEWTRIE2_DATA_NULL_OFFSET = UTRIE2_DATA_START_OFFSET;
|
---|
294 | /** The start of allocated data blocks. */
|
---|
295 | var UNEWTRIE2_DATA_START_OFFSET = UNEWTRIE2_DATA_NULL_OFFSET + 0x40;
|
---|
296 | /**
|
---|
297 | * The start of data blocks for U+0800 and above.
|
---|
298 | * Below, compaction uses a block length of 64 for 2-byte UTF-8.
|
---|
299 | * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
|
---|
300 | * Data values for 0x780 code points beyond ASCII.
|
---|
301 | */
|
---|
302 | var UNEWTRIE2_DATA_0800_OFFSET = UNEWTRIE2_DATA_START_OFFSET + 0x780;
|
---|
303 | /**
|
---|
304 | * Maximum length of the runtime index array.
|
---|
305 | * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
|
---|
306 | * (The actual maximum length is lower,
|
---|
307 | * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
|
---|
308 | */
|
---|
309 | var UTRIE2_MAX_INDEX_LENGTH = 0xffff;
|
---|
310 | /**
|
---|
311 | * Maximum length of the runtime data array.
|
---|
312 | * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
|
---|
313 | * and by uint16_t UTrie2Header.shiftedDataLength.
|
---|
314 | */
|
---|
315 | var UTRIE2_MAX_DATA_LENGTH = 0xffff << UTRIE2_INDEX_SHIFT;
|
---|
316 | var BITS_16 = 16;
|
---|
317 | var BITS_32 = 32;
|
---|
318 | var isHighSurrogate = function (c) { return c >= 0xd800 && c <= 0xdbff; };
|
---|
319 | var equalInt = function (a, s, t, length) {
|
---|
320 | for (var i = 0; i < length; i++) {
|
---|
321 | if (a[s + i] !== a[t + i]) {
|
---|
322 | return false;
|
---|
323 | }
|
---|
324 | }
|
---|
325 | return true;
|
---|
326 | };
|
---|
327 | var TrieBuilder = /** @class */ (function () {
|
---|
328 | function TrieBuilder(initialValue, errorValue) {
|
---|
329 | if (initialValue === void 0) { initialValue = 0; }
|
---|
330 | if (errorValue === void 0) { errorValue = 0; }
|
---|
331 | this.initialValue = initialValue;
|
---|
332 | this.errorValue = errorValue;
|
---|
333 | this.highStart = 0x110000;
|
---|
334 | this.data = new Uint32Array(UNEWTRIE2_INITIAL_DATA_LENGTH);
|
---|
335 | this.dataCapacity = UNEWTRIE2_INITIAL_DATA_LENGTH;
|
---|
336 | this.highStart = 0x110000;
|
---|
337 | this.firstFreeBlock = 0; /* no free block in the list */
|
---|
338 | this.isCompacted = false;
|
---|
339 | this.index1 = new Uint32Array(UNEWTRIE2_INDEX_1_LENGTH);
|
---|
340 | this.index2 = new Uint32Array(UNEWTRIE2_MAX_INDEX_2_LENGTH);
|
---|
341 | /*
|
---|
342 | * Multi-purpose per-data-block table.
|
---|
343 | *
|
---|
344 | * Before compacting:
|
---|
345 | *
|
---|
346 | * Per-data-block reference counters/free-block list.
|
---|
347 | * 0: unused
|
---|
348 | * >0: reference counter (number of index-2 entries pointing here)
|
---|
349 | * <0: next free data block in free-block list
|
---|
350 | *
|
---|
351 | * While compacting:
|
---|
352 | *
|
---|
353 | * Map of adjusted indexes, used in compactData() and compactIndex2().
|
---|
354 | * Maps from original indexes to new ones.
|
---|
355 | */
|
---|
356 | this.map = new Uint32Array(UNEWTRIE2_MAX_DATA_LENGTH >> UTRIE2_SHIFT_2);
|
---|
357 | /*
|
---|
358 | * preallocate and reset
|
---|
359 | * - ASCII
|
---|
360 | * - the bad-UTF-8-data block
|
---|
361 | * - the null data block
|
---|
362 | */
|
---|
363 | var i, j;
|
---|
364 | for (i = 0; i < 0x80; ++i) {
|
---|
365 | this.data[i] = initialValue;
|
---|
366 | }
|
---|
367 | for (; i < 0xc0; ++i) {
|
---|
368 | this.data[i] = errorValue;
|
---|
369 | }
|
---|
370 | for (i = UNEWTRIE2_DATA_NULL_OFFSET; i < UNEWTRIE2_DATA_START_OFFSET; ++i) {
|
---|
371 | this.data[i] = initialValue;
|
---|
372 | }
|
---|
373 | this.dataNullOffset = UNEWTRIE2_DATA_NULL_OFFSET;
|
---|
374 | this.dataLength = UNEWTRIE2_DATA_START_OFFSET;
|
---|
375 | /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
|
---|
376 | for (i = 0, j = 0; j < 0x80; ++i, j += UTRIE2_DATA_BLOCK_LENGTH) {
|
---|
377 | this.index2[i] = j;
|
---|
378 | this.map[i] = 1;
|
---|
379 | }
|
---|
380 | /* reference counts for the bad-UTF-8-data block */
|
---|
381 | for (; j < 0xc0; ++i, j += UTRIE2_DATA_BLOCK_LENGTH) {
|
---|
382 | this.map[i] = 0;
|
---|
383 | }
|
---|
384 | /*
|
---|
385 | * Reference counts for the null data block: all blocks except for the ASCII blocks.
|
---|
386 | * Plus 1 so that we don't drop this block during compaction.
|
---|
387 | * Plus as many as needed for lead surrogate code points.
|
---|
388 | */
|
---|
389 | /* i==newTrie->dataNullOffset */
|
---|
390 | this.map[i++] = (0x110000 >> UTRIE2_SHIFT_2) - (0x80 >> UTRIE2_SHIFT_2) + 1 + UTRIE2_LSCP_INDEX_2_LENGTH;
|
---|
391 | j += UTRIE2_DATA_BLOCK_LENGTH;
|
---|
392 | for (; j < UNEWTRIE2_DATA_START_OFFSET; ++i, j += UTRIE2_DATA_BLOCK_LENGTH) {
|
---|
393 | this.map[i] = 0;
|
---|
394 | }
|
---|
395 | /*
|
---|
396 | * set the remaining indexes in the BMP index-2 block
|
---|
397 | * to the null data block
|
---|
398 | */
|
---|
399 | for (i = 0x80 >> UTRIE2_SHIFT_2; i < UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
|
---|
400 | this.index2[i] = UNEWTRIE2_DATA_NULL_OFFSET;
|
---|
401 | }
|
---|
402 | /*
|
---|
403 | * Fill the index gap with impossible values so that compaction
|
---|
404 | * does not overlap other index-2 blocks with the gap.
|
---|
405 | */
|
---|
406 | for (i = 0; i < UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
|
---|
407 | this.index2[UNEWTRIE2_INDEX_GAP_OFFSET + i] = -1;
|
---|
408 | }
|
---|
409 | /* set the indexes in the null index-2 block */
|
---|
410 | for (i = 0; i < UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
|
---|
411 | this.index2[UNEWTRIE2_INDEX_2_NULL_OFFSET + i] = UNEWTRIE2_DATA_NULL_OFFSET;
|
---|
412 | }
|
---|
413 | this.index2NullOffset = UNEWTRIE2_INDEX_2_NULL_OFFSET;
|
---|
414 | this.index2Length = UNEWTRIE2_INDEX_2_START_OFFSET;
|
---|
415 | /* set the index-1 indexes for the linear index-2 block */
|
---|
416 | for (i = 0, j = 0; i < UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; ++i, j += UTRIE2_INDEX_2_BLOCK_LENGTH) {
|
---|
417 | this.index1[i] = j;
|
---|
418 | }
|
---|
419 | /* set the remaining index-1 indexes to the null index-2 block */
|
---|
420 | for (; i < UNEWTRIE2_INDEX_1_LENGTH; ++i) {
|
---|
421 | this.index1[i] = UNEWTRIE2_INDEX_2_NULL_OFFSET;
|
---|
422 | }
|
---|
423 | /*
|
---|
424 | * Preallocate and reset data for U+0080..U+07ff,
|
---|
425 | * for 2-byte UTF-8 which will be compacted in 64-blocks
|
---|
426 | * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
|
---|
427 | */
|
---|
428 | for (i = 0x80; i < 0x800; i += UTRIE2_DATA_BLOCK_LENGTH) {
|
---|
429 | this.set(i, initialValue);
|
---|
430 | }
|
---|
431 | }
|
---|
432 | /**
|
---|
433 | * Set a value for a code point.
|
---|
434 | *
|
---|
435 | * @param c the code point
|
---|
436 | * @param value the value
|
---|
437 | */
|
---|
438 | TrieBuilder.prototype.set = function (c, value) {
|
---|
439 | if (c < 0 || c > 0x10ffff) {
|
---|
440 | throw new Error('Invalid code point.');
|
---|
441 | }
|
---|
442 | this._set(c, true, value);
|
---|
443 | return this;
|
---|
444 | };
|
---|
445 | /**
|
---|
446 | * Set a value in a range of code points [start..end].
|
---|
447 | * All code points c with start<=c<=end will get the value if
|
---|
448 | * overwrite is TRUE or if the old value is the initial value.
|
---|
449 | *
|
---|
450 | * @param start the first code point to get the value
|
---|
451 | * @param end the last code point to get the value (inclusive)
|
---|
452 | * @param value the value
|
---|
453 | * @param overwrite flag for whether old non-initial values are to be overwritten
|
---|
454 | */
|
---|
455 | TrieBuilder.prototype.setRange = function (start, end, value, overwrite) {
|
---|
456 | if (overwrite === void 0) { overwrite = false; }
|
---|
457 | /*
|
---|
458 | * repeat value in [start..end]
|
---|
459 | * mark index values for repeat-data blocks by setting bit 31 of the index values
|
---|
460 | * fill around existing values if any, if(overwrite)
|
---|
461 | */
|
---|
462 | var block, rest, repeatBlock;
|
---|
463 | if (start > 0x10ffff || start < 0 || end > 0x10ffff || end < 0 || start > end) {
|
---|
464 | throw new Error('Invalid code point range.');
|
---|
465 | }
|
---|
466 | if (!overwrite && value === this.initialValue) {
|
---|
467 | return this; /* nothing to do */
|
---|
468 | }
|
---|
469 | if (this.isCompacted) {
|
---|
470 | throw new Error('Trie was already compacted');
|
---|
471 | }
|
---|
472 | var limit = end + 1;
|
---|
473 | if ((start & UTRIE2_DATA_MASK) !== 0) {
|
---|
474 | /* set partial block at [start..following block boundary[ */
|
---|
475 | block = this.getDataBlock(start, true);
|
---|
476 | var nextStart = (start + UTRIE2_DATA_BLOCK_LENGTH) & ~UTRIE2_DATA_MASK;
|
---|
477 | if (nextStart <= limit) {
|
---|
478 | this.fillBlock(block, start & UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, value, this.initialValue, overwrite);
|
---|
479 | start = nextStart;
|
---|
480 | }
|
---|
481 | else {
|
---|
482 | this.fillBlock(block, start & UTRIE2_DATA_MASK, limit & UTRIE2_DATA_MASK, value, this.initialValue, overwrite);
|
---|
483 | return this;
|
---|
484 | }
|
---|
485 | }
|
---|
486 | /* number of positions in the last, partial block */
|
---|
487 | rest = limit & UTRIE2_DATA_MASK;
|
---|
488 | /* round down limit to a block boundary */
|
---|
489 | limit &= ~UTRIE2_DATA_MASK;
|
---|
490 | /* iterate over all-value blocks */
|
---|
491 | repeatBlock = value === this.initialValue ? this.dataNullOffset : -1;
|
---|
492 | while (start < limit) {
|
---|
493 | var i2 = void 0;
|
---|
494 | var setRepeatBlock = false;
|
---|
495 | if (value === this.initialValue && this.isInNullBlock(start, true)) {
|
---|
496 | start += UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
|
---|
497 | continue;
|
---|
498 | }
|
---|
499 | /* get index value */
|
---|
500 | i2 = this.getIndex2Block(start, true);
|
---|
501 | i2 += (start >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
|
---|
502 | block = this.index2[i2];
|
---|
503 | if (this.isWritableBlock(block)) {
|
---|
504 | /* already allocated */
|
---|
505 | if (overwrite && block >= UNEWTRIE2_DATA_0800_OFFSET) {
|
---|
506 | /*
|
---|
507 | * We overwrite all values, and it's not a
|
---|
508 | * protected (ASCII-linear or 2-byte UTF-8) block:
|
---|
509 | * replace with the repeatBlock.
|
---|
510 | */
|
---|
511 | setRepeatBlock = true;
|
---|
512 | }
|
---|
513 | else {
|
---|
514 | /* !overwrite, or protected block: just write the values into this block */
|
---|
515 | this.fillBlock(block, 0, UTRIE2_DATA_BLOCK_LENGTH, value, this.initialValue, overwrite);
|
---|
516 | }
|
---|
517 | }
|
---|
518 | else if (this.data[block] !== value && (overwrite || block === this.dataNullOffset)) {
|
---|
519 | /*
|
---|
520 | * Set the repeatBlock instead of the null block or previous repeat block:
|
---|
521 | *
|
---|
522 | * If !isWritableBlock() then all entries in the block have the same value
|
---|
523 | * because it's the null block or a range block (the repeatBlock from a previous
|
---|
524 | * call to utrie2_setRange32()).
|
---|
525 | * No other blocks are used multiple times before compacting.
|
---|
526 | *
|
---|
527 | * The null block is the only non-writable block with the initialValue because
|
---|
528 | * of the repeatBlock initialization above. (If value==initialValue, then
|
---|
529 | * the repeatBlock will be the null data block.)
|
---|
530 | *
|
---|
531 | * We set our repeatBlock if the desired value differs from the block's value,
|
---|
532 | * and if we overwrite any data or if the data is all initial values
|
---|
533 | * (which is the same as the block being the null block, see above).
|
---|
534 | */
|
---|
535 | setRepeatBlock = true;
|
---|
536 | }
|
---|
537 | if (setRepeatBlock) {
|
---|
538 | if (repeatBlock >= 0) {
|
---|
539 | this.setIndex2Entry(i2, repeatBlock);
|
---|
540 | }
|
---|
541 | else {
|
---|
542 | /* create and set and fill the repeatBlock */
|
---|
543 | repeatBlock = this.getDataBlock(start, true);
|
---|
544 | this.writeBlock(repeatBlock, value);
|
---|
545 | }
|
---|
546 | }
|
---|
547 | start += UTRIE2_DATA_BLOCK_LENGTH;
|
---|
548 | }
|
---|
549 | if (rest > 0) {
|
---|
550 | /* set partial block at [last block boundary..limit[ */
|
---|
551 | block = this.getDataBlock(start, true);
|
---|
552 | this.fillBlock(block, 0, rest, value, this.initialValue, overwrite);
|
---|
553 | }
|
---|
554 | return this;
|
---|
555 | };
|
---|
556 | /**
|
---|
557 | * Get the value for a code point as stored in the Trie2.
|
---|
558 | *
|
---|
559 | * @param codePoint the code point
|
---|
560 | * @return the value
|
---|
561 | */
|
---|
562 | TrieBuilder.prototype.get = function (codePoint) {
|
---|
563 | if (codePoint < 0 || codePoint > 0x10ffff) {
|
---|
564 | return this.errorValue;
|
---|
565 | }
|
---|
566 | else {
|
---|
567 | return this._get(codePoint, true);
|
---|
568 | }
|
---|
569 | };
|
---|
570 | TrieBuilder.prototype._get = function (c, fromLSCP) {
|
---|
571 | var i2;
|
---|
572 | if (c >= this.highStart && (!(c >= 0xd800 && c < 0xdc00) || fromLSCP)) {
|
---|
573 | return this.data[this.dataLength - UTRIE2_DATA_GRANULARITY];
|
---|
574 | }
|
---|
575 | if (c >= 0xd800 && c < 0xdc00 && fromLSCP) {
|
---|
576 | i2 = UTRIE2_LSCP_INDEX_2_OFFSET - (0xd800 >> UTRIE2_SHIFT_2) + (c >> UTRIE2_SHIFT_2);
|
---|
577 | }
|
---|
578 | else {
|
---|
579 | i2 = this.index1[c >> UTRIE2_SHIFT_1] + ((c >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK);
|
---|
580 | }
|
---|
581 | var block = this.index2[i2];
|
---|
582 | return this.data[block + (c & UTRIE2_DATA_MASK)];
|
---|
583 | };
|
---|
584 | TrieBuilder.prototype.freeze = function (valueBits) {
|
---|
585 | if (valueBits === void 0) { valueBits = BITS_32; }
|
---|
586 | var i;
|
---|
587 | var allIndexesLength;
|
---|
588 | var dataMove; /* >0 if the data is moved to the end of the index array */
|
---|
589 | /* compact if necessary */
|
---|
590 | if (!this.isCompacted) {
|
---|
591 | this.compactTrie();
|
---|
592 | }
|
---|
593 | allIndexesLength = this.highStart <= 0x10000 ? UTRIE2_INDEX_1_OFFSET : this.index2Length;
|
---|
594 | if (valueBits === BITS_16) {
|
---|
595 | // dataMove = allIndexesLength;
|
---|
596 | dataMove = 0;
|
---|
597 | }
|
---|
598 | else {
|
---|
599 | dataMove = 0;
|
---|
600 | }
|
---|
601 | /* are indexLength and dataLength within limits? */
|
---|
602 | if (
|
---|
603 | /* for unshifted indexLength */
|
---|
604 | allIndexesLength > UTRIE2_MAX_INDEX_LENGTH ||
|
---|
605 | /* for unshifted dataNullOffset */
|
---|
606 | dataMove + this.dataNullOffset > 0xffff ||
|
---|
607 | /* for unshifted 2-byte UTF-8 index-2 values */
|
---|
608 | dataMove + UNEWTRIE2_DATA_0800_OFFSET > 0xffff ||
|
---|
609 | /* for shiftedDataLength */
|
---|
610 | dataMove + this.dataLength > UTRIE2_MAX_DATA_LENGTH) {
|
---|
611 | throw new Error('Trie data is too large.');
|
---|
612 | }
|
---|
613 | var index = new Uint16Array(allIndexesLength);
|
---|
614 | /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
|
---|
615 | var destIdx = 0;
|
---|
616 | for (i = 0; i < UTRIE2_INDEX_2_BMP_LENGTH; i++) {
|
---|
617 | index[destIdx++] = (this.index2[i] + dataMove) >> UTRIE2_INDEX_SHIFT;
|
---|
618 | }
|
---|
619 | /* write UTF-8 2-byte index-2 values, not right-shifted */
|
---|
620 | for (i = 0; i < 0xc2 - 0xc0; ++i) {
|
---|
621 | /* C0..C1 */
|
---|
622 | index[destIdx++] = dataMove + UTRIE2_BAD_UTF8_DATA_OFFSET;
|
---|
623 | }
|
---|
624 | for (; i < 0xe0 - 0xc0; ++i) {
|
---|
625 | /* C2..DF */
|
---|
626 | index[destIdx++] = dataMove + this.index2[i << (6 - UTRIE2_SHIFT_2)];
|
---|
627 | }
|
---|
628 | if (this.highStart > 0x10000) {
|
---|
629 | var index1Length = (this.highStart - 0x10000) >> UTRIE2_SHIFT_1;
|
---|
630 | var index2Offset = UTRIE2_INDEX_2_BMP_LENGTH + UTRIE2_UTF8_2B_INDEX_2_LENGTH + index1Length;
|
---|
631 | /* write 16-bit index-1 values for supplementary code points */
|
---|
632 | for (i = 0; i < index1Length; i++) {
|
---|
633 | index[destIdx++] = UTRIE2_INDEX_2_OFFSET + this.index1[i + UTRIE2_OMITTED_BMP_INDEX_1_LENGTH];
|
---|
634 | }
|
---|
635 | /*
|
---|
636 | * write the index-2 array values for supplementary code points,
|
---|
637 | * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
|
---|
638 | */
|
---|
639 | for (i = 0; i < this.index2Length - index2Offset; i++) {
|
---|
640 | index[destIdx++] = (dataMove + this.index2[index2Offset + i]) >> UTRIE2_INDEX_SHIFT;
|
---|
641 | }
|
---|
642 | }
|
---|
643 | /* write the 16/32-bit data array */
|
---|
644 | switch (valueBits) {
|
---|
645 | case BITS_16:
|
---|
646 | /* write 16-bit data values */
|
---|
647 | var data16 = new Uint16Array(this.dataLength);
|
---|
648 | for (i = 0; i < this.dataLength; i++) {
|
---|
649 | data16[i] = this.data[i];
|
---|
650 | }
|
---|
651 | return new Trie(this.initialValue, this.errorValue, this.highStart, dataMove + this.dataLength - UTRIE2_DATA_GRANULARITY, index, data16);
|
---|
652 | case BITS_32:
|
---|
653 | /* write 32-bit data values */
|
---|
654 | var data32 = new Uint32Array(this.dataLength);
|
---|
655 | for (i = 0; i < this.dataLength; i++) {
|
---|
656 | data32[i] = this.data[i];
|
---|
657 | }
|
---|
658 | return new Trie(this.initialValue, this.errorValue, this.highStart, dataMove + this.dataLength - UTRIE2_DATA_GRANULARITY, index, data32);
|
---|
659 | default:
|
---|
660 | throw new Error('Bits should be either 16 or 32');
|
---|
661 | }
|
---|
662 | };
|
---|
663 | /*
|
---|
664 | * Find the start of the last range in the trie by enumerating backward.
|
---|
665 | * Indexes for supplementary code points higher than this will be omitted.
|
---|
666 | */
|
---|
667 | TrieBuilder.prototype.findHighStart = function (highValue) {
|
---|
668 | var value;
|
---|
669 | var i2, j, i2Block, prevI2Block, block, prevBlock;
|
---|
670 | /* set variables for previous range */
|
---|
671 | if (highValue === this.initialValue) {
|
---|
672 | prevI2Block = this.index2NullOffset;
|
---|
673 | prevBlock = this.dataNullOffset;
|
---|
674 | }
|
---|
675 | else {
|
---|
676 | prevI2Block = -1;
|
---|
677 | prevBlock = -1;
|
---|
678 | }
|
---|
679 | var prev = 0x110000;
|
---|
680 | /* enumerate index-2 blocks */
|
---|
681 | var i1 = UNEWTRIE2_INDEX_1_LENGTH;
|
---|
682 | var c = prev;
|
---|
683 | while (c > 0) {
|
---|
684 | i2Block = this.index1[--i1];
|
---|
685 | if (i2Block === prevI2Block) {
|
---|
686 | /* the index-2 block is the same as the previous one, and filled with highValue */
|
---|
687 | c -= UTRIE2_CP_PER_INDEX_1_ENTRY;
|
---|
688 | continue;
|
---|
689 | }
|
---|
690 | prevI2Block = i2Block;
|
---|
691 | if (i2Block === this.index2NullOffset) {
|
---|
692 | /* this is the null index-2 block */
|
---|
693 | if (highValue !== this.initialValue) {
|
---|
694 | return c;
|
---|
695 | }
|
---|
696 | c -= UTRIE2_CP_PER_INDEX_1_ENTRY;
|
---|
697 | }
|
---|
698 | else {
|
---|
699 | /* enumerate data blocks for one index-2 block */
|
---|
700 | for (i2 = UTRIE2_INDEX_2_BLOCK_LENGTH; i2 > 0;) {
|
---|
701 | block = this.index2[i2Block + --i2];
|
---|
702 | if (block === prevBlock) {
|
---|
703 | /* the block is the same as the previous one, and filled with highValue */
|
---|
704 | c -= UTRIE2_DATA_BLOCK_LENGTH;
|
---|
705 | continue;
|
---|
706 | }
|
---|
707 | prevBlock = block;
|
---|
708 | if (block === this.dataNullOffset) {
|
---|
709 | /* this is the null data block */
|
---|
710 | if (highValue !== this.initialValue) {
|
---|
711 | return c;
|
---|
712 | }
|
---|
713 | c -= UTRIE2_DATA_BLOCK_LENGTH;
|
---|
714 | }
|
---|
715 | else {
|
---|
716 | for (j = UTRIE2_DATA_BLOCK_LENGTH; j > 0;) {
|
---|
717 | value = this.data[block + --j];
|
---|
718 | if (value !== highValue) {
|
---|
719 | return c;
|
---|
720 | }
|
---|
721 | --c;
|
---|
722 | }
|
---|
723 | }
|
---|
724 | }
|
---|
725 | }
|
---|
726 | }
|
---|
727 | /* deliver last range */
|
---|
728 | return 0;
|
---|
729 | };
|
---|
730 | /*
|
---|
731 | * Compact a build-time trie.
|
---|
732 | *
|
---|
733 | * The compaction
|
---|
734 | * - removes blocks that are identical with earlier ones
|
---|
735 | * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
|
---|
736 | * - moves blocks in steps of the data granularity
|
---|
737 | * - moves and overlaps blocks that overlap with multiple values in the overlap region
|
---|
738 | *
|
---|
739 | * It does not
|
---|
740 | * - try to move and overlap blocks that are not already adjacent
|
---|
741 | */
|
---|
742 | TrieBuilder.prototype.compactData = function () {
|
---|
743 | var start, movedStart;
|
---|
744 | var blockLength, overlap;
|
---|
745 | var i, mapIndex, blockCount;
|
---|
746 | /* do not compact linear-ASCII data */
|
---|
747 | var newStart = UTRIE2_DATA_START_OFFSET;
|
---|
748 | for (start = 0, i = 0; start < newStart; start += UTRIE2_DATA_BLOCK_LENGTH, ++i) {
|
---|
749 | this.map[i] = start;
|
---|
750 | }
|
---|
751 | /*
|
---|
752 | * Start with a block length of 64 for 2-byte UTF-8,
|
---|
753 | * then switch to UTRIE2_DATA_BLOCK_LENGTH.
|
---|
754 | */
|
---|
755 | blockLength = 64;
|
---|
756 | blockCount = blockLength >> UTRIE2_SHIFT_2;
|
---|
757 | for (start = newStart; start < this.dataLength;) {
|
---|
758 | /*
|
---|
759 | * start: index of first entry of current block
|
---|
760 | * newStart: index where the current block is to be moved
|
---|
761 | * (right after current end of already-compacted data)
|
---|
762 | */
|
---|
763 | if (start === UNEWTRIE2_DATA_0800_OFFSET) {
|
---|
764 | blockLength = UTRIE2_DATA_BLOCK_LENGTH;
|
---|
765 | blockCount = 1;
|
---|
766 | }
|
---|
767 | /* skip blocks that are not used */
|
---|
768 | if (this.map[start >> UTRIE2_SHIFT_2] <= 0) {
|
---|
769 | /* advance start to the next block */
|
---|
770 | start += blockLength;
|
---|
771 | /* leave newStart with the previous block! */
|
---|
772 | continue;
|
---|
773 | }
|
---|
774 | /* search for an identical block */
|
---|
775 | movedStart = this.findSameDataBlock(newStart, start, blockLength);
|
---|
776 | if (movedStart >= 0) {
|
---|
777 | /* found an identical block, set the other block's index value for the current block */
|
---|
778 | for (i = blockCount, mapIndex = start >> UTRIE2_SHIFT_2; i > 0; --i) {
|
---|
779 | this.map[mapIndex++] = movedStart;
|
---|
780 | movedStart += UTRIE2_DATA_BLOCK_LENGTH;
|
---|
781 | }
|
---|
782 | /* advance start to the next block */
|
---|
783 | start += blockLength;
|
---|
784 | /* leave newStart with the previous block! */
|
---|
785 | continue;
|
---|
786 | }
|
---|
787 | /* see if the beginning of this block can be overlapped with the end of the previous block */
|
---|
788 | /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
|
---|
789 | for (overlap = blockLength - UTRIE2_DATA_GRANULARITY; overlap > 0 && !equalInt(this.data, newStart - overlap, start, overlap); overlap -= UTRIE2_DATA_GRANULARITY) { }
|
---|
790 | if (overlap > 0 || newStart < start) {
|
---|
791 | /* some overlap, or just move the whole block */
|
---|
792 | movedStart = newStart - overlap;
|
---|
793 | for (i = blockCount, mapIndex = start >> UTRIE2_SHIFT_2; i > 0; --i) {
|
---|
794 | this.map[mapIndex++] = movedStart;
|
---|
795 | movedStart += UTRIE2_DATA_BLOCK_LENGTH;
|
---|
796 | }
|
---|
797 | /* move the non-overlapping indexes to their new positions */
|
---|
798 | start += overlap;
|
---|
799 | for (i = blockLength - overlap; i > 0; --i) {
|
---|
800 | this.data[newStart++] = this.data[start++];
|
---|
801 | }
|
---|
802 | }
|
---|
803 | else {
|
---|
804 | /* no overlap && newStart==start */
|
---|
805 | for (i = blockCount, mapIndex = start >> UTRIE2_SHIFT_2; i > 0; --i) {
|
---|
806 | this.map[mapIndex++] = start;
|
---|
807 | start += UTRIE2_DATA_BLOCK_LENGTH;
|
---|
808 | }
|
---|
809 | newStart = start;
|
---|
810 | }
|
---|
811 | }
|
---|
812 | /* now adjust the index-2 table */
|
---|
813 | for (i = 0; i < this.index2Length; ++i) {
|
---|
814 | if (i === UNEWTRIE2_INDEX_GAP_OFFSET) {
|
---|
815 | /* Gap indexes are invalid (-1). Skip over the gap. */
|
---|
816 | i += UNEWTRIE2_INDEX_GAP_LENGTH;
|
---|
817 | }
|
---|
818 | this.index2[i] = this.map[this.index2[i] >> UTRIE2_SHIFT_2];
|
---|
819 | }
|
---|
820 | this.dataNullOffset = this.map[this.dataNullOffset >> UTRIE2_SHIFT_2];
|
---|
821 | /* ensure dataLength alignment */
|
---|
822 | while ((newStart & (UTRIE2_DATA_GRANULARITY - 1)) !== 0) {
|
---|
823 | this.data[newStart++] = this.initialValue;
|
---|
824 | }
|
---|
825 | this.dataLength = newStart;
|
---|
826 | };
|
---|
827 | TrieBuilder.prototype.findSameDataBlock = function (dataLength, otherBlock, blockLength) {
|
---|
828 | var block = 0;
|
---|
829 | /* ensure that we do not even partially get past dataLength */
|
---|
830 | dataLength -= blockLength;
|
---|
831 | for (; block <= dataLength; block += UTRIE2_DATA_GRANULARITY) {
|
---|
832 | if (equalInt(this.data, block, otherBlock, blockLength)) {
|
---|
833 | return block;
|
---|
834 | }
|
---|
835 | }
|
---|
836 | return -1;
|
---|
837 | };
|
---|
838 | TrieBuilder.prototype.compactTrie = function () {
|
---|
839 | var highValue = this.get(0x10ffff);
|
---|
840 | /* find highStart and round it up */
|
---|
841 | var localHighStart = this.findHighStart(highValue);
|
---|
842 | localHighStart = (localHighStart + (UTRIE2_CP_PER_INDEX_1_ENTRY - 1)) & ~(UTRIE2_CP_PER_INDEX_1_ENTRY - 1);
|
---|
843 | if (localHighStart === 0x110000) {
|
---|
844 | highValue = this.errorValue;
|
---|
845 | }
|
---|
846 | /*
|
---|
847 | * Set trie->highStart only after utrie2_get32(trie, highStart).
|
---|
848 | * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
|
---|
849 | */
|
---|
850 | this.highStart = localHighStart;
|
---|
851 | if (this.highStart < 0x110000) {
|
---|
852 | /* Blank out [highStart..10ffff] to release associated data blocks. */
|
---|
853 | var suppHighStart = this.highStart <= 0x10000 ? 0x10000 : this.highStart;
|
---|
854 | this.setRange(suppHighStart, 0x10ffff, this.initialValue, true);
|
---|
855 | }
|
---|
856 | this.compactData();
|
---|
857 | if (this.highStart > 0x10000) {
|
---|
858 | this.compactIndex2();
|
---|
859 | }
|
---|
860 | /*
|
---|
861 | * Store the highValue in the data array and round up the dataLength.
|
---|
862 | * Must be done after compactData() because that assumes that dataLength
|
---|
863 | * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
|
---|
864 | */
|
---|
865 | this.data[this.dataLength++] = highValue;
|
---|
866 | while ((this.dataLength & (UTRIE2_DATA_GRANULARITY - 1)) !== 0) {
|
---|
867 | this.data[this.dataLength++] = this.initialValue;
|
---|
868 | }
|
---|
869 | this.isCompacted = true;
|
---|
870 | };
|
---|
871 | TrieBuilder.prototype.compactIndex2 = function () {
|
---|
872 | var i, start, movedStart, overlap;
|
---|
873 | /* do not compact linear-BMP index-2 blocks */
|
---|
874 | var newStart = UTRIE2_INDEX_2_BMP_LENGTH;
|
---|
875 | for (start = 0, i = 0; start < newStart; start += UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
|
---|
876 | this.map[i] = start;
|
---|
877 | }
|
---|
878 | /* Reduce the index table gap to what will be needed at runtime. */
|
---|
879 | newStart += UTRIE2_UTF8_2B_INDEX_2_LENGTH + ((this.highStart - 0x10000) >> UTRIE2_SHIFT_1);
|
---|
880 | for (start = UNEWTRIE2_INDEX_2_NULL_OFFSET; start < this.index2Length;) {
|
---|
881 | /*
|
---|
882 | * start: index of first entry of current block
|
---|
883 | * newStart: index where the current block is to be moved
|
---|
884 | * (right after current end of already-compacted data)
|
---|
885 | */
|
---|
886 | /* search for an identical block */
|
---|
887 | if ((movedStart = this.findSameIndex2Block(newStart, start)) >= 0) {
|
---|
888 | /* found an identical block, set the other block's index value for the current block */
|
---|
889 | this.map[start >> UTRIE2_SHIFT_1_2] = movedStart;
|
---|
890 | /* advance start to the next block */
|
---|
891 | start += UTRIE2_INDEX_2_BLOCK_LENGTH;
|
---|
892 | /* leave newStart with the previous block! */
|
---|
893 | continue;
|
---|
894 | }
|
---|
895 | /* see if the beginning of this block can be overlapped with the end of the previous block */
|
---|
896 | /* look for maximum overlap with the previous, adjacent block */
|
---|
897 | for (overlap = UTRIE2_INDEX_2_BLOCK_LENGTH - 1; overlap > 0 && !equalInt(this.index2, newStart - overlap, start, overlap); --overlap) { }
|
---|
898 | if (overlap > 0 || newStart < start) {
|
---|
899 | /* some overlap, or just move the whole block */
|
---|
900 | this.map[start >> UTRIE2_SHIFT_1_2] = newStart - overlap;
|
---|
901 | /* move the non-overlapping indexes to their new positions */
|
---|
902 | start += overlap;
|
---|
903 | for (i = UTRIE2_INDEX_2_BLOCK_LENGTH - overlap; i > 0; --i) {
|
---|
904 | this.index2[newStart++] = this.index2[start++];
|
---|
905 | }
|
---|
906 | }
|
---|
907 | else {
|
---|
908 | /* no overlap && newStart==start */ this.map[start >> UTRIE2_SHIFT_1_2] = start;
|
---|
909 | start += UTRIE2_INDEX_2_BLOCK_LENGTH;
|
---|
910 | newStart = start;
|
---|
911 | }
|
---|
912 | }
|
---|
913 | /* now adjust the index-1 table */
|
---|
914 | for (i = 0; i < UNEWTRIE2_INDEX_1_LENGTH; ++i) {
|
---|
915 | this.index1[i] = this.map[this.index1[i] >> UTRIE2_SHIFT_1_2];
|
---|
916 | }
|
---|
917 | this.index2NullOffset = this.map[this.index2NullOffset >> UTRIE2_SHIFT_1_2];
|
---|
918 | /*
|
---|
919 | * Ensure data table alignment:
|
---|
920 | * Needs to be granularity-aligned for 16-bit trie
|
---|
921 | * (so that dataMove will be down-shiftable),
|
---|
922 | * and 2-aligned for uint32_t data.
|
---|
923 | */
|
---|
924 | while ((newStart & ((UTRIE2_DATA_GRANULARITY - 1) | 1)) !== 0) {
|
---|
925 | /* Arbitrary value: 0x3fffc not possible for real data. */
|
---|
926 | this.index2[newStart++] = 0x0000ffff << UTRIE2_INDEX_SHIFT;
|
---|
927 | }
|
---|
928 | this.index2Length = newStart;
|
---|
929 | };
|
---|
930 | TrieBuilder.prototype.findSameIndex2Block = function (index2Length, otherBlock) {
|
---|
931 | /* ensure that we do not even partially get past index2Length */
|
---|
932 | index2Length -= UTRIE2_INDEX_2_BLOCK_LENGTH;
|
---|
933 | for (var block = 0; block <= index2Length; ++block) {
|
---|
934 | if (equalInt(this.index2, block, otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
|
---|
935 | return block;
|
---|
936 | }
|
---|
937 | }
|
---|
938 | return -1;
|
---|
939 | };
|
---|
940 | TrieBuilder.prototype._set = function (c, forLSCP, value) {
|
---|
941 | if (this.isCompacted) {
|
---|
942 | throw new Error('Trie was already compacted');
|
---|
943 | }
|
---|
944 | var block = this.getDataBlock(c, forLSCP);
|
---|
945 | this.data[block + (c & UTRIE2_DATA_MASK)] = value;
|
---|
946 | return this;
|
---|
947 | };
|
---|
948 | TrieBuilder.prototype.writeBlock = function (block, value) {
|
---|
949 | var limit = block + UTRIE2_DATA_BLOCK_LENGTH;
|
---|
950 | while (block < limit) {
|
---|
951 | this.data[block++] = value;
|
---|
952 | }
|
---|
953 | };
|
---|
954 | TrieBuilder.prototype.isInNullBlock = function (c, forLSCP) {
|
---|
955 | var i2 = isHighSurrogate(c) && forLSCP
|
---|
956 | ? UTRIE2_LSCP_INDEX_2_OFFSET - (0xd800 >> UTRIE2_SHIFT_2) + (c >> UTRIE2_SHIFT_2)
|
---|
957 | : this.index1[c >> UTRIE2_SHIFT_1] + ((c >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK);
|
---|
958 | var block = this.index2[i2];
|
---|
959 | return block === this.dataNullOffset;
|
---|
960 | };
|
---|
961 | TrieBuilder.prototype.fillBlock = function (block, start, limit, value, initialValue, overwrite) {
|
---|
962 | var pLimit = block + limit;
|
---|
963 | if (overwrite) {
|
---|
964 | for (var i = block + start; i < pLimit; i++) {
|
---|
965 | this.data[i] = value;
|
---|
966 | }
|
---|
967 | }
|
---|
968 | else {
|
---|
969 | for (var i = block + start; i < pLimit; i++) {
|
---|
970 | if (this.data[i] === initialValue) {
|
---|
971 | this.data[i] = value;
|
---|
972 | }
|
---|
973 | }
|
---|
974 | }
|
---|
975 | };
|
---|
976 | TrieBuilder.prototype.setIndex2Entry = function (i2, block) {
|
---|
977 | ++this.map[block >> UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */
|
---|
978 | var oldBlock = this.index2[i2];
|
---|
979 | if (0 === --this.map[oldBlock >> UTRIE2_SHIFT_2]) {
|
---|
980 | this.releaseDataBlock(oldBlock);
|
---|
981 | }
|
---|
982 | this.index2[i2] = block;
|
---|
983 | };
|
---|
984 | TrieBuilder.prototype.releaseDataBlock = function (block) {
|
---|
985 | /* put this block at the front of the free-block chain */
|
---|
986 | this.map[block >> UTRIE2_SHIFT_2] = -this.firstFreeBlock;
|
---|
987 | this.firstFreeBlock = block;
|
---|
988 | };
|
---|
989 | TrieBuilder.prototype.getDataBlock = function (c, forLSCP) {
|
---|
990 | var i2 = this.getIndex2Block(c, forLSCP);
|
---|
991 | i2 += (c >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
|
---|
992 | var oldBlock = this.index2[i2];
|
---|
993 | if (this.isWritableBlock(oldBlock)) {
|
---|
994 | return oldBlock;
|
---|
995 | }
|
---|
996 | /* allocate a new data block */
|
---|
997 | var newBlock = this.allocDataBlock(oldBlock);
|
---|
998 | this.setIndex2Entry(i2, newBlock);
|
---|
999 | return newBlock;
|
---|
1000 | };
|
---|
1001 | TrieBuilder.prototype.isWritableBlock = function (block) {
|
---|
1002 | return block !== this.dataNullOffset && 1 === this.map[block >> UTRIE2_SHIFT_2];
|
---|
1003 | };
|
---|
1004 | TrieBuilder.prototype.getIndex2Block = function (c, forLSCP) {
|
---|
1005 | if (c >= 0xd800 && c < 0xdc00 && forLSCP) {
|
---|
1006 | return UTRIE2_LSCP_INDEX_2_OFFSET;
|
---|
1007 | }
|
---|
1008 | var i1 = c >> UTRIE2_SHIFT_1;
|
---|
1009 | var i2 = this.index1[i1];
|
---|
1010 | if (i2 === this.index2NullOffset) {
|
---|
1011 | i2 = this.allocIndex2Block();
|
---|
1012 | this.index1[i1] = i2;
|
---|
1013 | }
|
---|
1014 | return i2;
|
---|
1015 | };
|
---|
1016 | TrieBuilder.prototype.allocDataBlock = function (copyBlock) {
|
---|
1017 | var newBlock;
|
---|
1018 | if (this.firstFreeBlock !== 0) {
|
---|
1019 | /* get the first free block */
|
---|
1020 | newBlock = this.firstFreeBlock;
|
---|
1021 | this.firstFreeBlock = -this.map[newBlock >> UTRIE2_SHIFT_2];
|
---|
1022 | }
|
---|
1023 | else {
|
---|
1024 | /* get a new block from the high end */
|
---|
1025 | newBlock = this.dataLength;
|
---|
1026 | var newTop = newBlock + UTRIE2_DATA_BLOCK_LENGTH;
|
---|
1027 | if (newTop > this.dataCapacity) {
|
---|
1028 | var capacity = void 0;
|
---|
1029 | /* out of memory in the data array */
|
---|
1030 | if (this.dataCapacity < UNEWTRIE2_MEDIUM_DATA_LENGTH) {
|
---|
1031 | capacity = UNEWTRIE2_MEDIUM_DATA_LENGTH;
|
---|
1032 | }
|
---|
1033 | else if (this.dataCapacity < UNEWTRIE2_MAX_DATA_LENGTH) {
|
---|
1034 | capacity = UNEWTRIE2_MAX_DATA_LENGTH;
|
---|
1035 | }
|
---|
1036 | else {
|
---|
1037 | /*
|
---|
1038 | * Should never occur.
|
---|
1039 | * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
|
---|
1040 | * or the code writes more values than should be possible.
|
---|
1041 | */
|
---|
1042 | throw new Error('Internal error in Trie creation.');
|
---|
1043 | }
|
---|
1044 | var newData = new Uint32Array(capacity);
|
---|
1045 | newData.set(this.data.subarray(0, this.dataLength));
|
---|
1046 | this.data = newData;
|
---|
1047 | this.dataCapacity = capacity;
|
---|
1048 | }
|
---|
1049 | this.dataLength = newTop;
|
---|
1050 | }
|
---|
1051 | this.data.set(this.data.subarray(copyBlock, copyBlock + UTRIE2_DATA_BLOCK_LENGTH), newBlock);
|
---|
1052 | this.map[newBlock >> UTRIE2_SHIFT_2] = 0;
|
---|
1053 | return newBlock;
|
---|
1054 | };
|
---|
1055 | TrieBuilder.prototype.allocIndex2Block = function () {
|
---|
1056 | var newBlock = this.index2Length;
|
---|
1057 | var newTop = newBlock + UTRIE2_INDEX_2_BLOCK_LENGTH;
|
---|
1058 | if (newTop > this.index2.length) {
|
---|
1059 | throw new Error('Internal error in Trie creation.');
|
---|
1060 | /*
|
---|
1061 | * Should never occur.
|
---|
1062 | * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
|
---|
1063 | * or the code writes more values than should be possible.
|
---|
1064 | */
|
---|
1065 | }
|
---|
1066 | this.index2Length = newTop;
|
---|
1067 | this.index2.set(this.index2.subarray(this.index2NullOffset, this.index2NullOffset + UTRIE2_INDEX_2_BLOCK_LENGTH), newBlock);
|
---|
1068 | return newBlock;
|
---|
1069 | };
|
---|
1070 | return TrieBuilder;
|
---|
1071 | }());
|
---|
1072 | var serializeBase64 = function (trie) {
|
---|
1073 | var index = trie.index;
|
---|
1074 | var data = trie.data;
|
---|
1075 | if (!(index instanceof Uint16Array) || !(data instanceof Uint16Array || data instanceof Uint32Array)) {
|
---|
1076 | throw new Error('TrieBuilder serializer only support TypedArrays');
|
---|
1077 | }
|
---|
1078 | var headerLength = Uint32Array.BYTES_PER_ELEMENT * 6;
|
---|
1079 | var bufferLength = headerLength + index.byteLength + data.byteLength;
|
---|
1080 | var buffer = new ArrayBuffer(Math.ceil(bufferLength / 4) * 4);
|
---|
1081 | var view32 = new Uint32Array(buffer);
|
---|
1082 | var view16 = new Uint16Array(buffer);
|
---|
1083 | view32[0] = trie.initialValue;
|
---|
1084 | view32[1] = trie.errorValue;
|
---|
1085 | view32[2] = trie.highStart;
|
---|
1086 | view32[3] = trie.highValueIndex;
|
---|
1087 | view32[4] = index.byteLength;
|
---|
1088 | // $FlowFixMe
|
---|
1089 | view32[5] = data.BYTES_PER_ELEMENT;
|
---|
1090 | view16.set(index, headerLength / Uint16Array.BYTES_PER_ELEMENT);
|
---|
1091 | if (data.BYTES_PER_ELEMENT === Uint16Array.BYTES_PER_ELEMENT) {
|
---|
1092 | view16.set(data, (headerLength + index.byteLength) / Uint16Array.BYTES_PER_ELEMENT);
|
---|
1093 | }
|
---|
1094 | else {
|
---|
1095 | view32.set(data, Math.ceil((headerLength + index.byteLength) / Uint32Array.BYTES_PER_ELEMENT));
|
---|
1096 | }
|
---|
1097 | return [encode(new Uint8Array(buffer)), buffer.byteLength];
|
---|
1098 | };
|
---|
1099 |
|
---|
1100 | export { Trie, TrieBuilder, createTrieFromBase64, serializeBase64 };
|
---|
1101 | //# sourceMappingURL=utrie.es5.js.map
|
---|