1 | var TokenStream = require('../common/TokenStream');
|
---|
2 | var adoptBuffer = require('../common/adopt-buffer');
|
---|
3 |
|
---|
4 | var constants = require('./const');
|
---|
5 | var TYPE = constants.TYPE;
|
---|
6 |
|
---|
7 | var charCodeDefinitions = require('./char-code-definitions');
|
---|
8 | var isNewline = charCodeDefinitions.isNewline;
|
---|
9 | var isName = charCodeDefinitions.isName;
|
---|
10 | var isValidEscape = charCodeDefinitions.isValidEscape;
|
---|
11 | var isNumberStart = charCodeDefinitions.isNumberStart;
|
---|
12 | var isIdentifierStart = charCodeDefinitions.isIdentifierStart;
|
---|
13 | var charCodeCategory = charCodeDefinitions.charCodeCategory;
|
---|
14 | var isBOM = charCodeDefinitions.isBOM;
|
---|
15 |
|
---|
16 | var utils = require('./utils');
|
---|
17 | var cmpStr = utils.cmpStr;
|
---|
18 | var getNewlineLength = utils.getNewlineLength;
|
---|
19 | var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
|
---|
20 | var consumeEscaped = utils.consumeEscaped;
|
---|
21 | var consumeName = utils.consumeName;
|
---|
22 | var consumeNumber = utils.consumeNumber;
|
---|
23 | var consumeBadUrlRemnants = utils.consumeBadUrlRemnants;
|
---|
24 |
|
---|
// Token buffer entries pack two values into one integer: a token type in the
// bits above TYPE_SHIFT and an offset (or token index) in the bits below it.
var TYPE_SHIFT = 24;
var OFFSET_MASK = (1 << TYPE_SHIFT) - 1; // 0x00FFFFFF, selects the low 24 bits
|
---|
27 |
|
---|
28 | function tokenize(source, stream) {
|
---|
// Read the UTF-16 code unit at `offset` in the source being tokenized.
// Positions at or beyond the end of the source read as 0, which the callers
// treat as "no more input".
function getCharCode(offset) {
    if (offset < sourceLength) {
        return source.charCodeAt(offset);
    }

    return 0;
}
|
---|
32 |
|
---|
// § 4.3.3. Consume a numeric token
//
// Starts at the current `offset`, moves `offset` past the number and whatever
// suffix belongs to it, and records the resulting token kind in `type`:
//   - number followed by an identifier start -> TYPE.Dimension (unit name consumed)
//   - number followed by U+0025 (%)          -> TYPE.Percentage (% consumed)
//   - anything else                          -> TYPE.Number
function consumeNumericToken() {
    // Consume a number; everything below looks at what follows it.
    var numberEnd = consumeNumber(source, offset);

    offset = numberEnd;

    // Would the next 3 input code points start an identifier (i.e. a unit)?
    var startsUnit = isIdentifierStart(
        getCharCode(numberEnd),
        getCharCode(numberEnd + 1),
        getCharCode(numberEnd + 2)
    );

    if (startsUnit) {
        // <dimension-token>: the unit is a name directly after the number.
        type = TYPE.Dimension;
        offset = consumeName(source, numberEnd);
    } else if (getCharCode(numberEnd) === 0x0025) {
        // <percentage-token>: consume the U+0025 PERCENTAGE SIGN (%).
        type = TYPE.Percentage;
        offset = numberEnd + 1;
    } else {
        // Plain <number-token>.
        type = TYPE.Number;
    }
}
|
---|
59 |
|
---|
// § 4.3.4. Consume an ident-like token
//
// Consumes a name starting at the current `offset` and classifies it through `type`:
//   - name not followed by U+0028 "("      -> TYPE.Ident
//   - name followed by "("                 -> TYPE.Function ("(" consumed)
//   - "url(" followed by a quoted string   -> TYPE.Function, ending right after "url("
//   - "url(" followed by anything else     -> whatever consumeUrlToken() produces
function consumeIdentLikeToken() {
    var identStart = offset;

    // Consume a name, and let string be the result.
    offset = consumeName(source, identStart);

    // Per the spec step implemented here, the comparison with "url" is meant to be
    // an ASCII case-insensitive match; cmpStr is expected to provide that.
    var isUrlName = cmpStr(source, identStart, offset, 'url');
    var followedByParen = getCharCode(offset) === 0x0028;

    if (!followedByParen) {
        // Create an <ident-token> with its value set to string.
        type = TYPE.Ident;
        return;
    }

    if (!isUrlName) {
        // Consume U+0028 LEFT PARENTHESIS and create a <function-token>.
        type = TYPE.Function;
        offset++;
        return;
    }

    // "url(" — skip the parenthesis and any whitespace to peek at the argument.
    offset = findWhiteSpaceEnd(source, offset + 1);

    switch (getCharCode(offset)) {
        // U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE (')
        case 0x0022:
        case 0x0027:
            // Quoted argument: emit only "url(" (3 letters + parenthesis) as a
            // <function-token>; the whitespace and string are tokenized afterwards.
            type = TYPE.Function;
            offset = identStart + 4;
            break;

        default:
            // Unquoted argument: consume a url token.
            consumeUrlToken();
    }
}
|
---|
99 |
|
---|
// § 4.3.5. Consume a string token
//
// `endingCodePoint` is the code point that terminates the string. When it is
// omitted (or falsy) the code unit at the current `offset` is taken as the
// terminator and skipped. On return `offset` points just past the token and
// `type` is TYPE.String, or TYPE.BadString when an unescaped newline was hit.
function consumeStringToken(endingCodePoint) {
    var terminator = endingCodePoint;

    if (!terminator) {
        // No explicit terminator: the current input code point (the opening quote) is used.
        terminator = getCharCode(offset);
        offset++;
    }

    // Initially create a <string-token>.
    type = TYPE.String;

    // Repeatedly consume the next input code point from the stream.
    // Running off the end of the source is a parse error that still yields a <string-token>.
    while (offset < source.length) {
        var current = source.charCodeAt(offset);
        var category = charCodeCategory(current);

        // Ending code point: consume it and return the <string-token>.
        if (category === terminator) {
            offset++;
            return;
        }

        // EOF: this is a parse error. Return the <string-token>.
        if (category === charCodeCategory.Eof) {
            return;
        }

        if (category === charCodeCategory.WhiteSpace) {
            // Only a newline matters here; other whitespace is ordinary string content.
            if (isNewline(current)) {
                // Parse error: the token becomes a <bad-string-token>.
                // Note the newline is included in the token range.
                offset += getNewlineLength(source, offset, current);
                type = TYPE.BadString;
                return;
            }
        } else if (category === 0x005C && offset !== source.length - 1) {
            // U+005C REVERSE SOLIDUS (\) that is not the very last code unit
            // (a backslash right before EOF is simply skipped).
            var following = getCharCode(offset + 1);

            if (isNewline(following)) {
                // Escaped newline: consume it.
                offset += getNewlineLength(source, offset + 1, following);
            } else if (isValidEscape(current, following)) {
                // Valid escape: consume the escaped code point. The -1 compensates
                // for the increment at the bottom of the loop.
                offset = consumeEscaped(source, offset) - 1;
            }
        }

        // Anything else is part of the string value; move on.
        offset++;
    }
}
|
---|
164 |
|
---|
// § 4.3.6. Consume a url token
//
// Expects the initial "url(" to be consumed already and the value to be unquoted,
// like url(foo); quoted values are turned into a <function-token> by
// consumeIdentLikeToken(), which is the only intended caller.
// On return `offset` points just past the token and `type` is TYPE.Url or TYPE.BadUrl.
function consumeUrlToken() {
    // Shared exit for every malformed-url path: swallow the remnants of the bad
    // url and turn the token into a <bad-url-token>.
    function becomeBadUrl() {
        offset = consumeBadUrlRemnants(source, offset);
        type = TYPE.BadUrl;
    }

    // Initially create a <url-token>.
    type = TYPE.Url;

    // Consume as much leading whitespace as possible.
    offset = findWhiteSpaceEnd(source, offset);

    // Repeatedly consume the next input code point from the stream.
    // Running off the end of the source is a parse error that still yields a <url-token>.
    while (offset < source.length) {
        var current = source.charCodeAt(offset);
        var category = charCodeCategory(current);

        // U+0029 RIGHT PARENTHESIS ()): consume it and return the <url-token>.
        if (category === 0x0029) {
            offset++;
            return;
        }

        // EOF: this is a parse error. Return the <url-token>.
        if (category === charCodeCategory.Eof) {
            return;
        }

        if (category === charCodeCategory.WhiteSpace) {
            // Whitespace may only be followed by the closing parenthesis or the end of input.
            offset = findWhiteSpaceEnd(source, offset);

            var atEnd = offset >= source.length;

            if (atEnd || getCharCode(offset) === 0x0029) {
                // Consume the ")" if there is one (ending on EOF is a parse error).
                if (!atEnd) {
                    offset++;
                }
                return;
            }

            // Something else follows the whitespace: bad url.
            becomeBadUrl();
            return;
        }

        // U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('), U+0028 LEFT PARENTHESIS (()
        // or a non-printable code point: parse error, bad url.
        if (category === 0x0022 ||
            category === 0x0027 ||
            category === 0x0028 ||
            category === charCodeCategory.NonPrintable) {
            becomeBadUrl();
            return;
        }

        // U+005C REVERSE SOLIDUS (\)
        if (category === 0x005C) {
            if (!isValidEscape(current, getCharCode(offset + 1))) {
                // Not a valid escape: parse error, bad url.
                becomeBadUrl();
                return;
            }

            // Valid escape: consume the escaped code point. The -1 compensates
            // for the increment at the bottom of the loop.
            offset = consumeEscaped(source, offset) - 1;
        }

        // Anything else is part of the url value; move on.
        offset++;
    }
}
|
---|
248 |
|
---|
// Main body of tokenize(source, stream).
//
// Walks `source` once, splitting it into tokens, and records the result in two
// parallel buffers indexed by token number:
//   - offsetAndType[i]: (token type << TYPE_SHIFT) | end offset of token i
//   - balance[i]: index of the token paired with token i by bracket matching,
//     or `sourceLength` when there is no pair (see the second switch below)
// The buffers are then attached to `stream`, which is reset, advanced once and returned.
// The nested consume* helpers above communicate with this loop through the shared
// `offset` and `type` variables.

// A stream is optional; create a fresh one when the caller did not pass any.
if (!stream) {
    stream = new TokenStream();
}

// ensure source is a string
// (any falsy input — undefined, null, 0, false, '' — becomes the empty string)
source = String(source || '');

var sourceLength = source.length;
// Buffers come from adoptBuffer(), which is given the stream's current buffers —
// presumably so they can be reused when large enough; confirm in ../common/adopt-buffer.
var offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
var balance = adoptBuffer(stream.balance, sourceLength + 1);
var tokenCount = 0;
// The result of isBOM() is used directly as the starting offset — presumably the
// number of code units to skip (0 when there is no BOM); confirm in ./char-code-definitions.
var start = isBOM(getCharCode(0));
var offset = start;
// Bracket-balance state:
//   balanceCloseType - token type that closes the innermost open bracket (0 = none open)
//   balanceStart     - (balanceCloseType << TYPE_SHIFT) | index of the innermost opening token
//   balancePrev      - scratch variable holding an opening token index
var balanceCloseType = 0;
var balanceStart = 0;
var balancePrev = 0;

// https://drafts.csswg.org/css-syntax-3/#consume-token
// § 4.3.1. Consume a token
while (offset < sourceLength) {
    var code = source.charCodeAt(offset);
    var type = 0;

    // Default: the token has no balance pair (`sourceLength` is the "none" marker).
    balance[tokenCount] = sourceLength;

    // The switch value is matched against both raw code values (0x0022, ...)
    // and charCodeCategory.* class constants.
    switch (charCodeCategory(code)) {
        // whitespace
        case charCodeCategory.WhiteSpace:
            // Consume as much whitespace as possible. Return a <whitespace-token>.
            type = TYPE.WhiteSpace;
            offset = findWhiteSpaceEnd(source, offset + 1);
            break;

        // U+0022 QUOTATION MARK (")
        case 0x0022:
            // Consume a string token and return it.
            consumeStringToken();
            break;

        // U+0023 NUMBER SIGN (#)
        case 0x0023:
            // If the next input code point is a name code point or the next two input code points are a valid escape, then:
            if (isName(getCharCode(offset + 1)) || isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
                // Create a <hash-token>.
                type = TYPE.Hash;

                // If the next 3 input code points would start an identifier, set the <hash-token>’s type flag to "id".
                // if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
                //     // TODO: set id flag
                // }

                // Consume a name, and set the <hash-token>’s value to the returned string.
                offset = consumeName(source, offset + 1);

                // Return the <hash-token>.
            } else {
                // Otherwise, return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
            }

            break;

        // U+0027 APOSTROPHE (')
        case 0x0027:
            // Consume a string token and return it.
            consumeStringToken();
            break;

        // U+0028 LEFT PARENTHESIS (()
        case 0x0028:
            // Return a <(-token>.
            type = TYPE.LeftParenthesis;
            offset++;
            break;

        // U+0029 RIGHT PARENTHESIS ())
        case 0x0029:
            // Return a <)-token>.
            type = TYPE.RightParenthesis;
            offset++;
            break;

        // U+002B PLUS SIGN (+)
        case 0x002B:
            // If the input stream starts with a number, ...
            if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                // ... reconsume the current input code point, consume a numeric token, and return it.
                consumeNumericToken();
            } else {
                // Otherwise, return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
            }
            break;

        // U+002C COMMA (,)
        case 0x002C:
            // Return a <comma-token>.
            type = TYPE.Comma;
            offset++;
            break;

        // U+002D HYPHEN-MINUS (-)
        case 0x002D:
            // If the input stream starts with a number, reconsume the current input code point, consume a numeric token, and return it.
            if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                consumeNumericToken();
            } else {
                // Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
                if (getCharCode(offset + 1) === 0x002D &&
                    getCharCode(offset + 2) === 0x003E) {
                    type = TYPE.CDC;
                    // 3 code units: the current "-" plus "->"
                    offset = offset + 3;
                } else {
                    // Otherwise, if the input stream starts with an identifier, ...
                    if (isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                        // ... reconsume the current input code point, consume an ident-like token, and return it.
                        consumeIdentLikeToken();
                    } else {
                        // Otherwise, return a <delim-token> with its value set to the current input code point.
                        type = TYPE.Delim;
                        offset++;
                    }
                }
            }
            break;

        // U+002E FULL STOP (.)
        case 0x002E:
            // If the input stream starts with a number, ...
            if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                // ... reconsume the current input code point, consume a numeric token, and return it.
                consumeNumericToken();
            } else {
                // Otherwise, return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
            }

            break;

        // U+002F SOLIDUS (/)
        case 0x002F:
            // If the current U+002F SOLIDUS (/) is followed by a U+002A ASTERISK (*), ...
            if (getCharCode(offset + 1) === 0x002A) {
                // ... consume them and all following code points up to and including the first U+002A ASTERISK (*)
                // followed by a U+002F SOLIDUS (/), or up to an EOF code point.
                // Unlike the spec algorithm, the comment is kept as a token of its own.
                type = TYPE.Comment;
                offset = source.indexOf('*/', offset + 2) + 2;
                // indexOf() returned -1 (so -1 + 2 === 1): the comment is unterminated
                // and extends to the end of the source.
                if (offset === 1) {
                    offset = source.length;
                }
            } else {
                type = TYPE.Delim;
                offset++;
            }
            break;

        // U+003A COLON (:)
        case 0x003A:
            // Return a <colon-token>.
            type = TYPE.Colon;
            offset++;
            break;

        // U+003B SEMICOLON (;)
        case 0x003B:
            // Return a <semicolon-token>.
            type = TYPE.Semicolon;
            offset++;
            break;

        // U+003C LESS-THAN SIGN (<)
        case 0x003C:
            // If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
            if (getCharCode(offset + 1) === 0x0021 &&
                getCharCode(offset + 2) === 0x002D &&
                getCharCode(offset + 3) === 0x002D) {
                // ... consume them and return a <CDO-token>.
                type = TYPE.CDO;
                // 4 code units: the current "<" plus "!--"
                offset = offset + 4;
            } else {
                // Otherwise, return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
            }

            break;

        // U+0040 COMMERCIAL AT (@)
        case 0x0040:
            // If the next 3 input code points would start an identifier, ...
            if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
                // ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
                type = TYPE.AtKeyword;
                offset = consumeName(source, offset + 1);
            } else {
                // Otherwise, return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
            }

            break;

        // U+005B LEFT SQUARE BRACKET ([)
        case 0x005B:
            // Return a <[-token>.
            type = TYPE.LeftSquareBracket;
            offset++;
            break;

        // U+005C REVERSE SOLIDUS (\)
        case 0x005C:
            // If the input stream starts with a valid escape, ...
            if (isValidEscape(code, getCharCode(offset + 1))) {
                // ... reconsume the current input code point, consume an ident-like token, and return it.
                consumeIdentLikeToken();
            } else {
                // Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
            }
            break;

        // U+005D RIGHT SQUARE BRACKET (])
        case 0x005D:
            // Return a <]-token>.
            type = TYPE.RightSquareBracket;
            offset++;
            break;

        // U+007B LEFT CURLY BRACKET ({)
        case 0x007B:
            // Return a <{-token>.
            type = TYPE.LeftCurlyBracket;
            offset++;
            break;

        // U+007D RIGHT CURLY BRACKET (})
        case 0x007D:
            // Return a <}-token>.
            type = TYPE.RightCurlyBracket;
            offset++;
            break;

        // digit
        case charCodeCategory.Digit:
            // Reconsume the current input code point, consume a numeric token, and return it.
            consumeNumericToken();
            break;

        // name-start code point
        case charCodeCategory.NameStart:
            // Reconsume the current input code point, consume an ident-like token, and return it.
            consumeIdentLikeToken();
            break;

        // EOF
        case charCodeCategory.Eof:
            // Return an <EOF-token>.
            // NOTE(review): this branch neither sets `type` nor advances `offset`, so it
            // relies on charCodeCategory() never reporting Eof for a code unit inside
            // the source — confirm in ./char-code-definitions.
            break;

        // anything else
        default:
            // Return a <delim-token> with its value set to the current input code point.
            type = TYPE.Delim;
            offset++;
    }

    // Bracket balance bookkeeping for the token that was just consumed.
    switch (type) {
        // The token closes the innermost open bracket.
        case balanceCloseType:
            // Index of the matching opening token.
            balancePrev = balanceStart & OFFSET_MASK;
            // Pop: the opening token's slot held the enclosing bracket's packed state.
            balanceStart = balance[balancePrev];
            balanceCloseType = balanceStart >> TYPE_SHIFT;
            // Link the pair in both directions: closer -> opener, opener -> closer.
            balance[tokenCount] = balancePrev;
            balance[balancePrev++] = tokenCount;
            // Tokens between the pair that still carry the "none" marker get the
            // closing token's index.
            for (; balancePrev < tokenCount; balancePrev++) {
                if (balance[balancePrev] === sourceLength) {
                    balance[balancePrev] = tokenCount;
                }
            }
            break;

        // Opening tokens: save the current packed state in the opener's own slot
        // (a stack threaded through `balance`), then make this token the innermost one.
        case TYPE.LeftParenthesis:
        case TYPE.Function:
            balance[tokenCount] = balanceStart;
            balanceCloseType = TYPE.RightParenthesis;
            balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
            break;

        case TYPE.LeftSquareBracket:
            balance[tokenCount] = balanceStart;
            balanceCloseType = TYPE.RightSquareBracket;
            balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
            break;

        case TYPE.LeftCurlyBracket:
            balance[tokenCount] = balanceStart;
            balanceCloseType = TYPE.RightCurlyBracket;
            balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
            break;
    }

    // Record the token: its type together with the offset where it ends.
    offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | offset;
}

// finalize buffers
offsetAndType[tokenCount] = (TYPE.EOF << TYPE_SHIFT) | offset; // <EOF-token>
balance[tokenCount] = sourceLength;
balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
// Unwind brackets that were never closed: walk the saved states and replace each
// opener's temporary stack link with the "none" marker.
while (balanceStart !== 0) {
    balancePrev = balanceStart & OFFSET_MASK;
    balanceStart = balance[balancePrev];
    balance[balancePrev] = sourceLength;
}

// update stream
stream.source = source;
stream.firstCharOffset = start;
stream.offsetAndType = offsetAndType;
stream.tokenCount = tokenCount;
stream.balance = balance;
// Rewind the stream and move it to the first token.
stream.reset();
stream.next();

return stream;
|
---|
576 | }
|
---|
577 |
|
---|
// Expose helpers as static members of the tokenizer function: first the
// constants, then the char code definitions, then the utils. Sources are copied
// in this order, so on a name clash the later source overrides the earlier one.
[constants, charCodeDefinitions, utils].forEach(function(extension) {
    Object.keys(extension).forEach(function(name) {
        tokenize[name] = extension[name];
    });
});

module.exports = tokenize;
|
---|