// Source: imaps-frontend/node_modules/css-tree/lib/tokenizer/index.js @ d565449
//
// Branch: main
// Last change on this file since d565449 was d565449, checked in by stefan toskovski <stefantoska84@…>, 4 weeks ago
//
// Commit message: Update repo after prototype presentation
//
//   • Property mode set to 100644
// File size: 23.3 KB
// Trac browser columns: Rev | Line
const TokenStream = require('../common/TokenStream');
const adoptBuffer = require('../common/adopt-buffer');

const constants = require('./const');
const { TYPE } = constants;

const charCodeDefinitions = require('./char-code-definitions');
const {
    isNewline,
    isName,
    isValidEscape,
    isNumberStart,
    isIdentifierStart,
    charCodeCategory,
    isBOM
} = charCodeDefinitions;

const utils = require('./utils');
const {
    cmpStr,
    getNewlineLength,
    findWhiteSpaceEnd,
    consumeEscaped,
    consumeName,
    consumeNumber,
    consumeBadUrlRemnants
} = utils;

// Layout of the packed integers built by tokenize(): the low 24 bits
// (OFFSET_MASK) carry an offset or a token index, and the bits from
// TYPE_SHIFT upwards carry a token type.
const OFFSET_MASK = 0x00FFFFFF;
const TYPE_SHIFT = 24;

/**
 * Split a CSS source into tokens, following the "Consume a token" algorithms
 * of CSS Syntax Module Level 3 (§ 4.3).
 *
 * Tokens are not materialised as objects. For the token with index `i`:
 *   - `offsetAndType[i]` packs the token type (shifted left by TYPE_SHIFT)
 *     together with the offset at which the token ENDS;
 *   - `balance[i]` holds a token index used for bracket matching:
 *       * for an opening `(` / `[` / `{` / function token that gets closed,
 *         the index of its closing token;
 *       * for that closing token, the index of its opener;
 *       * for any other token located inside a closed pair, the index of the
 *         closing token of the innermost enclosing pair;
 *       * otherwise (top level, unmatched closer, never-closed opener)
 *         `source.length`.
 * One extra entry is written after the last token for the <EOF-token>.
 *
 * NOTE: the end offset shares one integer with the type, so an offset larger
 * than OFFSET_MASK would spill into the type bits; nothing here guards
 * against sources that long.
 *
 * @param {*} source CSS text. Coerced with `String(source || '')`, so every
 *     falsy value (undefined, null, 0, false, '') is tokenized as ''.
 * @param {TokenStream} [stream] Stream to fill. A new TokenStream is created
 *     when omitted. Its current `offsetAndType` and `balance` values are
 *     handed to adoptBuffer() together with the required size.
 * @returns {TokenStream} The same stream with `source`, `firstCharOffset`,
 *     `offsetAndType`, `tokenCount` and `balance` assigned, after `reset()`
 *     and `next()` have been called on it.
 */
function tokenize(source, stream) {
    if (!stream) {
        stream = new TokenStream();
    }

    // ensure source is a string
    source = String(source || '');

    const sourceLength = source.length;
    const offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
    const balance = adoptBuffer(stream.balance, sourceLength + 1);
    // The value returned by isBOM() is used directly as the offset of the first
    // code point to tokenize (presumably 1 when the source starts with a BOM, 0 otherwise).
    const start = isBOM(getCharCode(0));

    // Shared tokenizer state; the consume*() helpers below read and update it.
    let offset = start;          // current position, and the end of the token once consumed
    let type = 0;                // type of the token being consumed
    let tokenCount = 0;

    // Bracket matching state. Open brackets form a linked stack threaded through
    // `balance`: `balanceStart` packs (expected closing type, index of the innermost
    // open bracket) and `balance[thatIndex]` temporarily keeps the previous `balanceStart`.
    let balanceCloseType = 0;    // token type that closes the innermost open bracket (0 = none open)
    let balanceStart = 0;
    let balancePrev = 0;

    // Char code at `index`, or 0 (standing in for EOF) when `index` is at or past the end.
    function getCharCode(index) {
        return index < sourceLength ? source.charCodeAt(index) : 0;
    }

    // § 4.3.3. Consume a numeric token
    function consumeNumericToken() {
        // Consume a number and let number be the result.
        offset = consumeNumber(source, offset);

        // If the next 3 input code points would start an identifier, then:
        if (isIdentifierStart(getCharCode(offset), getCharCode(offset + 1), getCharCode(offset + 2))) {
            // Create a <dimension-token> with the same value and type flag as number,
            // consume a name for its unit and return the <dimension-token>.
            type = TYPE.Dimension;
            offset = consumeName(source, offset);
            return;
        }

        // Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
        if (getCharCode(offset) === 0x0025) {
            // Create a <percentage-token> with the same value as number, and return it.
            type = TYPE.Percentage;
            offset++;
            return;
        }

        // Otherwise, create a <number-token> with the same value and type flag as number, and return it.
        type = TYPE.Number;
    }

    // § 4.3.4. Consume an ident-like token
    function consumeIdentLikeToken() {
        const nameStartOffset = offset;

        // Consume a name, and let string be the result.
        offset = consumeName(source, offset);

        // If string's value is an ASCII case-insensitive match for "url",
        // and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
        if (cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
            // While the next two input code points are whitespace, consume the next input code point.
            offset = findWhiteSpaceEnd(source, offset + 1);

            // If the next one or two input code points are U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
            // or whitespace followed by U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
            // then create a <function-token> with its value set to string and return it.
            if (getCharCode(offset) === 0x0022 ||
                getCharCode(offset) === 0x0027) {
                type = TYPE.Function;
                // Rewind to just after `url(` (3 name characters + the parenthesis) so the
                // whitespace skipped above is not part of the function token.
                offset = nameStartOffset + 4;
                return;
            }

            // Otherwise, consume a url token, and return it.
            consumeUrlToken();
            return;
        }

        // Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
        // Create a <function-token> with its value set to string and return it.
        if (getCharCode(offset) === 0x0028) {
            type = TYPE.Function;
            offset++;
            return;
        }

        // Otherwise, create an <ident-token> with its value set to string and return it.
        type = TYPE.Ident;
    }

    // § 4.3.5. Consume a string token
    function consumeStringToken(endingCodePoint) {
        // This algorithm may be called with an ending code point, which denotes the code point
        // that ends the string. If an ending code point is not specified,
        // the current input code point is used.
        if (!endingCodePoint) {
            endingCodePoint = getCharCode(offset++);
        }

        // Initially create a <string-token> with its value set to the empty string.
        type = TYPE.String;

        // Repeatedly consume the next input code point from the stream:
        for (; offset < sourceLength; offset++) {
            const code = source.charCodeAt(offset);

            switch (charCodeCategory(code)) {
                // ending code point
                case endingCodePoint:
                    // Return the <string-token>.
                    offset++;
                    return;

                // EOF
                case charCodeCategory.Eof:
                    // This is a parse error. Return the <string-token>.
                    return;

                // newline
                case charCodeCategory.WhiteSpace:
                    if (isNewline(code)) {
                        // This is a parse error; create a <bad-string-token> and return it.
                        // NOTE: the spec says to reconsume the newline, but `offset` is moved
                        // past it here, so the newline ends up inside the bad-string token.
                        offset += getNewlineLength(source, offset, code);
                        type = TYPE.BadString;
                        return;
                    }
                    break;

                // U+005C REVERSE SOLIDUS (\)
                case 0x005C: {
                    // If the next input code point is EOF, do nothing.
                    if (offset === sourceLength - 1) {
                        break;
                    }

                    const nextCode = getCharCode(offset + 1);

                    // Otherwise, if the next input code point is a newline, consume it.
                    if (isNewline(nextCode)) {
                        offset += getNewlineLength(source, offset + 1, nextCode);
                    } else if (isValidEscape(code, nextCode)) {
                        // Otherwise, (the stream starts with a valid escape) consume
                        // an escaped code point. The -1 compensates for the loop's offset++.
                        offset = consumeEscaped(source, offset) - 1;
                    }
                    break;
                }

                // anything else
                // Append the current input code point to the <string-token>'s value.
            }
        }
    }

    // § 4.3.6. Consume a url token
    // Note: This algorithm assumes that the initial "url(" has already been consumed.
    // This algorithm also assumes that it's being called to consume an "unquoted" value, like url(foo).
    // A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
    // automatically handles this distinction; this algorithm shouldn't be called directly otherwise.
    function consumeUrlToken() {
        // Initially create a <url-token> with its value set to the empty string.
        type = TYPE.Url;

        // Consume as much whitespace as possible.
        offset = findWhiteSpaceEnd(source, offset);

        // Repeatedly consume the next input code point from the stream:
        for (; offset < sourceLength; offset++) {
            const code = source.charCodeAt(offset);

            switch (charCodeCategory(code)) {
                // U+0029 RIGHT PARENTHESIS ())
                case 0x0029:
                    // Return the <url-token>.
                    offset++;
                    return;

                // EOF
                case charCodeCategory.Eof:
                    // This is a parse error. Return the <url-token>.
                    return;

                // whitespace
                case charCodeCategory.WhiteSpace:
                    // Consume as much whitespace as possible.
                    offset = findWhiteSpaceEnd(source, offset);

                    // If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
                    // consume it and return the <url-token>
                    // (if EOF was encountered, this is a parse error);
                    if (getCharCode(offset) === 0x0029 || offset >= sourceLength) {
                        if (offset < sourceLength) {
                            offset++;
                        }
                        return;
                    }

                    // otherwise, consume the remnants of a bad url, create a <bad-url-token>,
                    // and return it.
                    offset = consumeBadUrlRemnants(source, offset);
                    type = TYPE.BadUrl;
                    return;

                // U+0022 QUOTATION MARK (")
                // U+0027 APOSTROPHE (')
                // U+0028 LEFT PARENTHESIS (()
                // non-printable code point
                case 0x0022:
                case 0x0027:
                case 0x0028:
                case charCodeCategory.NonPrintable:
                    // This is a parse error. Consume the remnants of a bad url,
                    // create a <bad-url-token>, and return it.
                    offset = consumeBadUrlRemnants(source, offset);
                    type = TYPE.BadUrl;
                    return;

                // U+005C REVERSE SOLIDUS (\)
                case 0x005C:
                    // If the stream starts with a valid escape, consume an escaped code point.
                    // The -1 compensates for the loop's offset++.
                    if (isValidEscape(code, getCharCode(offset + 1))) {
                        offset = consumeEscaped(source, offset) - 1;
                        break;
                    }

                    // Otherwise, this is a parse error. Consume the remnants of a bad url,
                    // create a <bad-url-token>, and return it.
                    offset = consumeBadUrlRemnants(source, offset);
                    type = TYPE.BadUrl;
                    return;

                // anything else
                // Append the current input code point to the <url-token>'s value.
            }
        }
    }

    // https://drafts.csswg.org/css-syntax-3/#consume-token
    // § 4.3.1. Consume a token
    while (offset < sourceLength) {
        const code = source.charCodeAt(offset);

        type = 0;

        // Default balance value; overwritten below for brackets and, once an
        // enclosing pair gets closed, for the tokens inside that pair.
        balance[tokenCount] = sourceLength;

        switch (charCodeCategory(code)) {
            // whitespace
            case charCodeCategory.WhiteSpace:
                // Consume as much whitespace as possible. Return a <whitespace-token>.
                type = TYPE.WhiteSpace;
                offset = findWhiteSpaceEnd(source, offset + 1);
                break;

            // U+0022 QUOTATION MARK (")
            case 0x0022:
                // Consume a string token and return it.
                consumeStringToken();
                break;

            // U+0023 NUMBER SIGN (#)
            case 0x0023:
                // If the next input code point is a name code point or the next two input code points are a valid escape, then:
                if (isName(getCharCode(offset + 1)) || isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // Create a <hash-token>.
                    type = TYPE.Hash;

                    // TODO: per the spec, when the next 3 input code points would start an
                    // identifier the <hash-token>'s type flag should be set to "id";
                    // that flag is not tracked here.

                    // Consume a name, and set the <hash-token>'s value to the returned string.
                    offset = consumeName(source, offset + 1);

                    // Return the <hash-token>.
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+0027 APOSTROPHE (')
            case 0x0027:
                // Consume a string token and return it.
                consumeStringToken();
                break;

            // U+0028 LEFT PARENTHESIS (()
            case 0x0028:
                // Return a <(-token>.
                type = TYPE.LeftParenthesis;
                offset++;
                break;

            // U+0029 RIGHT PARENTHESIS ())
            case 0x0029:
                // Return a <)-token>.
                type = TYPE.RightParenthesis;
                offset++;
                break;

            // U+002B PLUS SIGN (+)
            case 0x002B:
                // If the input stream starts with a number, ...
                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // ... reconsume the current input code point, consume a numeric token, and return it.
                    consumeNumericToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }
                break;

            // U+002C COMMA (,)
            case 0x002C:
                // Return a <comma-token>.
                type = TYPE.Comma;
                offset++;
                break;

            // U+002D HYPHEN-MINUS (-)
            case 0x002D:
                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // If the input stream starts with a number, reconsume the current input code point,
                    // consume a numeric token, and return it.
                    consumeNumericToken();
                } else if (getCharCode(offset + 1) === 0x002D &&
                           getCharCode(offset + 2) === 0x003E) {
                    // Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS
                    // U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
                    type = TYPE.CDC;
                    offset = offset + 3;
                } else if (isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // Otherwise, if the input stream starts with an identifier, reconsume the
                    // current input code point, consume an ident-like token, and return it.
                    consumeIdentLikeToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }
                break;

            // U+002E FULL STOP (.)
            case 0x002E:
                // If the input stream starts with a number, ...
                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // ... reconsume the current input code point, consume a numeric token, and return it.
                    consumeNumericToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+002F SOLIDUS (/)
            case 0x002F:
                // If the current code point is followed by a U+002A ASTERISK (*), i.e. the input is `/*`, ...
                if (getCharCode(offset + 1) === 0x002A) {
                    // ... consume all following code points up to and including the first U+002A ASTERISK (*)
                    // followed by a U+002F SOLIDUS (/), or up to an EOF code point.
                    const commentEnd = source.indexOf('*/', offset + 2);

                    type = TYPE.Comment;
                    // An unterminated comment runs to the end of the source.
                    offset = commentEnd === -1 ? sourceLength : commentEnd + 2;
                } else {
                    type = TYPE.Delim;
                    offset++;
                }
                break;

            // U+003A COLON (:)
            case 0x003A:
                // Return a <colon-token>.
                type = TYPE.Colon;
                offset++;
                break;

            // U+003B SEMICOLON (;)
            case 0x003B:
                // Return a <semicolon-token>.
                type = TYPE.Semicolon;
                offset++;
                break;

            // U+003C LESS-THAN SIGN (<)
            case 0x003C:
                // If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
                if (getCharCode(offset + 1) === 0x0021 &&
                    getCharCode(offset + 2) === 0x002D &&
                    getCharCode(offset + 3) === 0x002D) {
                    // ... consume them and return a <CDO-token>.
                    type = TYPE.CDO;
                    offset = offset + 4;
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+0040 COMMERCIAL AT (@)
            case 0x0040:
                // If the next 3 input code points would start an identifier, ...
                if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
                    // ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
                    type = TYPE.AtKeyword;
                    offset = consumeName(source, offset + 1);
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+005B LEFT SQUARE BRACKET ([)
            case 0x005B:
                // Return a <[-token>.
                type = TYPE.LeftSquareBracket;
                offset++;
                break;

            // U+005C REVERSE SOLIDUS (\)
            case 0x005C:
                // If the input stream starts with a valid escape, ...
                if (isValidEscape(code, getCharCode(offset + 1))) {
                    // ... reconsume the current input code point, consume an ident-like token, and return it.
                    consumeIdentLikeToken();
                } else {
                    // Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }
                break;

            // U+005D RIGHT SQUARE BRACKET (])
            case 0x005D:
                // Return a <]-token>.
                type = TYPE.RightSquareBracket;
                offset++;
                break;

            // U+007B LEFT CURLY BRACKET ({)
            case 0x007B:
                // Return a <{-token>.
                type = TYPE.LeftCurlyBracket;
                offset++;
                break;

            // U+007D RIGHT CURLY BRACKET (})
            case 0x007D:
                // Return a <}-token>.
                type = TYPE.RightCurlyBracket;
                offset++;
                break;

            // digit
            case charCodeCategory.Digit:
                // Reconsume the current input code point, consume a numeric token, and return it.
                consumeNumericToken();
                break;

            // name-start code point
            case charCodeCategory.NameStart:
                // Reconsume the current input code point, consume an ident-like token, and return it.
                consumeIdentLikeToken();
                break;

            // EOF
            case charCodeCategory.Eof:
                // Return an <EOF-token>.
                // NOTE(review): nothing is consumed on this branch, so the loop only makes
                // progress if charCodeCategory() never reports Eof for a code point read
                // from inside the source -- confirm against char-code-definitions.
                break;

            // anything else
            default:
                // Return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
        }

        switch (type) {
            // The token closes the innermost open bracket: pop it from the stack.
            case balanceCloseType:
                balancePrev = balanceStart & OFFSET_MASK;       // index of the matching opener
                balanceStart = balance[balancePrev];            // restore the enclosing bracket's state
                balanceCloseType = balanceStart >> TYPE_SHIFT;
                balance[tokenCount] = balancePrev;              // closer -> opener
                balance[balancePrev++] = tokenCount;            // opener -> closer
                // Tokens between the pair that still carry the default value
                // are pointed at the closing token.
                for (; balancePrev < tokenCount; balancePrev++) {
                    if (balance[balancePrev] === sourceLength) {
                        balance[balancePrev] = tokenCount;
                    }
                }
                break;

            // The token opens a bracket: push it, keeping the previous state in its balance slot.
            case TYPE.LeftParenthesis:
            case TYPE.Function:
                balance[tokenCount] = balanceStart;
                balanceCloseType = TYPE.RightParenthesis;
                balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                break;

            case TYPE.LeftSquareBracket:
                balance[tokenCount] = balanceStart;
                balanceCloseType = TYPE.RightSquareBracket;
                balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                break;

            case TYPE.LeftCurlyBracket:
                balance[tokenCount] = balanceStart;
                balanceCloseType = TYPE.RightCurlyBracket;
                balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                break;
        }

        offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | offset;
    }

    // finalize buffers
    offsetAndType[tokenCount] = (TYPE.EOF << TYPE_SHIFT) | offset; // <EOF-token>
    balance[tokenCount] = sourceLength;
    balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
    // Brackets that were never closed still hold stack links; reset them to the default value.
    while (balanceStart !== 0) {
        balancePrev = balanceStart & OFFSET_MASK;
        balanceStart = balance[balancePrev];
        balance[balancePrev] = sourceLength;
    }

    // update stream
    stream.source = source;
    stream.firstCharOffset = start;
    stream.offsetAndType = offsetAndType;
    stream.tokenCount = tokenCount;
    stream.balance = balance;
    stream.reset();
    stream.next();

    return stream;
}

// Extend the tokenizer function with static members, copied in this order
// (a later source overwrites a same-named key from an earlier one):
//   1. constants from ./const
//   2. the members of ./char-code-definitions
//   3. the members of ./utils
for (const staticMembers of [constants, charCodeDefinitions, utils]) {
    for (const key of Object.keys(staticMembers)) {
        tokenize[key] = staticMembers[key];
    }
}

module.exports = tokenize;
// Note: See TracBrowser for help on using the repository browser.