Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: trip-planner-front/node_modules/css-tree/lib/tokenizer/index.js

Last change on this file was 6a3a178, checked in by Ema <ema_spirova@…>, 3 years ago
initial commit
Property mode set to `100644`
File size: 23.3 KB

Line
1	var TokenStream = require('../common/TokenStream');
2	var adoptBuffer = require('../common/adopt-buffer');
3
4	var constants = require('./const');
5	var TYPE = constants.TYPE;
6
7	var charCodeDefinitions = require('./char-code-definitions');
8	var isNewline = charCodeDefinitions.isNewline;
9	var isName = charCodeDefinitions.isName;
10	var isValidEscape = charCodeDefinitions.isValidEscape;
11	var isNumberStart = charCodeDefinitions.isNumberStart;
12	var isIdentifierStart = charCodeDefinitions.isIdentifierStart;
13	var charCodeCategory = charCodeDefinitions.charCodeCategory;
14	var isBOM = charCodeDefinitions.isBOM;
15
16	var utils = require('./utils');
17	var cmpStr = utils.cmpStr;
18	var getNewlineLength = utils.getNewlineLength;
19	var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
20	var consumeEscaped = utils.consumeEscaped;
21	var consumeName = utils.consumeName;
22	var consumeNumber = utils.consumeNumber;
23	var consumeBadUrlRemnants = utils.consumeBadUrlRemnants;
24
// Bit layout shared by the packed 32-bit values built in tokenize():
//   (type << TYPE_SHIFT) | low24
// OFFSET_MASK selects the low 24 bits (tokenize applies it to balanceStart to get a token index),
// TYPE_SHIFT is the bit position at which a token type is stored above them
// (used for offsetAndType entries and for balanceStart).
// A value stored in the low field must therefore stay below 2^24 to avoid spilling into the type bits.
25	var OFFSET_MASK = 0x00FFFFFF;
26	var TYPE_SHIFT = 24;
27
// Splits a CSS source string into tokens, following the CSS Syntax Level 3
// tokenizer algorithms referenced in the section comments below, and stores the result in a token stream.
//
// @param {*} source - CSS text; it is coerced with String(source || ...), so any falsy input becomes an empty string.
// @param {TokenStream} [stream] - stream whose buffers are reused and refilled; a new TokenStream is created when omitted.
// @returns {TokenStream} the same stream, with source, firstCharOffset, offsetAndType, tokenCount and balance
//   assigned and reset() followed by next() already called.
//
// Two parallel buffers receive one entry per token (index i is the token index):
//   offsetAndType[i] = (type << TYPE_SHIFT) | offset, where offset is the position just past the token;
//   balance[i]       = for a matched opening bracket the index of its closer, for that closer the index of
//                      the opener, for other tokens inside a matched pair the index of the closer that
//                      was reached first, otherwise sourceLength (meaning: no match).
// The nested consume* helpers do not return a value: they communicate with the main loop through the
// closure variables offset and type (both are var declarations further down, hoisted to this function).
28	function tokenize(source, stream) {
// Returns the code unit at the given offset, or 0 when the offset is at or past the end of source,
// so that look-ahead beyond the end never yields NaN from charCodeAt.
29	function getCharCode(offset) {
30	return offset < sourceLength ? source.charCodeAt(offset) : 0;
31	}
32
33	// § 4.3.3. Consume a numeric token
// Advances offset past a number (plus unit or percent sign) and sets type to Dimension, Percentage or Number.
34	function consumeNumericToken() {
35	// Consume a number and let number be the result.
36	offset = consumeNumber(source, offset);
37
38	// If the next 3 input code points would start an identifier, then:
39	if (isIdentifierStart(getCharCode(offset), getCharCode(offset + 1), getCharCode(offset + 2))) {
40	// Create a <dimension-token> with the same value and type flag as number, and a unit set initially to the empty string.
41	// Consume a name. Set the <dimension-token>’s unit to the returned value.
42	// Return the <dimension-token>.
43	type = TYPE.Dimension;
44	offset = consumeName(source, offset);
45	return;
46	}
47
48	// Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
49	if (getCharCode(offset) === 0x0025) {
50	// Create a <percentage-token> with the same value as number, and return it.
51	type = TYPE.Percentage;
52	offset++;
53	return;
54	}
55
56	// Otherwise, create a <number-token> with the same value and type flag as number, and return it.
57	type = TYPE.Number;
58	}
59
60	// § 4.3.4. Consume an ident-like token
// Advances offset past a name and sets type to Function, Ident, or (through consumeUrlToken) Url or BadUrl.
61	function consumeIdentLikeToken() {
62	const nameStartOffset = offset;
63
64	// Consume a name, and let string be the result.
65	offset = consumeName(source, offset);
66
67	// If string’s value is an ASCII case-insensitive match for "url",
68	// and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
69	if (cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
70	// While the next two input code points are whitespace, consume the next input code point.
71	offset = findWhiteSpaceEnd(source, offset + 1);
72
73	// If the next one or two input code points are U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
74	// or whitespace followed by U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
75	// then create a <function-token> with its value set to string and return it.
76	if (getCharCode(offset) === 0x0022 \|\|
77	getCharCode(offset) === 0x0027) {
78	type = TYPE.Function;
// Rewind to 4 code units after the name start, i.e. just past the name and the left parenthesis,
// so the skipped whitespace and the quoted string are tokenized by the main loop.
// NOTE(review): this assumes the matched name is exactly 3 code units long; cmpStr is defined elsewhere - confirm.
79	offset = nameStartOffset + 4;
80	return;
81	}
82
83	// Otherwise, consume a url token, and return it.
84	consumeUrlToken();
85	return;
86	}
87
88	// Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
89	// Create a <function-token> with its value set to string and return it.
90	if (getCharCode(offset) === 0x0028) {
91	type = TYPE.Function;
92	offset++;
93	return;
94	}
95
96	// Otherwise, create an <ident-token> with its value set to string and return it.
97	type = TYPE.Ident;
98	}
99
100	// § 4.3.5. Consume a string token
// Sets type to String (or BadString when an unescaped newline is hit) and advances offset past the string.
// Every call inside tokenize passes no argument, so the quote found at offset becomes the ending code point.
101	function consumeStringToken(endingCodePoint) {
102	// This algorithm may be called with an ending code point, which denotes the code point
103	// that ends the string. If an ending code point is not specified,
104	// the current input code point is used.
105	if (!endingCodePoint) {
// read the opening quote and step past it in one go
106	endingCodePoint = getCharCode(offset++);
107	}
108
109	// Initially create a <string-token> with its value set to the empty string.
110	type = TYPE.String;
111
112	// Repeatedly consume the next input code point from the stream:
113	for (; offset < source.length; offset++) {
114	var code = source.charCodeAt(offset);
115
116	switch (charCodeCategory(code)) {
117	// ending code point
118	case endingCodePoint:
119	// Return the <string-token>.
120	offset++;
121	return;
122
123	// EOF
124	case charCodeCategory.Eof:
125	// This is a parse error. Return the <string-token>.
126	return;
127
128	// newline
129	case charCodeCategory.WhiteSpace:
130	if (isNewline(code)) {
131	// This is a parse error. Reconsume the current input code point,
132	// create a <bad-string-token>, and return it.
// NOTE(review): the spec text above says to reconsume the newline, but offset is advanced by
// getNewlineLength(...) here, so the newline appears to end up inside the bad-string token - confirm intent.
133	offset += getNewlineLength(source, offset, code);
134	type = TYPE.BadString;
135	return;
136	}
137	break;
138
139	// U+005C REVERSE SOLIDUS (\)
140	case 0x005C:
141	// If the next input code point is EOF, do nothing.
142	if (offset === source.length - 1) {
143	break;
144	}
145
146	var nextCode = getCharCode(offset + 1);
147
148	// Otherwise, if the next input code point is a newline, consume it.
149	if (isNewline(nextCode)) {
// advance by the newline length; together with the loop increment this steps over the backslash
// and the newline (assuming getNewlineLength returns the length of the newline sequence)
150	offset += getNewlineLength(source, offset + 1, nextCode);
151	} else if (isValidEscape(code, nextCode)) {
152	// Otherwise, (the stream starts with a valid escape) consume
153	// an escaped code point and append the returned code point to
154	// the <string-token>’s value.
// the -1 compensates for the loop increment that follows the break
155	offset = consumeEscaped(source, offset) - 1;
156	}
157	break;
158
159	// anything else
160	// Append the current input code point to the <string-token>’s value.
161	}
162	}
163	}
164
165	// § 4.3.6. Consume a url token
166	// Note: This algorithm assumes that the initial "url(" has already been consumed.
167	// This algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
168	// A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
169	// automatically handles this distinction; this algorithm shouldn’t be called directly otherwise.
// Sets type to Url, or to BadUrl on a parse error, and advances offset to the end of the token.
170	function consumeUrlToken() {
171	// Initially create a <url-token> with its value set to the empty string.
172	type = TYPE.Url;
173
174	// Consume as much whitespace as possible.
175	offset = findWhiteSpaceEnd(source, offset);
176
177	// Repeatedly consume the next input code point from the stream:
178	for (; offset < source.length; offset++) {
179	var code = source.charCodeAt(offset);
180
181	switch (charCodeCategory(code)) {
182	// U+0029 RIGHT PARENTHESIS ())
183	case 0x0029:
184	// Return the <url-token>.
185	offset++;
186	return;
187
188	// EOF
189	case charCodeCategory.Eof:
190	// This is a parse error. Return the <url-token>.
191	return;
192
193	// whitespace
194	case charCodeCategory.WhiteSpace:
195	// Consume as much whitespace as possible.
196	offset = findWhiteSpaceEnd(source, offset);
197
198	// If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
199	// consume it and return the <url-token>
200	// (if EOF was encountered, this is a parse error);
201	if (getCharCode(offset) === 0x0029 \|\| offset >= source.length) {
// only step over the parenthesis; at end of input there is nothing left to consume
202	if (offset < source.length) {
203	offset++;
204	}
205	return;
206	}
207
208	// otherwise, consume the remnants of a bad url, create a <bad-url-token>,
209	// and return it.
210	offset = consumeBadUrlRemnants(source, offset);
211	type = TYPE.BadUrl;
212	return;
213
214	// U+0022 QUOTATION MARK (")
215	// U+0027 APOSTROPHE (')
216	// U+0028 LEFT PARENTHESIS (()
217	// non-printable code point
218	case 0x0022:
219	case 0x0027:
220	case 0x0028:
221	case charCodeCategory.NonPrintable:
222	// This is a parse error. Consume the remnants of a bad url,
223	// create a <bad-url-token>, and return it.
224	offset = consumeBadUrlRemnants(source, offset);
225	type = TYPE.BadUrl;
226	return;
227
228	// U+005C REVERSE SOLIDUS (\)
229	case 0x005C:
230	// If the stream starts with a valid escape, consume an escaped code point and
231	// append the returned code point to the <url-token>’s value.
232	if (isValidEscape(code, getCharCode(offset + 1))) {
// the -1 compensates for the loop increment that follows the break
233	offset = consumeEscaped(source, offset) - 1;
234	break;
235	}
236
237	// Otherwise, this is a parse error. Consume the remnants of a bad url,
238	// create a <bad-url-token>, and return it.
239	offset = consumeBadUrlRemnants(source, offset);
240	type = TYPE.BadUrl;
241	return;
242
243	// anything else
244	// Append the current input code point to the <url-token>’s value.
245	}
246	}
247	}
248
// ---- main body of tokenize starts here ----
249	if (!stream) {
250	stream = new TokenStream();
251	}
252
253	// ensure source is a string
254	source = String(source \|\| '');
255
256	var sourceLength = source.length;
257	var offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
258	var balance = adoptBuffer(stream.balance, sourceLength + 1);
259	var tokenCount = 0;
// The result of isBOM is used directly as the starting offset and is later exposed as
// stream.firstCharOffset (presumably 0 or the length of a leading BOM; isBOM is defined elsewhere).
260	var start = isBOM(getCharCode(0));
261	var offset = start;
// Bracket matching state:
//   balanceCloseType - token type that would close the innermost open bracket (0 when none is open)
//   balanceStart     - (balanceCloseType << TYPE_SHIFT) | index of the innermost opener (0 when none is open)
//   balancePrev      - scratch index used while closing a pair
262	var balanceCloseType = 0;
263	var balanceStart = 0;
264	var balancePrev = 0;
265
266	// https://drafts.csswg.org/css-syntax-3/#consume-token
267	// § 4.3.1. Consume a token
268	while (offset < sourceLength) {
269	var code = source.charCodeAt(offset);
270	var type = 0;
271
// default balance value: sourceLength marks a token without a matching bracket
272	balance[tokenCount] = sourceLength;
273
274	switch (charCodeCategory(code)) {
275	// whitespace
276	case charCodeCategory.WhiteSpace:
277	// Consume as much whitespace as possible. Return a <whitespace-token>.
278	type = TYPE.WhiteSpace;
279	offset = findWhiteSpaceEnd(source, offset + 1);
280	break;
281
282	// U+0022 QUOTATION MARK (")
283	case 0x0022:
284	// Consume a string token and return it.
285	consumeStringToken();
286	break;
287
288	// U+0023 NUMBER SIGN (#)
289	case 0x0023:
290	// If the next input code point is a name code point or the next two input code points are a valid escape, then:
291	if (isName(getCharCode(offset + 1)) \|\| isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
292	// Create a <hash-token>.
293	type = TYPE.Hash;
294
295	// If the next 3 input code points would start an identifier, set the <hash-token>’s type flag to "id".
296	// if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
297	// // TODO: set id flag
298	// }
299
300	// Consume a name, and set the <hash-token>’s value to the returned string.
301	offset = consumeName(source, offset + 1);
302
303	// Return the <hash-token>.
304	} else {
305	// Otherwise, return a <delim-token> with its value set to the current input code point.
306	type = TYPE.Delim;
307	offset++;
308	}
309
310	break;
311
312	// U+0027 APOSTROPHE (')
313	case 0x0027:
314	// Consume a string token and return it.
315	consumeStringToken();
316	break;
317
318	// U+0028 LEFT PARENTHESIS (()
319	case 0x0028:
320	// Return a <(-token>.
321	type = TYPE.LeftParenthesis;
322	offset++;
323	break;
324
325	// U+0029 RIGHT PARENTHESIS ())
326	case 0x0029:
327	// Return a <)-token>.
328	type = TYPE.RightParenthesis;
329	offset++;
330	break;
331
332	// U+002B PLUS SIGN (+)
333	case 0x002B:
334	// If the input stream starts with a number, ...
335	if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
336	// ... reconsume the current input code point, consume a numeric token, and return it.
337	consumeNumericToken();
338	} else {
339	// Otherwise, return a <delim-token> with its value set to the current input code point.
340	type = TYPE.Delim;
341	offset++;
342	}
343	break;
344
345	// U+002C COMMA (,)
346	case 0x002C:
347	// Return a <comma-token>.
348	type = TYPE.Comma;
349	offset++;
350	break;
351
352	// U+002D HYPHEN-MINUS (-)
353	case 0x002D:
354	// If the input stream starts with a number, reconsume the current input code point, consume a numeric token, and return it.
355	if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
356	consumeNumericToken();
357	} else {
358	// Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
359	if (getCharCode(offset + 1) === 0x002D &&
360	getCharCode(offset + 2) === 0x003E) {
361	type = TYPE.CDC;
362	offset = offset + 3;
363	} else {
364	// Otherwise, if the input stream starts with an identifier, ...
365	if (isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
366	// ... reconsume the current input code point, consume an ident-like token, and return it.
367	consumeIdentLikeToken();
368	} else {
369	// Otherwise, return a <delim-token> with its value set to the current input code point.
370	type = TYPE.Delim;
371	offset++;
372	}
373	}
374	}
375	break;
376
377	// U+002E FULL STOP (.)
378	case 0x002E:
379	// If the input stream starts with a number, ...
380	if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
381	// ... reconsume the current input code point, consume a numeric token, and return it.
382	consumeNumericToken();
383	} else {
384	// Otherwise, return a <delim-token> with its value set to the current input code point.
385	type = TYPE.Delim;
386	offset++;
387	}
388
389	break;
390
391	// U+002F SOLIDUS (/)
392	case 0x002F:
393	// If the current code point (U+002F SOLIDUS) is followed by a U+002A ASTERISK (*), a comment starts here:
394	if (getCharCode(offset + 1) === 0x002A) {
395	// ... consume them and all following code points up to and including the first U+002A ASTERISK (*)
396	// followed by a U+002F SOLIDUS (/), or up to an EOF code point.
397	type = TYPE.Comment;
// indexOf yields -1 when the comment terminator is missing, which makes the sum equal to 1;
// that case is detected below and the comment is extended to the end of the source
398	offset = source.indexOf('*/', offset + 2) + 2;
399	if (offset === 1) {
400	offset = source.length;
401	}
402	} else {
403	type = TYPE.Delim;
404	offset++;
405	}
406	break;
407
408	// U+003A COLON (:)
409	case 0x003A:
410	// Return a <colon-token>.
411	type = TYPE.Colon;
412	offset++;
413	break;
414
415	// U+003B SEMICOLON (;)
416	case 0x003B:
417	// Return a <semicolon-token>.
418	type = TYPE.Semicolon;
419	offset++;
420	break;
421
422	// U+003C LESS-THAN SIGN (<)
423	case 0x003C:
424	// If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
425	if (getCharCode(offset + 1) === 0x0021 &&
426	getCharCode(offset + 2) === 0x002D &&
427	getCharCode(offset + 3) === 0x002D) {
428	// ... consume them and return a <CDO-token>.
429	type = TYPE.CDO;
430	offset = offset + 4;
431	} else {
432	// Otherwise, return a <delim-token> with its value set to the current input code point.
433	type = TYPE.Delim;
434	offset++;
435	}
436
437	break;
438
439	// U+0040 COMMERCIAL AT (@)
440	case 0x0040:
441	// If the next 3 input code points would start an identifier, ...
442	if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
443	// ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
444	type = TYPE.AtKeyword;
445	offset = consumeName(source, offset + 1);
446	} else {
447	// Otherwise, return a <delim-token> with its value set to the current input code point.
448	type = TYPE.Delim;
449	offset++;
450	}
451
452	break;
453
454	// U+005B LEFT SQUARE BRACKET ([)
455	case 0x005B:
456	// Return a <[-token>.
457	type = TYPE.LeftSquareBracket;
458	offset++;
459	break;
460
461	// U+005C REVERSE SOLIDUS (\)
462	case 0x005C:
463	// If the input stream starts with a valid escape, ...
464	if (isValidEscape(code, getCharCode(offset + 1))) {
465	// ... reconsume the current input code point, consume an ident-like token, and return it.
466	consumeIdentLikeToken();
467	} else {
468	// Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
469	type = TYPE.Delim;
470	offset++;
471	}
472	break;
473
474	// U+005D RIGHT SQUARE BRACKET (])
475	case 0x005D:
476	// Return a <]-token>.
477	type = TYPE.RightSquareBracket;
478	offset++;
479	break;
480
481	// U+007B LEFT CURLY BRACKET ({)
482	case 0x007B:
483	// Return a <{-token>.
484	type = TYPE.LeftCurlyBracket;
485	offset++;
486	break;
487
488	// U+007D RIGHT CURLY BRACKET (})
489	case 0x007D:
490	// Return a <}-token>.
491	type = TYPE.RightCurlyBracket;
492	offset++;
493	break;
494
495	// digit
496	case charCodeCategory.Digit:
497	// Reconsume the current input code point, consume a numeric token, and return it.
498	consumeNumericToken();
499	break;
500
501	// name-start code point
502	case charCodeCategory.NameStart:
503	// Reconsume the current input code point, consume an ident-like token, and return it.
504	consumeIdentLikeToken();
505	break;
506
507	// EOF
508	case charCodeCategory.Eof:
509	// Return an <EOF-token>.
// NOTE(review): this branch leaves type at 0 and does not advance offset; if charCodeCategory can
// report Eof for a code unit inside source, the enclosing loop would make no progress - confirm
// against char-code-definitions.
510	break;
511
512	// anything else
513	default:
514	// Return a <delim-token> with its value set to the current input code point.
515	type = TYPE.Delim;
516	offset++;
517	}
518
// Bracket balance bookkeeping for the token that was just consumed.
519	switch (type) {
// The token closes the innermost open bracket. A closer of any other kind matches no case
// in this switch and is left with the default balance value.
520	case balanceCloseType:
// index of the opener that is being closed
521	balancePrev = balanceStart & OFFSET_MASK;
// the opener slot temporarily held the state of the enclosing bracket: restore it
522	balanceStart = balance[balancePrev];
523	balanceCloseType = balanceStart >> TYPE_SHIFT;
// link closer -> opener and opener -> closer
524	balance[tokenCount] = balancePrev;
525	balance[balancePrev++] = tokenCount;
// tokens between the pair that have no match of their own now point at this closer
526	for (; balancePrev < tokenCount; balancePrev++) {
527	if (balance[balancePrev] === sourceLength) {
528	balance[balancePrev] = tokenCount;
529	}
530	}
531	break;
532
// Opening brackets: stash the current state in the slot of this token, then make this token
// the innermost opener together with the type of closer it expects.
533	case TYPE.LeftParenthesis:
534	case TYPE.Function:
535	balance[tokenCount] = balanceStart;
536	balanceCloseType = TYPE.RightParenthesis;
537	balanceStart = (balanceCloseType << TYPE_SHIFT) \| tokenCount;
538	break;
539
540	case TYPE.LeftSquareBracket:
541	balance[tokenCount] = balanceStart;
542	balanceCloseType = TYPE.RightSquareBracket;
543	balanceStart = (balanceCloseType << TYPE_SHIFT) \| tokenCount;
544	break;
545
546	case TYPE.LeftCurlyBracket:
547	balance[tokenCount] = balanceStart;
548	balanceCloseType = TYPE.RightCurlyBracket;
549	balanceStart = (balanceCloseType << TYPE_SHIFT) \| tokenCount;
550	break;
551	}
552
// record the token: its type in the high bits, the offset just past it in the low bits
553	offsetAndType[tokenCount++] = (type << TYPE_SHIFT) \| offset;
554	}
555
556	// finalize buffers
557	offsetAndType[tokenCount] = (TYPE.EOF << TYPE_SHIFT) \| offset; // <EOF-token>
558	balance[tokenCount] = sourceLength;
559	balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
// Unwind openers that were never closed: walk the chain of stashed states and
// replace each stashed value with the no-match marker.
560	while (balanceStart !== 0) {
561	balancePrev = balanceStart & OFFSET_MASK;
562	balanceStart = balance[balancePrev];
563	balance[balancePrev] = sourceLength;
564	}
565
566	// update stream
567	stream.source = source;
568	stream.firstCharOffset = start;
569	stream.offsetAndType = offsetAndType;
570	stream.tokenCount = tokenCount;
571	stream.balance = balance;
// reset() and next() are TokenStream methods defined elsewhere; they are invoked once the
// buffers above have been assigned
572	stream.reset();
573	stream.next();
574
575	return stream;
576	}
577
578	// extend tokenizer with constants: every own enumerable key of ./const (TYPE among them) becomes a property of tokenize
579	Object.keys(constants).forEach(function(key) {
580	tokenize[key] = constants[key];
581	});
582
583	// extend tokenizer with everything exported by ./char-code-definitions and then by ./utils;
// the copies run in this order, so on a name clash the later assignment overwrites the earlier one
584	Object.keys(charCodeDefinitions).forEach(function(key) {
585	tokenize[key] = charCodeDefinitions[key];
586	});
587	Object.keys(utils).forEach(function(key) {
588	tokenize[key] = utils[key];
589	});
590
// the tokenize function, carrying the properties copied above, is the module export
591	module.exports = tokenize;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: