Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: imaps-frontend/node_modules/css-tree/lib/tokenizer/index.js@ d565449

main

Last change on this file since d565449 was d565449, checked in by stefan toskovski <stefantoska84@…>, 4 weeks ago
Update repo after prototype presentation
Property mode set to `100644`
File size: 23.3 KB

Rev	Line
// Project-local dependencies of the tokenizer.
var TokenStream = require('../common/TokenStream');
var adoptBuffer = require('../common/adopt-buffer');

var constants = require('./const');
var TYPE = constants.TYPE;

var charCodeDefinitions = require('./char-code-definitions');
var isNewline = charCodeDefinitions.isNewline;
var isName = charCodeDefinitions.isName;
var isValidEscape = charCodeDefinitions.isValidEscape;
var isNumberStart = charCodeDefinitions.isNumberStart;
var isIdentifierStart = charCodeDefinitions.isIdentifierStart;
var charCodeCategory = charCodeDefinitions.charCodeCategory;
var isBOM = charCodeDefinitions.isBOM;

var utils = require('./utils');
var cmpStr = utils.cmpStr;
var getNewlineLength = utils.getNewlineLength;
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
var consumeEscaped = utils.consumeEscaped;
var consumeName = utils.consumeName;
var consumeNumber = utils.consumeNumber;
var consumeBadUrlRemnants = utils.consumeBadUrlRemnants;

// Layout of one packed 32-bit token entry: the low 24 bits carry an offset
// (or a token index), the bits from TYPE_SHIFT upwards carry a token type.
var OFFSET_MASK = 0x00FFFFFF;
var TYPE_SHIFT = 24;
	27
	28	function tokenize(source, stream) {
	29	function getCharCode(offset) {
	30	return offset < sourceLength ? source.charCodeAt(offset) : 0;
	31	}
	32
	33	// § 4.3.3. Consume a numeric token
	34	function consumeNumericToken() {
	35	// Consume a number and let number be the result.
	36	offset = consumeNumber(source, offset);
	37
	38	// If the next 3 input code points would start an identifier, then:
	39	if (isIdentifierStart(getCharCode(offset), getCharCode(offset + 1), getCharCode(offset + 2))) {
	40	// Create a <dimension-token> with the same value and type flag as number, and a unit set initially to the empty string.
	41	// Consume a name. Set the <dimension-token>’s unit to the returned value.
	42	// Return the <dimension-token>.
	43	type = TYPE.Dimension;
	44	offset = consumeName(source, offset);
	45	return;
	46	}
	47
	48	// Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
	49	if (getCharCode(offset) === 0x0025) {
	50	// Create a <percentage-token> with the same value as number, and return it.
	51	type = TYPE.Percentage;
	52	offset++;
	53	return;
	54	}
	55
	56	// Otherwise, create a <number-token> with the same value and type flag as number, and return it.
	57	type = TYPE.Number;
	58	}
	59
	60	// § 4.3.4. Consume an ident-like token
	61	function consumeIdentLikeToken() {
	62	const nameStartOffset = offset;
	63
	64	// Consume a name, and let string be the result.
	65	offset = consumeName(source, offset);
	66
	67	// If string’s value is an ASCII case-insensitive match for "url",
	68	// and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
	69	if (cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
	70	// While the next two input code points are whitespace, consume the next input code point.
	71	offset = findWhiteSpaceEnd(source, offset + 1);
	72
	73	// If the next one or two input code points are U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
	74	// or whitespace followed by U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
	75	// then create a <function-token> with its value set to string and return it.
	76	if (getCharCode(offset) === 0x0022 \|\|
	77	getCharCode(offset) === 0x0027) {
	78	type = TYPE.Function;
	79	offset = nameStartOffset + 4;
	80	return;
	81	}
	82
	83	// Otherwise, consume a url token, and return it.
	84	consumeUrlToken();
	85	return;
	86	}
	87
	88	// Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
	89	// Create a <function-token> with its value set to string and return it.
	90	if (getCharCode(offset) === 0x0028) {
	91	type = TYPE.Function;
	92	offset++;
	93	return;
	94	}
	95
	96	// Otherwise, create an <ident-token> with its value set to string and return it.
	97	type = TYPE.Ident;
	98	}
	99
	100	// § 4.3.5. Consume a string token
	101	function consumeStringToken(endingCodePoint) {
	102	// This algorithm may be called with an ending code point, which denotes the code point
	103	// that ends the string. If an ending code point is not specified,
	104	// the current input code point is used.
	105	if (!endingCodePoint) {
	106	endingCodePoint = getCharCode(offset++);
	107	}
	108
	109	// Initially create a <string-token> with its value set to the empty string.
	110	type = TYPE.String;
	111
	112	// Repeatedly consume the next input code point from the stream:
	113	for (; offset < source.length; offset++) {
	114	var code = source.charCodeAt(offset);
	115
	116	switch (charCodeCategory(code)) {
	117	// ending code point
	118	case endingCodePoint:
	119	// Return the <string-token>.
	120	offset++;
	121	return;
	122
	123	// EOF
	124	case charCodeCategory.Eof:
	125	// This is a parse error. Return the <string-token>.
	126	return;
	127
	128	// newline
	129	case charCodeCategory.WhiteSpace:
	130	if (isNewline(code)) {
	131	// This is a parse error. Reconsume the current input code point,
	132	// create a <bad-string-token>, and return it.
	133	offset += getNewlineLength(source, offset, code);
	134	type = TYPE.BadString;
	135	return;
	136	}
	137	break;
	138
	139	// U+005C REVERSE SOLIDUS (\)
	140	case 0x005C:
	141	// If the next input code point is EOF, do nothing.
	142	if (offset === source.length - 1) {
	143	break;
	144	}
	145
	146	var nextCode = getCharCode(offset + 1);
	147
	148	// Otherwise, if the next input code point is a newline, consume it.
	149	if (isNewline(nextCode)) {
	150	offset += getNewlineLength(source, offset + 1, nextCode);
	151	} else if (isValidEscape(code, nextCode)) {
	152	// Otherwise, (the stream starts with a valid escape) consume
	153	// an escaped code point and append the returned code point to
	154	// the <string-token>’s value.
	155	offset = consumeEscaped(source, offset) - 1;
	156	}
	157	break;
	158
	159	// anything else
	160	// Append the current input code point to the <string-token>’s value.
	161	}
	162	}
	163	}
	164
	165	// § 4.3.6. Consume a url token
	166	// Note: This algorithm assumes that the initial "url(" has already been consumed.
	167	// This algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
	168	// A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
	169	// automatically handles this distinction; this algorithm shouldn’t be called directly otherwise.
	170	function consumeUrlToken() {
	171	// Initially create a <url-token> with its value set to the empty string.
	172	type = TYPE.Url;
	173
	174	// Consume as much whitespace as possible.
	175	offset = findWhiteSpaceEnd(source, offset);
	176
	177	// Repeatedly consume the next input code point from the stream:
	178	for (; offset < source.length; offset++) {
	179	var code = source.charCodeAt(offset);
	180
	181	switch (charCodeCategory(code)) {
	182	// U+0029 RIGHT PARENTHESIS ())
	183	case 0x0029:
	184	// Return the <url-token>.
	185	offset++;
	186	return;
	187
	188	// EOF
	189	case charCodeCategory.Eof:
	190	// This is a parse error. Return the <url-token>.
	191	return;
	192
	193	// whitespace
	194	case charCodeCategory.WhiteSpace:
	195	// Consume as much whitespace as possible.
	196	offset = findWhiteSpaceEnd(source, offset);
	197
	198	// If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
	199	// consume it and return the <url-token>
	200	// (if EOF was encountered, this is a parse error);
	201	if (getCharCode(offset) === 0x0029 \|\| offset >= source.length) {
	202	if (offset < source.length) {
	203	offset++;
	204	}
	205	return;
	206	}
	207
	208	// otherwise, consume the remnants of a bad url, create a <bad-url-token>,
	209	// and return it.
	210	offset = consumeBadUrlRemnants(source, offset);
	211	type = TYPE.BadUrl;
	212	return;
	213
	214	// U+0022 QUOTATION MARK (")
	215	// U+0027 APOSTROPHE (')
	216	// U+0028 LEFT PARENTHESIS (()
	217	// non-printable code point
	218	case 0x0022:
	219	case 0x0027:
	220	case 0x0028:
	221	case charCodeCategory.NonPrintable:
	222	// This is a parse error. Consume the remnants of a bad url,
	223	// create a <bad-url-token>, and return it.
	224	offset = consumeBadUrlRemnants(source, offset);
	225	type = TYPE.BadUrl;
	226	return;
	227
	228	// U+005C REVERSE SOLIDUS (\)
	229	case 0x005C:
	230	// If the stream starts with a valid escape, consume an escaped code point and
	231	// append the returned code point to the <url-token>’s value.
	232	if (isValidEscape(code, getCharCode(offset + 1))) {
	233	offset = consumeEscaped(source, offset) - 1;
	234	break;
	235	}
	236
	237	// Otherwise, this is a parse error. Consume the remnants of a bad url,
	238	// create a <bad-url-token>, and return it.
	239	offset = consumeBadUrlRemnants(source, offset);
	240	type = TYPE.BadUrl;
	241	return;
	242
	243	// anything else
	244	// Append the current input code point to the <url-token>’s value.
	245	}
	246	}
	247	}
	248
	249	if (!stream) {
	250	stream = new TokenStream();
	251	}
	252
	253	// ensure source is a string
	254	source = String(source \|\| '');
	255
	256	var sourceLength = source.length;
	257	var offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
	258	var balance = adoptBuffer(stream.balance, sourceLength + 1);
	259	var tokenCount = 0;
	260	var start = isBOM(getCharCode(0));
	261	var offset = start;
	262	var balanceCloseType = 0;
	263	var balanceStart = 0;
	264	var balancePrev = 0;
	265
	266	// https://drafts.csswg.org/css-syntax-3/#consume-token
	267	// § 4.3.1. Consume a token
	268	while (offset < sourceLength) {
	269	var code = source.charCodeAt(offset);
	270	var type = 0;
	271
	272	balance[tokenCount] = sourceLength;
	273
	274	switch (charCodeCategory(code)) {
	275	// whitespace
	276	case charCodeCategory.WhiteSpace:
	277	// Consume as much whitespace as possible. Return a <whitespace-token>.
	278	type = TYPE.WhiteSpace;
	279	offset = findWhiteSpaceEnd(source, offset + 1);
	280	break;
	281
	282	// U+0022 QUOTATION MARK (")
	283	case 0x0022:
	284	// Consume a string token and return it.
	285	consumeStringToken();
	286	break;
	287
	288	// U+0023 NUMBER SIGN (#)
	289	case 0x0023:
	290	// If the next input code point is a name code point or the next two input code points are a valid escape, then:
	291	if (isName(getCharCode(offset + 1)) \|\| isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
	292	// Create a <hash-token>.
	293	type = TYPE.Hash;
	294
	295	// If the next 3 input code points would start an identifier, set the <hash-token>’s type flag to "id".
	296	// if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
	297	// // TODO: set id flag
	298	// }
	299
	300	// Consume a name, and set the <hash-token>’s value to the returned string.
	301	offset = consumeName(source, offset + 1);
	302
	303	// Return the <hash-token>.
	304	} else {
	305	// Otherwise, return a <delim-token> with its value set to the current input code point.
	306	type = TYPE.Delim;
	307	offset++;
	308	}
	309
	310	break;
	311
	312	// U+0027 APOSTROPHE (')
	313	case 0x0027:
	314	// Consume a string token and return it.
	315	consumeStringToken();
	316	break;
	317
	318	// U+0028 LEFT PARENTHESIS (()
	319	case 0x0028:
	320	// Return a <(-token>.
	321	type = TYPE.LeftParenthesis;
	322	offset++;
	323	break;
	324
	325	// U+0029 RIGHT PARENTHESIS ())
	326	case 0x0029:
	327	// Return a <)-token>.
	328	type = TYPE.RightParenthesis;
	329	offset++;
	330	break;
	331
	332	// U+002B PLUS SIGN (+)
	333	case 0x002B:
	334	// If the input stream starts with a number, ...
	335	if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	336	// ... reconsume the current input code point, consume a numeric token, and return it.
	337	consumeNumericToken();
	338	} else {
	339	// Otherwise, return a <delim-token> with its value set to the current input code point.
	340	type = TYPE.Delim;
	341	offset++;
	342	}
	343	break;
	344
	345	// U+002C COMMA (,)
	346	case 0x002C:
	347	// Return a <comma-token>.
	348	type = TYPE.Comma;
	349	offset++;
	350	break;
	351
	352	// U+002D HYPHEN-MINUS (-)
	353	case 0x002D:
	354	// If the input stream starts with a number, reconsume the current input code point, consume a numeric token, and return it.
	355	if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	356	consumeNumericToken();
	357	} else {
	358	// Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
	359	if (getCharCode(offset + 1) === 0x002D &&
	360	getCharCode(offset + 2) === 0x003E) {
	361	type = TYPE.CDC;
	362	offset = offset + 3;
	363	} else {
	364	// Otherwise, if the input stream starts with an identifier, ...
	365	if (isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	366	// ... reconsume the current input code point, consume an ident-like token, and return it.
	367	consumeIdentLikeToken();
	368	} else {
	369	// Otherwise, return a <delim-token> with its value set to the current input code point.
	370	type = TYPE.Delim;
	371	offset++;
	372	}
	373	}
	374	}
	375	break;
	376
	377	// U+002E FULL STOP (.)
	378	case 0x002E:
	379	// If the input stream starts with a number, ...
	380	if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	381	// ... reconsume the current input code point, consume a numeric token, and return it.
	382	consumeNumericToken();
	383	} else {
	384	// Otherwise, return a <delim-token> with its value set to the current input code point.
	385	type = TYPE.Delim;
	386	offset++;
	387	}
	388
	389	break;
	390
	391	// U+002F SOLIDUS (/)
	392	case 0x002F:
	393	// If the next two input code point are U+002F SOLIDUS (/) followed by a U+002A ASTERISK (*),
	394	if (getCharCode(offset + 1) === 0x002A) {
	395	// ... consume them and all following code points up to and including the first U+002A ASTERISK (*)
	396	// followed by a U+002F SOLIDUS (/), or up to an EOF code point.
	397	type = TYPE.Comment;
	398	offset = source.indexOf('*/', offset + 2) + 2;
	399	if (offset === 1) {
	400	offset = source.length;
	401	}
	402	} else {
	403	type = TYPE.Delim;
	404	offset++;
	405	}
	406	break;
	407
	408	// U+003A COLON (:)
	409	case 0x003A:
	410	// Return a <colon-token>.
	411	type = TYPE.Colon;
	412	offset++;
	413	break;
	414
	415	// U+003B SEMICOLON (;)
	416	case 0x003B:
	417	// Return a <semicolon-token>.
	418	type = TYPE.Semicolon;
	419	offset++;
	420	break;
	421
	422	// U+003C LESS-THAN SIGN (<)
	423	case 0x003C:
	424	// If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
	425	if (getCharCode(offset + 1) === 0x0021 &&
	426	getCharCode(offset + 2) === 0x002D &&
	427	getCharCode(offset + 3) === 0x002D) {
	428	// ... consume them and return a <CDO-token>.
	429	type = TYPE.CDO;
	430	offset = offset + 4;
	431	} else {
	432	// Otherwise, return a <delim-token> with its value set to the current input code point.
	433	type = TYPE.Delim;
	434	offset++;
	435	}
	436
	437	break;
	438
	439	// U+0040 COMMERCIAL AT (@)
	440	case 0x0040:
	441	// If the next 3 input code points would start an identifier, ...
	442	if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
	443	// ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
	444	type = TYPE.AtKeyword;
	445	offset = consumeName(source, offset + 1);
	446	} else {
	447	// Otherwise, return a <delim-token> with its value set to the current input code point.
	448	type = TYPE.Delim;
	449	offset++;
	450	}
	451
	452	break;
	453
	454	// U+005B LEFT SQUARE BRACKET ([)
	455	case 0x005B:
	456	// Return a <[-token>.
	457	type = TYPE.LeftSquareBracket;
	458	offset++;
	459	break;
	460
	461	// U+005C REVERSE SOLIDUS (\)
	462	case 0x005C:
	463	// If the input stream starts with a valid escape, ...
	464	if (isValidEscape(code, getCharCode(offset + 1))) {
	465	// ... reconsume the current input code point, consume an ident-like token, and return it.
	466	consumeIdentLikeToken();
	467	} else {
	468	// Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
	469	type = TYPE.Delim;
	470	offset++;
	471	}
	472	break;
	473
	474	// U+005D RIGHT SQUARE BRACKET (])
	475	case 0x005D:
	476	// Return a <]-token>.
	477	type = TYPE.RightSquareBracket;
	478	offset++;
	479	break;
	480
	481	// U+007B LEFT CURLY BRACKET ({)
	482	case 0x007B:
	483	// Return a <{-token>.
	484	type = TYPE.LeftCurlyBracket;
	485	offset++;
	486	break;
	487
	488	// U+007D RIGHT CURLY BRACKET (})
	489	case 0x007D:
	490	// Return a <}-token>.
	491	type = TYPE.RightCurlyBracket;
	492	offset++;
	493	break;
	494
	495	// digit
	496	case charCodeCategory.Digit:
	497	// Reconsume the current input code point, consume a numeric token, and return it.
	498	consumeNumericToken();
	499	break;
	500
	501	// name-start code point
	502	case charCodeCategory.NameStart:
	503	// Reconsume the current input code point, consume an ident-like token, and return it.
	504	consumeIdentLikeToken();
	505	break;
	506
	507	// EOF
	508	case charCodeCategory.Eof:
	509	// Return an <EOF-token>.
	510	break;
	511
	512	// anything else
	513	default:
	514	// Return a <delim-token> with its value set to the current input code point.
	515	type = TYPE.Delim;
	516	offset++;
	517	}
	518
	519	switch (type) {
	520	case balanceCloseType:
	521	balancePrev = balanceStart & OFFSET_MASK;
	522	balanceStart = balance[balancePrev];
	523	balanceCloseType = balanceStart >> TYPE_SHIFT;
	524	balance[tokenCount] = balancePrev;
	525	balance[balancePrev++] = tokenCount;
	526	for (; balancePrev < tokenCount; balancePrev++) {
	527	if (balance[balancePrev] === sourceLength) {
	528	balance[balancePrev] = tokenCount;
	529	}
	530	}
	531	break;
	532
	533	case TYPE.LeftParenthesis:
	534	case TYPE.Function:
	535	balance[tokenCount] = balanceStart;
	536	balanceCloseType = TYPE.RightParenthesis;
	537	balanceStart = (balanceCloseType << TYPE_SHIFT) \| tokenCount;
	538	break;
	539
	540	case TYPE.LeftSquareBracket:
	541	balance[tokenCount] = balanceStart;
	542	balanceCloseType = TYPE.RightSquareBracket;
	543	balanceStart = (balanceCloseType << TYPE_SHIFT) \| tokenCount;
	544	break;
	545
	546	case TYPE.LeftCurlyBracket:
	547	balance[tokenCount] = balanceStart;
	548	balanceCloseType = TYPE.RightCurlyBracket;
	549	balanceStart = (balanceCloseType << TYPE_SHIFT) \| tokenCount;
	550	break;
	551	}
	552
	553	offsetAndType[tokenCount++] = (type << TYPE_SHIFT) \| offset;
	554	}
	555
	556	// finalize buffers
	557	offsetAndType[tokenCount] = (TYPE.EOF << TYPE_SHIFT) \| offset; // <EOF-token>
	558	balance[tokenCount] = sourceLength;
	559	balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
	560	while (balanceStart !== 0) {
	561	balancePrev = balanceStart & OFFSET_MASK;
	562	balanceStart = balance[balancePrev];
	563	balance[balancePrev] = sourceLength;
	564	}
	565
	566	// update stream
	567	stream.source = source;
	568	stream.firstCharOffset = start;
	569	stream.offsetAndType = offsetAndType;
	570	stream.tokenCount = tokenCount;
	571	stream.balance = balance;
	572	stream.reset();
	573	stream.next();
	574
	575	return stream;
	576	}
	577
	578	// extend tokenizer with constants
	579	Object.keys(constants).forEach(function(key) {
	580	tokenize[key] = constants[key];
	581	});
	582
	583	// extend tokenizer with static methods from utils
	584	Object.keys(charCodeDefinitions).forEach(function(key) {
	585	tokenize[key] = charCodeDefinitions[key];
	586	});
	587	Object.keys(utils).forEach(function(key) {
	588	tokenize[key] = utils[key];
	589	});
	590
	591	module.exports = tokenize;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: