1 | /**
|
---|
2 | * @license
|
---|
3 | * Copyright Google LLC All Rights Reserved.
|
---|
4 | *
|
---|
5 | * Use of this source code is governed by an MIT-style license that can be
|
---|
6 | * found in the LICENSE file at https://angular.io/license
|
---|
7 | */
|
---|
8 | import * as chars from '../chars';
|
---|
/**
 * Kinds of token the lexer can produce.
 *
 * The object is a two-way map: every name maps to its ordinal and every
 * ordinal maps back to its name.
 */
export var TokenType;
(function (TokenType) {
  // Position in this list is the ordinal assigned to the name.
  const names = [
    'Character',
    'Identifier',
    'PrivateIdentifier',
    'Keyword',
    'String',
    'Operator',
    'Number',
    'Error',
  ];
  names.forEach((name, ordinal) => {
    TokenType[name] = ordinal;
    TokenType[ordinal] = name;
  });
})(TokenType || (TokenType = {}));
|
---|
/** Identifier spellings that the scanner reports as keyword tokens rather than identifiers. */
const KEYWORDS = [
  'var',
  'let',
  'as',
  'null',
  'undefined',
  'true',
  'false',
  'if',
  'else',
  'this',
];
|
---|
/** Entry point that turns expression text into a list of tokens. */
export class Lexer {
  /**
   * Scans `text` from start to finish.
   *
   * @param text expression source handed to a fresh scanner
   * @returns every token the scanner yields, in order, up to (not including)
   *     the first `null`/`undefined` result
   */
  tokenize(text) {
    const scanner = new _Scanner(text);
    const tokens = [];
    for (let next = scanner.scanToken(); next != null; next = scanner.scanToken()) {
      tokens.push(next);
    }
    return tokens;
  }
}
|
---|
/**
 * One lexical token: a source span plus a kind and its numeric/textual payload.
 */
export class Token {
  /**
   * @param index offset stored as the token's start (the scanner in this file
   *     passes the offset of the token's first character)
   * @param end offset stored as the token's end (the scanner passes its cursor
   *     position after consuming the token)
   * @param type one of the `TokenType` values
   * @param numValue numeric payload (character code or parsed number; 0 otherwise)
   * @param strValue textual payload (token text or error message; '' for numbers)
   */
  constructor(index, end, type, numValue, strValue) {
    // Assigned in this order so the own-property order stays index, end, type, numValue, strValue.
    Object.assign(this, {index, end, type, numValue, strValue});
  }

  /** True for a character token whose code equals `code`. */
  isCharacter(code) {
    if (this.type != TokenType.Character) {
      return false;
    }
    return this.numValue == code;
  }

  /** True for a number token. */
  isNumber() {
    return this.type == TokenType.Number;
  }

  /** True for a string token. */
  isString() {
    return this.type == TokenType.String;
  }

  /** True for an operator token whose text equals `operator`. */
  isOperator(operator) {
    if (this.type != TokenType.Operator) {
      return false;
    }
    return this.strValue == operator;
  }

  /** True for an identifier token. */
  isIdentifier() {
    return this.type == TokenType.Identifier;
  }

  /** True for a private-identifier token. */
  isPrivateIdentifier() {
    return this.type == TokenType.PrivateIdentifier;
  }

  /** True for any keyword token. */
  isKeyword() {
    return this.type == TokenType.Keyword;
  }

  /** True for the keyword `let`. */
  isKeywordLet() {
    return hasKeyword(this, 'let');
  }

  /** True for the keyword `as`. */
  isKeywordAs() {
    return hasKeyword(this, 'as');
  }

  /** True for the keyword `null`. */
  isKeywordNull() {
    return hasKeyword(this, 'null');
  }

  /** True for the keyword `undefined`. */
  isKeywordUndefined() {
    return hasKeyword(this, 'undefined');
  }

  /** True for the keyword `true`. */
  isKeywordTrue() {
    return hasKeyword(this, 'true');
  }

  /** True for the keyword `false`. */
  isKeywordFalse() {
    return hasKeyword(this, 'false');
  }

  /** True for the keyword `this`. */
  isKeywordThis() {
    return hasKeyword(this, 'this');
  }

  /** True for an error token. */
  isError() {
    return this.type == TokenType.Error;
  }

  /** @returns the numeric value of a number token, or -1 for any other kind */
  toNumber() {
    if (this.type == TokenType.Number) {
      return this.numValue;
    }
    return -1;
  }

  /**
   * @returns `strValue` for textual kinds, the stringified `numValue` for
   *     number tokens, and `null` for an unrecognised `type`
   */
  toString() {
    const type = this.type;
    // Strict comparisons mirror `switch` matching semantics.
    const isTextual = type === TokenType.Character || type === TokenType.Identifier ||
        type === TokenType.Keyword || type === TokenType.Operator ||
        type === TokenType.PrivateIdentifier || type === TokenType.String ||
        type === TokenType.Error;
    if (isTextual) {
      return this.strValue;
    }
    return type === TokenType.Number ? this.numValue.toString() : null;
  }
}

/** True when `token` is a keyword token whose text equals `word`. */
function hasKeyword(token, word) {
  return token.type == TokenType.Keyword && token.strValue == word;
}
|
---|
/**
 * Builds a `Character` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param code character code; stored as `numValue`, with the matching
 *     one-character string stored as `strValue`
 */
function newCharacterToken(index, end, code) {
  const symbol = String.fromCharCode(code);
  return new Token(index, end, TokenType.Character, code, symbol);
}
|
---|
/**
 * Builds an `Identifier` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param text identifier text, stored as `strValue` (`numValue` is 0)
 */
function newIdentifierToken(index, end, text) {
  const kind = TokenType.Identifier;
  return new Token(index, end, kind, 0, text);
}
|
---|
/**
 * Builds a `PrivateIdentifier` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param text identifier text, stored as `strValue` (`numValue` is 0)
 */
function newPrivateIdentifierToken(index, end, text) {
  const kind = TokenType.PrivateIdentifier;
  return new Token(index, end, kind, 0, text);
}
|
---|
/**
 * Builds a `Keyword` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param text keyword text, stored as `strValue` (`numValue` is 0)
 */
function newKeywordToken(index, end, text) {
  const kind = TokenType.Keyword;
  return new Token(index, end, kind, 0, text);
}
|
---|
/**
 * Builds an `Operator` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param text operator text, stored as `strValue` (`numValue` is 0)
 */
function newOperatorToken(index, end, text) {
  const kind = TokenType.Operator;
  return new Token(index, end, kind, 0, text);
}
|
---|
/**
 * Builds a `String` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param text string contents, stored as `strValue` (`numValue` is 0)
 */
function newStringToken(index, end, text) {
  const kind = TokenType.String;
  return new Token(index, end, kind, 0, text);
}
|
---|
/**
 * Builds a `Number` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param n numeric value, stored as `numValue` (`strValue` is the empty string)
 */
function newNumberToken(index, end, n) {
  const kind = TokenType.Number;
  return new Token(index, end, kind, n, '');
}
|
---|
/**
 * Builds an `Error` token.
 *
 * @param index start offset of the token
 * @param end end offset of the token
 * @param message error text, stored as `strValue` (`numValue` is 0)
 */
function newErrorToken(index, end, message) {
  const kind = TokenType.Error;
  return new Token(index, end, kind, 0, message);
}
|
---|
/**
 * Shared sentinel token (end of input, going by its name): a `Character`
 * token with code 0, empty text, and -1 for both offsets.
 */
export const EOF = new Token(
  -1,                   // index
  -1,                   // end
  TokenType.Character,  // type
  0,                    // numValue
  ''                    // strValue
);
|
---|
/**
 * Cursor over an expression string that produces one token per `scanToken()` call.
 *
 * `index` is the offset of the current character and `peek` holds its char
 * code, or `chars.$EOF` once the cursor has moved past the end of `input`.
 */
class _Scanner {
  /** @param input expression text to scan */
  constructor(input) {
    this.input = input;
    this.peek = 0;
    this.index = -1;
    this.length = input.length;
    // Step onto the first character so `peek` is valid before the first scan.
    this.advance();
  }

  /** Moves the cursor forward by one character and refreshes `peek`. */
  advance() {
    this.index += 1;
    if (this.index >= this.length) {
      this.peek = chars.$EOF;
    } else {
      this.peek = this.input.charCodeAt(this.index);
    }
  }

  /**
   * Skips leading whitespace and scans the next token.
   *
   * @returns the next token (possibly an error token), or `null` when the
   *     input is exhausted
   */
  scanToken() {
    const source = this.input;
    const limit = this.length;
    let code = this.peek;
    let cursor = this.index;

    // Skip every character whose code is at or below the space character.
    while (code <= chars.$SPACE) {
      cursor += 1;
      if (cursor >= limit) {
        code = chars.$EOF;
        break;
      }
      code = source.charCodeAt(cursor);
    }
    this.peek = code;
    this.index = cursor;

    if (cursor >= limit) {
      return null;
    }

    // Identifiers and numbers are recognised before single-character dispatch.
    if (isIdentifierStart(code)) {
      return this.scanIdentifier();
    }
    if (chars.isDigit(code)) {
      return this.scanNumber(cursor);
    }

    const start = cursor;
    switch (code) {
      case chars.$PERIOD: {
        // A period directly followed by a digit starts a number such as `.5`.
        this.advance();
        if (chars.isDigit(this.peek)) {
          return this.scanNumber(start);
        }
        return newCharacterToken(start, this.index, chars.$PERIOD);
      }

      case chars.$LPAREN:
      case chars.$RPAREN:
      case chars.$LBRACE:
      case chars.$RBRACE:
      case chars.$LBRACKET:
      case chars.$RBRACKET:
      case chars.$COMMA:
      case chars.$COLON:
      case chars.$SEMICOLON:
        return this.scanCharacter(start, code);

      case chars.$SQ:
      case chars.$DQ:
        return this.scanString();

      case chars.$HASH:
        return this.scanPrivateIdentifier();

      case chars.$PLUS:
      case chars.$MINUS:
      case chars.$STAR:
      case chars.$SLASH:
      case chars.$PERCENT:
      case chars.$CARET:
        return this.scanOperator(start, String.fromCharCode(code));

      case chars.$QUESTION:
        return this.scanQuestion(start);

      case chars.$LT:
      case chars.$GT:
        return this.scanComplexOperator(start, String.fromCharCode(code), chars.$EQ, '=');

      case chars.$BANG:
      case chars.$EQ:
        return this.scanComplexOperator(
            start, String.fromCharCode(code), chars.$EQ, '=', chars.$EQ, '=');

      case chars.$AMPERSAND:
        return this.scanComplexOperator(start, '&', chars.$AMPERSAND, '&');

      case chars.$BAR:
        return this.scanComplexOperator(start, '|', chars.$BAR, '|');

      case chars.$NBSP: {
        // NBSP was not consumed by the `<= $SPACE` loop above; skip it (and
        // any whitespace after it) here, then scan again.
        while (chars.isWhitespace(this.peek)) {
          this.advance();
        }
        return this.scanToken();
      }

      default: {
        this.advance();
        return this.error(`Unexpected character [${String.fromCharCode(code)}]`, 0);
      }
    }
  }

  /** Consumes one character and returns it as a character token. */
  scanCharacter(start, code) {
    this.advance();
    const end = this.index;
    return newCharacterToken(start, end, code);
  }

  /** Consumes one character and returns `str` as an operator token. */
  scanOperator(start, str) {
    this.advance();
    const end = this.index;
    return newOperatorToken(start, end, str);
  }

  /**
   * Scans an operator that is two or three characters long.
   *
   * @param start offset of the operator in the expression
   * @param one first symbol (always part of the operator)
   * @param twoCode char code of the second symbol
   * @param two second symbol (appended when the next character matches `twoCode`)
   * @param threeCode char code of the third symbol (optional)
   * @param three third symbol (appended when `threeCode` is given and matches)
   */
  scanComplexOperator(start, one, twoCode, two, threeCode, three) {
    this.advance();
    const optionalParts = [[twoCode, two]];
    if (threeCode != null) {
      optionalParts.push([threeCode, three]);
    }
    let operator = one;
    for (const [code, symbol] of optionalParts) {
      if (this.peek == code) {
        this.advance();
        operator += symbol;
      }
    }
    return newOperatorToken(start, this.index, operator);
  }

  /** Scans an identifier, returning a keyword token if its text is in `KEYWORDS`. */
  scanIdentifier() {
    const start = this.index;
    // The first character was already validated by the caller.
    do {
      this.advance();
    } while (isIdentifierPart(this.peek));
    const name = this.input.substring(start, this.index);
    const makeToken = KEYWORDS.indexOf(name) > -1 ? newKeywordToken : newIdentifierToken;
    return makeToken(start, this.index, name);
  }

  /** Scans an ECMAScript private identifier (`#name`). */
  scanPrivateIdentifier() {
    const start = this.index;
    this.advance();  // Step over the `#`.
    if (!isIdentifierStart(this.peek)) {
      return this.error('Invalid character [#]', -1);
    }
    while (isIdentifierPart(this.peek)) {
      this.advance();
    }
    // The token text includes the leading `#`.
    const name = this.input.substring(start, this.index);
    return newPrivateIdentifierToken(start, this.index, name);
  }

  /**
   * Scans a numeric literal.
   *
   * @param start offset where the literal begins; it differs from the current
   *     index when the caller has already consumed a leading period
   * @returns a number token, or an error token for a misplaced separator or
   *     an exponent without digits
   */
  scanNumber(start) {
    // Integer parsing is only used when no period or exponent is involved.
    let isInteger = this.index === start;
    let sawSeparator = false;
    this.advance();  // Step over the initial digit.

    for (;; this.advance()) {
      const code = this.peek;
      if (chars.isDigit(code)) {
        continue;
      }
      if (code === chars.$_) {
        // A separator must sit between two digits: `1_0_1` is fine, while
        // `101_`, `1__0` and `1_.5` are not. A leading underscore normally
        // never gets here because it scans as an identifier, but it is
        // rejected by the same check.
        const before = this.input.charCodeAt(this.index - 1);
        const after = this.input.charCodeAt(this.index + 1);
        if (!(chars.isDigit(before) && chars.isDigit(after))) {
          return this.error('Invalid numeric separator', 0);
        }
        sawSeparator = true;
        continue;
      }
      if (code === chars.$PERIOD) {
        isInteger = false;
        continue;
      }
      if (!isExponentStart(code)) {
        break;
      }
      this.advance();
      if (isExponentSign(this.peek)) {
        this.advance();
      }
      if (!chars.isDigit(this.peek)) {
        return this.error('Invalid exponent', -1);
      }
      isInteger = false;
    }

    const raw = this.input.substring(start, this.index);
    const literal = sawSeparator ? raw.replace(/_/g, '') : raw;
    const value = isInteger ? parseIntAutoRadix(literal) : parseFloat(literal);
    return newNumberToken(start, this.index, value);
  }

  /**
   * Scans a quoted string, resolving backslash escapes.
   *
   * @returns a string token holding the unescaped contents, or an error token
   *     for an invalid unicode escape or a missing closing quote
   */
  scanString() {
    const start = this.index;
    const quote = this.peek;
    const source = this.input;
    this.advance();  // Step over the opening quote.

    const pieces = [];
    // Start of the run of plain characters not yet copied into `pieces`.
    let pending = this.index;
    while (this.peek != quote) {
      if (this.peek == chars.$BACKSLASH) {
        pieces.push(source.substring(pending, this.index));
        this.advance();
        let decoded;
        if (this.peek == chars.$u) {
          // Up to four characters after the `u` form the hex code.
          const hex = source.substring(this.index + 1, this.index + 5);
          if (!/^[0-9a-f]+$/i.test(hex)) {
            return this.error(`Invalid unicode escape [\\u${hex}]`, 0);
          }
          decoded = parseInt(hex, 16);
          // Step over the `u` and the four hex characters.
          for (let remaining = 5; remaining > 0; remaining--) {
            this.advance();
          }
        } else {
          decoded = unescape(this.peek);
          this.advance();
        }
        pieces.push(String.fromCharCode(decoded));
        pending = this.index;
      } else if (this.peek == chars.$EOF) {
        return this.error('Unterminated quote', 0);
      } else {
        this.advance();
      }
    }

    pieces.push(source.substring(pending, this.index));
    this.advance();  // Step over the closing quote.
    return newStringToken(start, this.index, pieces.join(''));
  }

  /** Scans `?`, `??` or `?.` as an operator token. */
  scanQuestion(start) {
    this.advance();
    // Either `a ?? b` or `a?.b`.
    let suffix = '';
    if (this.peek === chars.$PERIOD) {
      suffix = '.';
    } else if (this.peek === chars.$QUESTION) {
      suffix = '?';
    }
    if (suffix !== '') {
      this.advance();
    }
    return newOperatorToken(start, this.index, '?' + suffix);
  }

  /**
   * Builds an error token.
   *
   * @param message description of the problem
   * @param offset value added to the current index to get the reported column
   */
  error(message, offset) {
    const position = this.index + offset;
    const text =
        `Lexer Error: ${message} at column ${position} in expression [${this.input}]`;
    return newErrorToken(position, this.index, text);
  }
}
|
---|
/**
 * Tells whether `code` may begin an identifier: an ASCII letter, `_` or `$`.
 *
 * @param code character code to test
 */
function isIdentifierStart(code) {
  if (code >= chars.$a && code <= chars.$z) {
    return true;
  }
  if (code >= chars.$A && code <= chars.$Z) {
    return true;
  }
  return code == chars.$_ || code == chars.$$;
}
|
---|
/**
 * Tells whether the whole of `input` is a single identifier.
 *
 * @param input text to check
 * @returns `false` for empty input, for a first character that cannot start
 *     an identifier, or for any later character that cannot continue one
 */
export function isIdentifier(input) {
  if (input.length == 0) {
    return false;
  }
  const scanner = new _Scanner(input);
  if (!isIdentifierStart(scanner.peek)) {
    return false;
  }
  // Walk the remaining characters until the scanner reports end of input.
  for (scanner.advance(); scanner.peek !== chars.$EOF; scanner.advance()) {
    if (!isIdentifierPart(scanner.peek)) {
      return false;
    }
  }
  return true;
}
|
---|
/**
 * Tells whether `code` may appear inside an identifier: anything accepted by
 * `chars.isAsciiLetter` or `chars.isDigit`, plus `_` and `$`.
 *
 * @param code character code to test
 */
function isIdentifierPart(code) {
  if (chars.isAsciiLetter(code) || chars.isDigit(code)) {
    return true;
  }
  return code == chars.$_ || code == chars.$$;
}
|
---|
/**
 * Tells whether `code` is the exponent marker of a number (`e` or `E`).
 *
 * @param code character code to test
 */
function isExponentStart(code) {
  if (code == chars.$e) {
    return true;
  }
  return code == chars.$E;
}
|
---|
/**
 * Tells whether `code` is a sign that may follow an exponent marker (`-` or `+`).
 *
 * @param code character code to test
 */
function isExponentSign(code) {
  if (code == chars.$MINUS) {
    return true;
  }
  return code == chars.$PLUS;
}
|
---|
/**
 * Maps the character that follows a backslash to the character it stands for.
 *
 * @param code char code of the escaped letter
 * @returns the control-character code for `n`, `f`, `r`, `t` and `v`;
 *     any other code is returned unchanged
 */
function unescape(code) {
  if (code === chars.$n) {
    return chars.$LF;
  }
  if (code === chars.$f) {
    return chars.$FF;
  }
  if (code === chars.$r) {
    return chars.$CR;
  }
  if (code === chars.$t) {
    return chars.$TAB;
  }
  if (code === chars.$v) {
    return chars.$VTAB;
  }
  return code;
}
|
---|
/**
 * Parses an integer literal, letting `parseInt` pick the radix from the text.
 *
 * @param text literal to parse
 * @returns the parsed integer
 * @throws Error when `parseInt` cannot read a number from `text`
 */
function parseIntAutoRadix(text) {
  const parsed = parseInt(text);
  if (!isNaN(parsed)) {
    return parsed;
  }
  throw new Error('Invalid integer literal when parsing ' + text);
}
|
---|
419 | //# sourceMappingURL=data:application/json;base64, |
---|