1 | /**
|
---|
2 | * @license
|
---|
3 | * Copyright Google LLC All Rights Reserved.
|
---|
4 | *
|
---|
5 | * Use of this source code is governed by an MIT-style license that can be
|
---|
6 | * found in the LICENSE file at https://angular.io/license
|
---|
7 | */
|
---|
8 | import * as chars from '../chars';
|
---|
9 | import { ParseError, ParseLocation, ParseSourceFile, ParseSourceSpan } from '../parse_util';
|
---|
10 | import { NAMED_ENTITIES } from './entities';
|
---|
11 | import { DEFAULT_INTERPOLATION_CONFIG } from './interpolation_config';
|
---|
12 | import { TagContentType } from './tags';
|
---|
/**
 * A parse error produced while lexing.
 *
 * In addition to what `ParseError` stores for the span and message, the type of the
 * token that was being built when the error occurred is kept on `tokenType`
 * (it may be `null` when no token was in progress).
 */
export class TokenError extends ParseError {
    /**
     * @param errorMsg Human readable description of the problem.
     * @param tokenType Type of the token in progress when the error was raised.
     * @param span Source span the error refers to.
     */
    constructor(errorMsg, tokenType, span) {
        // Note the argument order: the base class takes the span first.
        super(span, errorMsg);
        this.tokenType = tokenType;
    }
}
|
---|
/**
 * Plain value object returned by `tokenize()`.
 *
 * Holds the produced tokens, the errors collected along the way, and the ICU
 * expression tokens whose line endings were left un-normalized.
 */
export class TokenizeResult {
    /**
     * @param tokens The tokens produced by the lexer.
     * @param errors The errors recorded while lexing.
     * @param nonNormalizedIcuExpressions Tokens recorded as having non-normalized line endings.
     */
    constructor(tokens, errors, nonNormalizedIcuExpressions) {
        Object.assign(this, { tokens, errors, nonNormalizedIcuExpressions });
    }
}
|
---|
/**
 * Lexes `source` and returns the resulting tokens and errors.
 *
 * @param source The template text to tokenize.
 * @param url Passed to `ParseSourceFile` together with `source`.
 * @param getTagDefinition Function used by the tokenizer to look up a tag definition by name.
 * @param options Tokenizer options; defaults to an empty object.
 * @returns A `TokenizeResult` whose tokens have been passed through `mergeTextTokens`.
 */
export function tokenize(source, url, getTagDefinition, options = {}) {
    const file = new ParseSourceFile(source, url);
    const lexer = new _Tokenizer(file, getTagDefinition, options);
    lexer.tokenize();
    const merged = mergeTextTokens(lexer.tokens);
    return new TokenizeResult(merged, lexer.errors, lexer.nonNormalizedIcuExpressions);
}
|
---|
// Matches every carriage return, together with the line feed that directly follows it (if any).
// Used to rewrite CR and CRLF line endings as a single LF.
const _CR_OR_CRLF_REGEXP = /\r\n?/g;
|
---|
/**
 * Builds the message used when the lexer meets a character it did not expect.
 *
 * @param charCode Code of the offending character; `chars.$EOF` is rendered as `EOF`.
 * @returns The formatted error message.
 */
function _unexpectedCharacterErrorMsg(charCode) {
    let shown;
    if (charCode === chars.$EOF) {
        shown = 'EOF';
    }
    else {
        shown = String.fromCharCode(charCode);
    }
    return `Unexpected character "${shown}"`;
}
|
---|
/**
 * Builds the message reported for an entity reference that cannot be resolved.
 *
 * @param entitySrc The entity text to show in the message.
 * @returns The formatted error message, including a hint about numeric references.
 */
function _unknownEntityErrorMsg(entitySrc) {
    const hint = 'use the "&#<decimal>;" or "&#x<hex>;" syntax';
    return `Unknown entity "${entitySrc}" - ${hint}`;
}
|
---|
/**
 * Builds the message reported for a numeric character reference that is not terminated by `;`.
 *
 * @param type Description of the reference kind (a `CharacterReferenceType` value).
 * @param entityStr The source text of the malformed entity.
 * @returns The formatted error message.
 */
function _unparsableEntityErrorMsg(type, entityStr) {
    const problem = `Unable to parse entity "${entityStr}"`;
    const rule = `${type} character reference entities must end with ";"`;
    return `${problem} - ${rule}`;
}
|
---|
/**
 * The two kinds of numeric character reference; the values are the words used in
 * error messages.
 */
var CharacterReferenceType;
(function (kinds) {
    kinds.HEX = "hexadecimal";
    kinds.DEC = "decimal";
})(CharacterReferenceType || (CharacterReferenceType = {}));
|
---|
/**
 * Wrapper thrown to unwind the lexer when a token cannot be completed.
 *
 * The wrapped value is exposed on `error`.
 */
class _ControlFlowError {
    /**
     * @param error The error being carried.
     */
    constructor(error) {
        this.error = error;
    }
}
|
---|
52 | // See https://www.w3.org/TR/html51/syntax.html#writing-html-documents
|
---|
/**
 * Character-by-character lexer for HTML templates.
 *
 * The tokenizer walks a cursor over the source, appends tokens to `tokens` and
 * recoverable problems to `errors`. Numeric token types used throughout are inlined
 * enum values; the trailing comment on each literal gives the enum member name.
 *
 * Error handling: helpers throw a `_ControlFlowError` (created by `_createError`) to
 * abandon the token in progress. `tokenize()` catches these, records the wrapped
 * error and carries on from the current cursor position.
 */
class _Tokenizer {
    /**
     * @param _file The html source file being tokenized.
     * @param _getTagDefinition A function that will retrieve a tag definition for a given tag name.
     * @param options Configuration of the tokenization.
     */
    constructor(_file, _getTagDefinition, options) {
        this._getTagDefinition = _getTagDefinition;
        this._currentTokenStart = null;
        this._currentTokenType = null;
        this._expansionCaseStack = [];
        // Within this class the flag is only ever assigned `false`.
        this._inInterpolation = false;
        this.tokens = [];
        this.errors = [];
        this.nonNormalizedIcuExpressions = [];
        this._tokenizeIcu = options.tokenizeExpansionForms || false;
        this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
        this._leadingTriviaCodePoints =
            options.leadingTriviaChars && options.leadingTriviaChars.map(c => c.codePointAt(0) || 0);
        // Without an explicit range the whole file content is tokenized.
        const range = options.range || { endPos: _file.content.length, startPos: 0, startLine: 0, startCol: 0 };
        this._cursor = options.escapedString ? new EscapedCharacterCursor(_file, range) :
            new PlainCharacterCursor(_file, range);
        this._preserveLineEndings = options.preserveLineEndings || false;
        this._escapedString = options.escapedString || false;
        this._i18nNormalizeLineEndingsInICUs = options.i18nNormalizeLineEndingsInICUs || false;
        try {
            this._cursor.init();
        }
        catch (e) {
            this.handleError(e);
        }
    }
    /**
     * Replaces CR and CRLF sequences in `content` with LF, unless the
     * `preserveLineEndings` option was set, in which case `content` is returned as is.
     */
    _processCarriageReturns(content) {
        if (this._preserveLineEndings) {
            return content;
        }
        // https://www.w3.org/TR/html51/syntax.html#preprocessing-the-input-stream
        // In order to keep the original position in the source, we can not
        // pre-process it.
        // Instead CRs are processed right before instantiating the tokens.
        return content.replace(_CR_OR_CRLF_REGEXP, '\n');
    }
    /**
     * Runs the lexer until the cursor reports EOF, then appends an EOF token.
     *
     * Each iteration dispatches on the next characters: `<![` (CDATA), `<!-` (comment),
     * other `<!` (doctype), `</` (closing tag), other `<` (opening tag), an ICU
     * expansion construct when enabled, and otherwise text with interpolation.
     * Errors are routed through `handleError()`.
     */
    tokenize() {
        while (this._cursor.peek() !== chars.$EOF) {
            const start = this._cursor.clone();
            try {
                if (this._attemptCharCode(chars.$LT)) {
                    if (this._attemptCharCode(chars.$BANG)) {
                        if (this._attemptCharCode(chars.$LBRACKET)) {
                            this._consumeCdata(start);
                        }
                        else if (this._attemptCharCode(chars.$MINUS)) {
                            this._consumeComment(start);
                        }
                        else {
                            this._consumeDocType(start);
                        }
                    }
                    else if (this._attemptCharCode(chars.$SLASH)) {
                        this._consumeTagClose(start);
                    }
                    else {
                        this._consumeTagOpen(start);
                    }
                }
                else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) {
                    // In (possibly interpolated) text the end of the text is given by `isTextEnd()`, while
                    // the premature end of an interpolation is given by the start of a new HTML element.
                    this._consumeWithInterpolation(5 /* TEXT */, 8 /* INTERPOLATION */, () => this._isTextEnd(), () => this._isTagStart());
                }
            }
            catch (e) {
                this.handleError(e);
            }
        }
        this._beginToken(24 /* EOF */);
        this._endToken([]);
    }
    /**
     * @returns whether an ICU token has been created
     * @internal
     */
    _tokenizeExpansionForm() {
        if (this.isExpansionFormStart()) {
            this._consumeExpansionFormStart();
            return true;
        }
        if (isExpansionCaseStart(this._cursor.peek()) && this._isInExpansionForm()) {
            this._consumeExpansionCaseStart();
            return true;
        }
        if (this._cursor.peek() === chars.$RBRACE) {
            if (this._isInExpansionCase()) {
                this._consumeExpansionCaseEnd();
                return true;
            }
            if (this._isInExpansionForm()) {
                this._consumeExpansionFormEnd();
                return true;
            }
        }
        return false;
    }
    /**
     * Marks the start of a new token.
     *
     * @param type Token type to record.
     * @param start Cursor marking where the token starts; defaults to a clone of the current cursor.
     */
    _beginToken(type, start = this._cursor.clone()) {
        this._currentTokenStart = start;
        this._currentTokenType = type;
    }
    /**
     * Finishes the token started by `_beginToken()`, appends it to `tokens` and returns it.
     *
     * @param parts The token's parts.
     * @param end Optional cursor marking the token end; the current cursor is used when omitted.
     * @throws TokenError when no token start or no token type has been recorded.
     */
    _endToken(parts, end) {
        if (this._currentTokenStart === null) {
            throw new TokenError('Programming error - attempted to end a token when there was no start to the token', this._currentTokenType, this._cursor.getSpan(end));
        }
        if (this._currentTokenType === null) {
            throw new TokenError('Programming error - attempted to end a token which has no token type', null, this._cursor.getSpan(this._currentTokenStart));
        }
        const token = {
            type: this._currentTokenType,
            parts,
            sourceSpan: (end != null ? end : this._cursor).getSpan(this._currentTokenStart, this._leadingTriviaCodePoints),
        };
        this.tokens.push(token);
        this._currentTokenStart = null;
        this._currentTokenType = null;
        return token;
    }
    /**
     * Creates (but does not throw) a `_ControlFlowError` wrapping a `TokenError`, and
     * discards the token in progress.
     *
     * When the lexer is directly inside an ICU expansion form a hint about unescaped
     * `{` characters is appended to the message.
     */
    _createError(msg, span) {
        if (this._isInExpansionForm()) {
            msg += ` (Do you have an unescaped "{" in your template? Use "{{ '{' }}") to escape it.)`;
        }
        const error = new TokenError(msg, this._currentTokenType, span);
        this._currentTokenStart = null;
        this._currentTokenType = null;
        return new _ControlFlowError(error);
    }
    /**
     * Records a recoverable lexing error in `errors`.
     *
     * A `CursorError` is first converted through `_createError()`. Anything that is
     * not (or does not become) a `_ControlFlowError` is re-thrown.
     */
    handleError(e) {
        if (e instanceof CursorError) {
            e = this._createError(e.msg, this._cursor.getSpan(e.cursor));
        }
        if (e instanceof _ControlFlowError) {
            this.errors.push(e.error);
        }
        else {
            throw e;
        }
    }
    /** Advances past the next character if it equals `charCode`; returns whether it did. */
    _attemptCharCode(charCode) {
        if (this._cursor.peek() === charCode) {
            this._cursor.advance();
            return true;
        }
        return false;
    }
    /** Like `_attemptCharCode()` but compares using `compareCharCodeCaseInsensitive()`. */
    _attemptCharCodeCaseInsensitive(charCode) {
        if (compareCharCodeCaseInsensitive(this._cursor.peek(), charCode)) {
            this._cursor.advance();
            return true;
        }
        return false;
    }
    /** Consumes `charCode` or throws an "Unexpected character" control-flow error. */
    _requireCharCode(charCode) {
        const location = this._cursor.clone();
        if (!this._attemptCharCode(charCode)) {
            throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
        }
    }
    /**
     * Consumes the literal string `chars` if the input continues with it.
     *
     * On a mismatch the cursor is restored to where it was before the attempt.
     * Note that the parameter shadows the imported `chars` module inside this method.
     */
    _attemptStr(chars) {
        const len = chars.length;
        if (this._cursor.charsLeft() < len) {
            return false;
        }
        const initialPosition = this._cursor.clone();
        for (let i = 0; i < len; i++) {
            if (!this._attemptCharCode(chars.charCodeAt(i))) {
                // If attempting to parse the string fails, we want to reset the parser
                // to where it was before the attempt
                this._cursor = initialPosition;
                return false;
            }
        }
        return true;
    }
    /**
     * Case-insensitive variant of `_attemptStr()`.
     *
     * On a mismatch the cursor is restored to where it was before the attempt, so a
     * failed match never leaves the cursor in the middle of the candidate string.
     * Note that the parameter shadows the imported `chars` module inside this method.
     */
    _attemptStrCaseInsensitive(chars) {
        const initialPosition = this._cursor.clone();
        for (let i = 0; i < chars.length; i++) {
            if (!this._attemptCharCodeCaseInsensitive(chars.charCodeAt(i))) {
                this._cursor = initialPosition;
                return false;
            }
        }
        return true;
    }
    /** Consumes the literal string `chars` or throws an "Unexpected character" control-flow error. */
    _requireStr(chars) {
        const location = this._cursor.clone();
        if (!this._attemptStr(chars)) {
            throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
        }
    }
    /**
     * Advances the cursor until `predicate` returns true for the next character.
     * There is no explicit EOF check here; the predicate is expected to stop the loop.
     */
    _attemptCharCodeUntilFn(predicate) {
        while (!predicate(this._cursor.peek())) {
            this._cursor.advance();
        }
    }
    /**
     * Like `_attemptCharCodeUntilFn()` but throws when fewer than `len` characters
     * (as measured by the cursor's `diff()`) were consumed.
     */
    _requireCharCodeUntilFn(predicate, len) {
        const start = this._cursor.clone();
        this._attemptCharCodeUntilFn(predicate);
        if (this._cursor.diff(start) < len) {
            throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
        }
    }
    /** Advances the cursor until the next character equals `char` (which is not consumed). */
    _attemptUntilChar(char) {
        while (this._cursor.peek() !== char) {
            this._cursor.advance();
        }
    }
    /** Returns the next character as a string and advances past it. */
    _readChar() {
        // Don't rely upon reading directly from `_input` as the actual char value
        // may have been generated from an escape sequence.
        const char = String.fromCodePoint(this._cursor.peek());
        this._cursor.advance();
        return char;
    }
    /**
     * Consumes an entity starting at `&` and emits an ENCODED_ENTITY token whose parts
     * are `[decodedText, sourceText]`.
     *
     * - Numeric references (`&#...;` / `&#x...;`) must end with `;`; the number must be
     *   a valid Unicode code point, otherwise an "Unknown entity" error is raised.
     * - Named references are looked up in `NAMED_ENTITIES`. If the name is not followed
     *   by `;` the `&` is emitted as a token of `textTokenType` instead.
     *
     * @param textTokenType Token type to use when the `&` turns out to be plain text.
     */
    _consumeEntity(textTokenType) {
        this._beginToken(9 /* ENCODED_ENTITY */);
        const start = this._cursor.clone();
        this._cursor.advance();
        if (this._attemptCharCode(chars.$HASH)) {
            const isHex = this._attemptCharCode(chars.$x) || this._attemptCharCode(chars.$X);
            const codeStart = this._cursor.clone();
            this._attemptCharCodeUntilFn(isDigitEntityEnd);
            if (this._cursor.peek() !== chars.$SEMICOLON) {
                // Advance cursor to include the peeked character in the string provided to the error
                // message.
                this._cursor.advance();
                const entityType = isHex ? CharacterReferenceType.HEX : CharacterReferenceType.DEC;
                throw this._createError(_unparsableEntityErrorMsg(entityType, this._cursor.getChars(start)), this._cursor.getSpan());
            }
            const strNum = this._cursor.getChars(codeStart);
            this._cursor.advance();
            let decoded;
            try {
                // `fromCodePoint` handles code points above 0xFFFF and throws a RangeError for
                // NaN (e.g. `&#;`) or values outside the Unicode range, which `fromCharCode`
                // would silently truncate.
                decoded = String.fromCodePoint(parseInt(strNum, isHex ? 16 : 10));
            }
            catch (e) {
                if (!(e instanceof RangeError)) {
                    throw e;
                }
                throw this._createError(_unknownEntityErrorMsg(this._cursor.getChars(start)), this._cursor.getSpan());
            }
            this._endToken([decoded, this._cursor.getChars(start)]);
        }
        else {
            const nameStart = this._cursor.clone();
            this._attemptCharCodeUntilFn(isNamedEntityEnd);
            if (this._cursor.peek() !== chars.$SEMICOLON) {
                // No semicolon was found so abort the encoded entity token that was in progress, and treat
                // this as a text token
                this._beginToken(textTokenType, start);
                this._cursor = nameStart;
                this._endToken(['&']);
            }
            else {
                const name = this._cursor.getChars(nameStart);
                this._cursor.advance();
                // Only own keys count: names such as `constructor` or `toString` would otherwise
                // resolve to members inherited from `Object.prototype`.
                const char = Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, name) ?
                    NAMED_ENTITIES[name] :
                    undefined;
                if (!char) {
                    throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
                }
                this._endToken([char, `&${name};`]);
            }
        }
    }
    /**
     * Consumes raw text up to (not including) the point where `endMarkerPredicate()`
     * reports a match.
     *
     * The predicate may move the cursor; the cursor is restored after every call.
     *
     * @param consumeEntities When true, entities are tokenized and the text is emitted as
     *     ESCAPABLE_RAW_TEXT tokens; otherwise a single RAW_TEXT token is emitted.
     * @param endMarkerPredicate Returns true when the cursor is at the end marker.
     */
    _consumeRawText(consumeEntities, endMarkerPredicate) {
        this._beginToken(consumeEntities ? 6 /* ESCAPABLE_RAW_TEXT */ : 7 /* RAW_TEXT */);
        const parts = [];
        while (true) {
            const tagCloseStart = this._cursor.clone();
            const foundEndMarker = endMarkerPredicate();
            this._cursor = tagCloseStart;
            if (foundEndMarker) {
                break;
            }
            if (consumeEntities && this._cursor.peek() === chars.$AMPERSAND) {
                this._endToken([this._processCarriageReturns(parts.join(''))]);
                parts.length = 0;
                this._consumeEntity(6 /* ESCAPABLE_RAW_TEXT */);
                this._beginToken(6 /* ESCAPABLE_RAW_TEXT */);
            }
            else {
                parts.push(this._readChar());
            }
        }
        this._endToken([this._processCarriageReturns(parts.join(''))]);
    }
    /** Consumes a comment; `start` points at the `<` and `<!-` has already been consumed. */
    _consumeComment(start) {
        this._beginToken(10 /* COMMENT_START */, start);
        this._requireCharCode(chars.$MINUS);
        this._endToken([]);
        this._consumeRawText(false, () => this._attemptStr('-->'));
        this._beginToken(11 /* COMMENT_END */);
        this._requireStr('-->');
        this._endToken([]);
    }
    /** Consumes a CDATA section; `start` points at the `<` and `<![` has already been consumed. */
    _consumeCdata(start) {
        this._beginToken(12 /* CDATA_START */, start);
        this._requireStr('CDATA[');
        this._endToken([]);
        this._consumeRawText(false, () => this._attemptStr(']]>'));
        this._beginToken(13 /* CDATA_END */);
        this._requireStr(']]>');
        this._endToken([]);
    }
    /**
     * Consumes everything up to and including the next `>` as a DOC_TYPE token whose
     * single part is the text between `<!` and `>`.
     */
    _consumeDocType(start) {
        this._beginToken(18 /* DOC_TYPE */, start);
        const contentStart = this._cursor.clone();
        this._attemptUntilChar(chars.$GT);
        const content = this._cursor.getChars(contentStart);
        this._cursor.advance();
        this._endToken([content]);
    }
    /**
     * Consumes an optionally prefixed name (`prefix:name`).
     *
     * @returns `[prefix, name]`, where `prefix` is the empty string when there is none.
     */
    _consumePrefixAndName() {
        const nameOrPrefixStart = this._cursor.clone();
        let prefix = '';
        while (this._cursor.peek() !== chars.$COLON && !isPrefixEnd(this._cursor.peek())) {
            this._cursor.advance();
        }
        let nameStart;
        if (this._cursor.peek() === chars.$COLON) {
            prefix = this._cursor.getChars(nameOrPrefixStart);
            this._cursor.advance();
            nameStart = this._cursor.clone();
        }
        else {
            nameStart = nameOrPrefixStart;
        }
        // After a prefix at least one name character is required.
        this._requireCharCodeUntilFn(isNameEnd, prefix === '' ? 0 : 1);
        const name = this._cursor.getChars(nameStart);
        return [prefix, name];
    }
    /**
     * Consumes an opening tag with its attributes; `start` points at the `<`, which has
     * already been consumed.
     *
     * If a control-flow error occurs after the tag name was read, the start token is
     * re-typed as INCOMPLETE_TAG_OPEN; if it occurs before, the `<` is emitted as text.
     * For tags whose content type is raw text, the content and closing tag are consumed too.
     */
    _consumeTagOpen(start) {
        let tagName;
        let prefix;
        let openTagToken;
        try {
            if (!chars.isAsciiLetter(this._cursor.peek())) {
                throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
            }
            openTagToken = this._consumeTagOpenStart(start);
            prefix = openTagToken.parts[0];
            tagName = openTagToken.parts[1];
            this._attemptCharCodeUntilFn(isNotWhitespace);
            while (this._cursor.peek() !== chars.$SLASH && this._cursor.peek() !== chars.$GT &&
                this._cursor.peek() !== chars.$LT && this._cursor.peek() !== chars.$EOF) {
                this._consumeAttributeName();
                this._attemptCharCodeUntilFn(isNotWhitespace);
                if (this._attemptCharCode(chars.$EQ)) {
                    this._attemptCharCodeUntilFn(isNotWhitespace);
                    this._consumeAttributeValue();
                }
                this._attemptCharCodeUntilFn(isNotWhitespace);
            }
            this._consumeTagOpenEnd();
        }
        catch (e) {
            if (e instanceof _ControlFlowError) {
                if (openTagToken) {
                    // We errored before we could close the opening tag, so it is incomplete.
                    openTagToken.type = 4 /* INCOMPLETE_TAG_OPEN */;
                }
                else {
                    // When the start tag is invalid, assume we want a "<" as text.
                    // Back to back text tokens are merged at the end.
                    this._beginToken(5 /* TEXT */, start);
                    this._endToken(['<']);
                }
                return;
            }
            throw e;
        }
        const contentTokenType = this._getTagDefinition(tagName).getContentType(prefix);
        if (contentTokenType === TagContentType.RAW_TEXT) {
            this._consumeRawTextWithTagClose(prefix, tagName, false);
        }
        else if (contentTokenType === TagContentType.ESCAPABLE_RAW_TEXT) {
            this._consumeRawTextWithTagClose(prefix, tagName, true);
        }
    }
    /**
     * Consumes the content of a raw-text element followed by its closing tag.
     *
     * The closing tag is matched case-insensitively against the full name that opened
     * the element, i.e. `prefix:tagName` when a prefix was present, so that prefixed
     * elements such as `<svg:style>` are terminated by `</svg:style>`.
     */
    _consumeRawTextWithTagClose(prefix, tagName, consumeEntities) {
        const closingName = prefix ? `${prefix}:${tagName}` : tagName;
        this._consumeRawText(consumeEntities, () => {
            if (!this._attemptCharCode(chars.$LT))
                return false;
            if (!this._attemptCharCode(chars.$SLASH))
                return false;
            this._attemptCharCodeUntilFn(isNotWhitespace);
            if (!this._attemptStrCaseInsensitive(closingName))
                return false;
            this._attemptCharCodeUntilFn(isNotWhitespace);
            return this._attemptCharCode(chars.$GT);
        });
        this._beginToken(3 /* TAG_CLOSE */);
        this._requireCharCodeUntilFn(code => code === chars.$GT, 3);
        this._cursor.advance(); // Consume the `>`
        this._endToken([prefix, tagName]);
    }
    /** Emits and returns the TAG_OPEN_START token with parts `[prefix, name]`. */
    _consumeTagOpenStart(start) {
        this._beginToken(0 /* TAG_OPEN_START */, start);
        const parts = this._consumePrefixAndName();
        return this._endToken(parts);
    }
    /** Emits an ATTR_NAME token; a name starting with a quote character is rejected. */
    _consumeAttributeName() {
        const attrNameStart = this._cursor.peek();
        if (attrNameStart === chars.$SQ || attrNameStart === chars.$DQ) {
            throw this._createError(_unexpectedCharacterErrorMsg(attrNameStart), this._cursor.getSpan());
        }
        this._beginToken(14 /* ATTR_NAME */);
        const prefixAndName = this._consumePrefixAndName();
        this._endToken(prefixAndName);
    }
    /**
     * Consumes an attribute value, quoted or unquoted, emitting quote tokens (when
     * quoted) around text/interpolation tokens.
     */
    _consumeAttributeValue() {
        if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) {
            const quoteChar = this._cursor.peek();
            this._consumeQuote(quoteChar);
            // In an attribute then end of the attribute value and the premature end to an interpolation
            // are both triggered by the `quoteChar`.
            const endPredicate = () => this._cursor.peek() === quoteChar;
            this._consumeWithInterpolation(16 /* ATTR_VALUE_TEXT */, 17 /* ATTR_VALUE_INTERPOLATION */, endPredicate, endPredicate);
            this._consumeQuote(quoteChar);
        }
        else {
            const endPredicate = () => isNameEnd(this._cursor.peek());
            this._consumeWithInterpolation(16 /* ATTR_VALUE_TEXT */, 17 /* ATTR_VALUE_INTERPOLATION */, endPredicate, endPredicate);
        }
    }
    /** Requires `quoteChar` at the cursor and emits it as an ATTR_QUOTE token. */
    _consumeQuote(quoteChar) {
        this._beginToken(15 /* ATTR_QUOTE */);
        this._requireCharCode(quoteChar);
        this._endToken([String.fromCodePoint(quoteChar)]);
    }
    /** Consumes `>` or `/>` and emits TAG_OPEN_END or TAG_OPEN_END_VOID respectively. */
    _consumeTagOpenEnd() {
        const tokenType = this._attemptCharCode(chars.$SLASH) ? 2 /* TAG_OPEN_END_VOID */ : 1 /* TAG_OPEN_END */;
        this._beginToken(tokenType);
        this._requireCharCode(chars.$GT);
        this._endToken([]);
    }
    /** Consumes a closing tag; `start` points at the `<` and `</` has already been consumed. */
    _consumeTagClose(start) {
        this._beginToken(3 /* TAG_CLOSE */, start);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        const prefixAndName = this._consumePrefixAndName();
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._requireCharCode(chars.$GT);
        this._endToken(prefixAndName);
    }
    /**
     * Consumes the head of an ICU expansion form: `{`, the condition up to the first
     * comma, and the type up to the second comma.
     *
     * When `i18nNormalizeLineEndingsInICUs` is off the condition is emitted verbatim
     * and, if normalizing would have changed it, the token is also recorded in
     * `nonNormalizedIcuExpressions`.
     */
    _consumeExpansionFormStart() {
        this._beginToken(19 /* EXPANSION_FORM_START */);
        this._requireCharCode(chars.$LBRACE);
        this._endToken([]);
        this._expansionCaseStack.push(19 /* EXPANSION_FORM_START */);
        this._beginToken(7 /* RAW_TEXT */);
        const condition = this._readUntil(chars.$COMMA);
        const normalizedCondition = this._processCarriageReturns(condition);
        if (this._i18nNormalizeLineEndingsInICUs) {
            // We explicitly want to normalize line endings for this text.
            this._endToken([normalizedCondition]);
        }
        else {
            // We are not normalizing line endings.
            const conditionToken = this._endToken([condition]);
            if (normalizedCondition !== condition) {
                this.nonNormalizedIcuExpressions.push(conditionToken);
            }
        }
        this._requireCharCode(chars.$COMMA);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._beginToken(7 /* RAW_TEXT */);
        const type = this._readUntil(chars.$COMMA);
        this._endToken([type]);
        this._requireCharCode(chars.$COMMA);
        this._attemptCharCodeUntilFn(isNotWhitespace);
    }
    /** Consumes an expansion case value and the `{` that opens the case body. */
    _consumeExpansionCaseStart() {
        this._beginToken(20 /* EXPANSION_CASE_VALUE */);
        const value = this._readUntil(chars.$LBRACE).trim();
        this._endToken([value]);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._beginToken(21 /* EXPANSION_CASE_EXP_START */);
        this._requireCharCode(chars.$LBRACE);
        this._endToken([]);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._expansionCaseStack.push(21 /* EXPANSION_CASE_EXP_START */);
    }
    /** Consumes the `}` that closes an expansion case body and pops the case from the stack. */
    _consumeExpansionCaseEnd() {
        this._beginToken(22 /* EXPANSION_CASE_EXP_END */);
        this._requireCharCode(chars.$RBRACE);
        this._endToken([]);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._expansionCaseStack.pop();
    }
    /** Consumes the `}` that closes an expansion form and pops the form from the stack. */
    _consumeExpansionFormEnd() {
        this._beginToken(23 /* EXPANSION_FORM_END */);
        this._requireCharCode(chars.$RBRACE);
        this._endToken([]);
        this._expansionCaseStack.pop();
    }
    /**
     * Consume a string that may contain interpolation expressions.
     *
     * The first token consumed will be of `tokenType` and then there will be alternating
     * `interpolationTokenType` and `tokenType` tokens until the `endPredicate()` returns true.
     *
     * If an interpolation token ends prematurely it will have no end marker in its `parts` array.
     *
     * @param textTokenType the kind of tokens to interleave around interpolation tokens.
     * @param interpolationTokenType the kind of tokens that contain interpolation.
     * @param endPredicate a function that should return true when we should stop consuming.
     * @param endInterpolation a function that should return true if there is a premature end to an
     *     interpolation expression - i.e. before we get to the normal interpolation closing marker.
     */
    _consumeWithInterpolation(textTokenType, interpolationTokenType, endPredicate, endInterpolation) {
        this._beginToken(textTokenType);
        const parts = [];
        while (!endPredicate()) {
            const current = this._cursor.clone();
            if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
                // The text token ends where the interpolation start marker begins.
                this._endToken([this._processCarriageReturns(parts.join(''))], current);
                parts.length = 0;
                this._consumeInterpolation(interpolationTokenType, current, endInterpolation);
                this._beginToken(textTokenType);
            }
            else if (this._cursor.peek() === chars.$AMPERSAND) {
                this._endToken([this._processCarriageReturns(parts.join(''))]);
                parts.length = 0;
                this._consumeEntity(textTokenType);
                this._beginToken(textTokenType);
            }
            else {
                parts.push(this._readChar());
            }
        }
        // It is possible that an interpolation was started but not ended inside this text token.
        // Make sure that we reset the state of the lexer correctly.
        this._inInterpolation = false;
        this._endToken([this._processCarriageReturns(parts.join(''))]);
    }
    /**
     * Consume a block of text that has been interpreted as an Angular interpolation.
     *
     * The emitted token's parts are `[startMarker, expression]` plus the end marker when
     * the interpolation was closed normally.
     *
     * @param interpolationTokenType the type of the interpolation token to generate.
     * @param interpolationStart a cursor that points to the start of this interpolation.
     * @param prematureEndPredicate a function that should return true if the next characters indicate
     *     an end to the interpolation before its normal closing marker.
     */
    _consumeInterpolation(interpolationTokenType, interpolationStart, prematureEndPredicate) {
        const parts = [];
        this._beginToken(interpolationTokenType, interpolationStart);
        parts.push(this._interpolationConfig.start);
        // Find the end of the interpolation, ignoring content inside quotes.
        const expressionStart = this._cursor.clone();
        let inQuote = null;
        let inComment = false;
        while (this._cursor.peek() !== chars.$EOF &&
            (prematureEndPredicate === null || !prematureEndPredicate())) {
            const current = this._cursor.clone();
            if (this._isTagStart()) {
                // We are starting what looks like an HTML element in the middle of this interpolation.
                // Reset the cursor to before the `<` character and end the interpolation token.
                // (This is actually wrong but here for backward compatibility).
                this._cursor = current;
                parts.push(this._getProcessedChars(expressionStart, current));
                this._endToken(parts);
                return;
            }
            if (inQuote === null) {
                if (this._attemptStr(this._interpolationConfig.end)) {
                    // We are not in a string, and we hit the end interpolation marker
                    parts.push(this._getProcessedChars(expressionStart, current));
                    parts.push(this._interpolationConfig.end);
                    this._endToken(parts);
                    return;
                }
                else if (this._attemptStr('//')) {
                    // Once we are in a comment we ignore any quotes
                    inComment = true;
                }
            }
            const char = this._cursor.peek();
            this._cursor.advance();
            if (char === chars.$BACKSLASH) {
                // Skip the next character because it was escaped.
                this._cursor.advance();
            }
            else if (char === inQuote) {
                // Exiting the current quoted string
                inQuote = null;
            }
            else if (!inComment && inQuote === null && chars.isQuote(char)) {
                // Entering a new quoted string
                inQuote = char;
            }
        }
        // We hit EOF without finding a closing interpolation marker
        parts.push(this._getProcessedChars(expressionStart, this._cursor));
        this._endToken(parts);
    }
    /** Returns the text between `start` and `end` with carriage returns processed. */
    _getProcessedChars(start, end) {
        return this._processCarriageReturns(end.getChars(start));
    }
    /**
     * Returns true when the cursor is at a position that ends a text token: a tag
     * start, EOF or (when ICU tokenization is enabled) the start of an expansion form
     * or the `}` closing an expansion case.
     */
    _isTextEnd() {
        if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
            return true;
        }
        if (this._tokenizeIcu && !this._inInterpolation) {
            if (this.isExpansionFormStart()) {
                // start of an expansion form
                return true;
            }
            if (this._cursor.peek() === chars.$RBRACE && this._isInExpansionCase()) {
                // end of an expansion case
                return true;
            }
        }
        return false;
    }
    /**
     * Returns true if the current cursor is pointing to the start of a tag
     * (opening/closing/comments/cdata/etc).
     */
    _isTagStart() {
        if (this._cursor.peek() === chars.$LT) {
            // We assume that `<` followed by whitespace is not the start of an HTML element.
            const tmp = this._cursor.clone();
            tmp.advance();
            // If the next character is alphabetic, `!` or `/` then it is a tag start
            const code = tmp.peek();
            if ((chars.$a <= code && code <= chars.$z) || (chars.$A <= code && code <= chars.$Z) ||
                code === chars.$SLASH || code === chars.$BANG) {
                return true;
            }
        }
        return false;
    }
    /** Advances up to (not past) the next `char` and returns the text that was skipped. */
    _readUntil(char) {
        const start = this._cursor.clone();
        this._attemptUntilChar(char);
        return this._cursor.getChars(start);
    }
    /** Whether the innermost open ICU construct is an expansion case body. */
    _isInExpansionCase() {
        return this._expansionCaseStack.length > 0 &&
            this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
                21 /* EXPANSION_CASE_EXP_START */;
    }
    /** Whether the innermost open ICU construct is an expansion form. */
    _isInExpansionForm() {
        return this._expansionCaseStack.length > 0 &&
            this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
                19 /* EXPANSION_FORM_START */;
    }
    /**
     * Whether the cursor is at a `{` that starts an ICU expansion form, i.e. a `{`
     * that is not the beginning of the configured interpolation start marker.
     * The cursor position is left unchanged.
     */
    isExpansionFormStart() {
        if (this._cursor.peek() !== chars.$LBRACE) {
            return false;
        }
        if (this._interpolationConfig) {
            const start = this._cursor.clone();
            const isInterpolation = this._attemptStr(this._interpolationConfig.start);
            this._cursor = start;
            return !isInterpolation;
        }
        return true;
    }
}
|
---|
/**
 * Reports whether `code` is anything other than whitespace; EOF always counts as
 * "not whitespace".
 *
 * @param code Character code to classify.
 */
function isNotWhitespace(code) {
    if (chars.isWhitespace(code)) {
        return code === chars.$EOF;
    }
    return true;
}
|
---|
/**
 * Reports whether `code` terminates a name: whitespace, `>`, `<`, `/`, `'`, `"`, `=` or EOF.
 *
 * @param code Character code to classify.
 */
function isNameEnd(code) {
    if (chars.isWhitespace(code)) {
        return true;
    }
    switch (code) {
        case chars.$GT:
        case chars.$LT:
        case chars.$SLASH:
        case chars.$SQ:
        case chars.$DQ:
        case chars.$EQ:
        case chars.$EOF:
            return true;
        default:
            return false;
    }
}
|
---|
/**
 * Reports whether `code` ends a prefix, i.e. it lies outside all of the ranges
 * `a`-`z`, `A`-`Z` and `0`-`9`.
 *
 * @param code Character code to classify.
 */
function isPrefixEnd(code) {
    const outsideLowerCase = code < chars.$a || chars.$z < code;
    const outsideUpperCase = code < chars.$A || chars.$Z < code;
    const outsideDigits = code < chars.$0 || code > chars.$9;
    return outsideLowerCase && outsideUpperCase && outsideDigits;
}
|
---|
/**
 * Reports whether `code` ends the digits of a numeric character reference: a `;`, EOF,
 * or any character that is not an ASCII hex digit.
 *
 * @param code Character code to classify.
 */
function isDigitEntityEnd(code) {
    if (code === chars.$SEMICOLON || code === chars.$EOF) {
        return true;
    }
    return !chars.isAsciiHexDigit(code);
}
|
---|
/**
 * Reports whether `code` ends the name of a named entity: a `;`, EOF, or any character
 * that is not an ASCII letter.
 *
 * @param code Character code to classify.
 */
function isNamedEntityEnd(code) {
    if (code === chars.$SEMICOLON || code === chars.$EOF) {
        return true;
    }
    return !chars.isAsciiLetter(code);
}
|
---|
/**
 * Reports whether `peek` can start an expansion case, which is true for every character
 * except `}`.
 *
 * @param peek Character code at the cursor.
 */
function isExpansionCaseStart(peek) {
    const isClosingBrace = peek === chars.$RBRACE;
    return !isClosingBrace;
}
|
---|
/**
 * Compares two character codes, treating the ASCII letters `a`-`z` as equal to their
 * upper-case counterparts.
 *
 * @param code1 First character code.
 * @param code2 Second character code.
 * @returns `true` when both codes are equal after upper-casing.
 */
function compareCharCodeCaseInsensitive(code1, code2) {
    const upper1 = toUpperCaseCharCode(code1);
    const upper2 = toUpperCaseCharCode(code2);
    return upper1 === upper2;
}
|
---|
/**
 * Maps the character codes of `a`-`z` to those of `A`-`Z`; every other code is returned
 * unchanged.
 *
 * @param code Character code to convert.
 */
function toUpperCaseCharCode(code) {
    const isLowerCaseLetter = code >= chars.$a && code <= chars.$z;
    if (isLowerCaseLetter) {
        return code - chars.$a + chars.$A;
    }
    return code;
}
|
---|
/**
 * Collapses runs of adjacent TEXT tokens, and runs of adjacent ATTR_VALUE_TEXT tokens,
 * into a single token each.
 *
 * The first token of a run is reused as the merged token: the first part of each following
 * token is appended to its `parts[0]` and its `sourceSpan.end` is moved to the end of the
 * token that was merged in. The source tokens are therefore modified in place.
 *
 * @param srcTokens Tokens in source order.
 * @returns A new array containing the merged tokens.
 */
function mergeTextTokens(srcTokens) {
    const merged = [];
    let previous = undefined;
    for (const token of srcTokens) {
        const canMerge = previous &&
            (previous.type === 5 /* TEXT */ || previous.type === 16 /* ATTR_VALUE_TEXT */) &&
            token.type === previous.type;
        if (canMerge) {
            previous.parts[0] += token.parts[0];
            previous.sourceSpan.end = token.sourceSpan.end;
            continue;
        }
        previous = token;
        merged.push(token);
    }
    return merged;
}
|
---|
/**
 * Cursor over the raw characters of a source file.
 *
 * The cursor keeps its position in a `state` object holding the character code under the
 * cursor (`peek`) together with the current `offset`, `line` and `column`.
 */
class PlainCharacterCursor {
    /**
     * @param fileOrCursor Either the file to read from or an existing cursor to copy.
     * @param range Region of the file to iterate over; mandatory when a file is passed.
     */
    constructor(fileOrCursor, range) {
        if (!(fileOrCursor instanceof PlainCharacterCursor)) {
            if (!range) {
                throw new Error('Programming error: the range argument must be provided with a file argument.');
            }
            this.file = fileOrCursor;
            this.input = fileOrCursor.content;
            this.end = range.endPos;
            // `peek` stays at -1 until `init()` is called.
            this.state = {
                peek: -1,
                offset: range.startPos,
                line: range.startLine,
                column: range.startCol,
            };
        }
        else {
            this.file = fileOrCursor.file;
            this.input = fileOrCursor.input;
            this.end = fileOrCursor.end;
            // Note: the state is copied through a plain object literal on purpose. Object spread
            // is down-levelled to the `__assign` helper in ES5 bundles, which VMs do not optimize
            // as well, and this constructor runs in tight loops.
            const { peek, offset, line, column } = fileOrCursor.state;
            this.state = { peek, offset, line, column };
        }
    }
    /** Creates an independent copy of this cursor. */
    clone() {
        return new PlainCharacterCursor(this);
    }
    /** Returns the character code at the current position. */
    peek() {
        const { peek } = this.state;
        return peek;
    }
    /** Returns the number of characters between the current position and the end of the range. */
    charsLeft() {
        const { offset } = this.state;
        return this.end - offset;
    }
    /** Returns how many characters this cursor is ahead of `other`. */
    diff(other) {
        const ownOffset = this.state.offset;
        return ownOffset - other.state.offset;
    }
    /** Moves the cursor forward by one character. */
    advance() {
        const current = this.state;
        this.advanceState(current);
    }
    /** Loads `peek` for the starting position. */
    init() {
        const current = this.state;
        this.updatePeek(current);
    }
    /**
     * Builds the source span that runs from `start` (or from this cursor when `start` is
     * omitted) up to this cursor.
     *
     * When `leadingTriviaCodePoints` is given, leading characters whose code is listed there
     * are excluded from the span's start; the original start is still passed along as the
     * span's full start.
     */
    getSpan(start, leadingTriviaCodePoints) {
        const fullStart = start || this;
        let spanStart = fullStart;
        if (leadingTriviaCodePoints) {
            while (this.diff(spanStart) > 0 &&
                leadingTriviaCodePoints.indexOf(spanStart.peek()) !== -1) {
                if (spanStart === fullStart) {
                    // Work on a copy so that the caller's cursor is not moved.
                    spanStart = fullStart.clone();
                }
                spanStart.advance();
            }
        }
        const startLocation = this.locationFromCursor(spanStart);
        const endLocation = this.locationFromCursor(this);
        const fullStartLocation = spanStart === fullStart ? startLocation : this.locationFromCursor(fullStart);
        return new ParseSourceSpan(startLocation, endLocation, fullStartLocation);
    }
    /** Returns the input text between `start` and this cursor. */
    getChars(start) {
        const from = start.state.offset;
        const to = this.state.offset;
        return this.input.substring(from, to);
    }
    /** Returns the character code of the input at `pos`. */
    charAt(pos) {
        return this.input.charCodeAt(pos);
    }
    /**
     * Moves `state` forward by one character, updating its line, column and peek.
     *
     * Throws a `CursorError` when `state` is already at the end of the range.
     */
    advanceState(state) {
        const atEnd = state.offset >= this.end;
        if (atEnd) {
            this.state = state;
            throw new CursorError('Unexpected character "EOF"', this);
        }
        const consumed = this.charAt(state.offset);
        if (consumed === chars.$LF) {
            state.line += 1;
            state.column = 0;
        }
        else if (!chars.isNewLine(consumed)) {
            // Other newline characters leave both line and column untouched.
            state.column += 1;
        }
        state.offset += 1;
        this.updatePeek(state);
    }
    /** Refreshes `state.peek` from the input, using EOF once the end of the range is reached. */
    updatePeek(state) {
        if (state.offset >= this.end) {
            state.peek = chars.$EOF;
        }
        else {
            state.peek = this.charAt(state.offset);
        }
    }
    /** Converts the position of `cursor` into a `ParseLocation`. */
    locationFromCursor(cursor) {
        const { offset, line, column } = cursor.state;
        return new ParseLocation(cursor.file, offset, line, column);
    }
}
|
---|
/**
 * Cursor that interprets backslash escape sequences in the input, so that `peek()` yields
 * the unescaped character rather than the raw backslash.
 *
 * Two states are tracked. `internalState` follows the raw characters of the input. `state`
 * (inherited) is what callers observe: while the cursor sits on an escape sequence it stays
 * at the position of the backslash with `peek` overwritten by the decoded value, whereas
 * `internalState` has moved on to the last raw character of the sequence.
 */
class EscapedCharacterCursor extends PlainCharacterCursor {
    /**
     * @param fileOrCursor Either the file to read from or an existing cursor to copy.
     * @param range Region of the file to iterate over; used when a file is passed.
     */
    constructor(fileOrCursor, range) {
        if (fileOrCursor instanceof EscapedCharacterCursor) {
            super(fileOrCursor);
            // Copy the raw state too, so that the clone can advance independently.
            this.internalState = Object.assign({}, fileOrCursor.internalState);
        }
        else {
            super(fileOrCursor, range);
            // No escape sequence has been seen yet: both states are the same object.
            this.internalState = this.state;
        }
    }
    /** Moves past the current (possibly escaped) character and decodes the next one. */
    advance() {
        // Resume from the raw position so that a whole escape sequence is skipped at once.
        this.state = this.internalState;
        super.advance();
        this.processEscapeSequence();
    }
    /** Loads `peek` for the starting position, decoding an escape sequence found there. */
    init() {
        super.init();
        this.processEscapeSequence();
    }
    /** Creates an independent copy of this cursor. */
    clone() {
        return new EscapedCharacterCursor(this);
    }
    /**
     * Returns the unescaped text between `start` and this cursor.
     *
     * The text is rebuilt by walking a copy of `start` forward and collecting its `peek`
     * values until its raw offset reaches the raw offset of this cursor.
     */
    getChars(start) {
        const cursor = start.clone();
        // Note: this local shadows the imported `chars` module within this method.
        let chars = '';
        while (cursor.internalState.offset < this.internalState.offset) {
            chars += String.fromCodePoint(cursor.peek());
            cursor.advance();
        }
        return chars;
    }
    /**
     * Process the escape sequence that starts at the current position in the text.
     *
     * This method is called to ensure that `peek` has the unescaped value of escape sequences.
     * It does nothing unless the raw character at the current position is a backslash.
     */
    processEscapeSequence() {
        // Always reads from the current `internalState`, even after it has been replaced below.
        const peek = () => this.internalState.peek;
        if (peek() === chars.$BACKSLASH) {
            // We have hit an escape sequence so we need the internal state to become independent
            // of the external state.
            this.internalState = Object.assign({}, this.state);
            // Move past the backslash
            this.advanceState(this.internalState);
            // First check for standard control char sequences
            if (peek() === chars.$n) {
                this.state.peek = chars.$LF;
            }
            else if (peek() === chars.$r) {
                this.state.peek = chars.$CR;
            }
            else if (peek() === chars.$v) {
                this.state.peek = chars.$VTAB;
            }
            else if (peek() === chars.$t) {
                this.state.peek = chars.$TAB;
            }
            else if (peek() === chars.$b) {
                this.state.peek = chars.$BSPACE;
            }
            else if (peek() === chars.$f) {
                this.state.peek = chars.$FF;
            }
            // Now consider more complex sequences
            else if (peek() === chars.$u) {
                // Unicode code-point sequence
                this.advanceState(this.internalState); // advance past the `u` char
                if (peek() === chars.$LBRACE) {
                    // Variable length Unicode, e.g. `\u{123}`
                    this.advanceState(this.internalState); // advance past the `{` char
                    // Advance past the variable number of hex digits until we hit a `}` char
                    const digitStart = this.clone();
                    let length = 0;
                    while (peek() !== chars.$RBRACE) {
                        this.advanceState(this.internalState);
                        length++;
                    }
                    this.state.peek = this.decodeHexDigits(digitStart, length);
                }
                else {
                    // Fixed length Unicode, e.g. `\u1234`
                    const digitStart = this.clone();
                    // Three steps from the first digit leave the raw position on the fourth digit.
                    this.advanceState(this.internalState);
                    this.advanceState(this.internalState);
                    this.advanceState(this.internalState);
                    this.state.peek = this.decodeHexDigits(digitStart, 4);
                }
            }
            else if (peek() === chars.$x) {
                // Hex char code, e.g. `\x2F`
                this.advanceState(this.internalState); // advance past the `x` char
                const digitStart = this.clone();
                // One step from the first digit leaves the raw position on the second digit.
                this.advanceState(this.internalState);
                this.state.peek = this.decodeHexDigits(digitStart, 2);
            }
            else if (chars.isOctalDigit(peek())) {
                // Octal char code, e.g. `\012` (between one and three octal digits)
                let octal = '';
                let length = 0;
                let previous = this.clone();
                while (chars.isOctalDigit(peek()) && length < 3) {
                    // Remember the position of the digit that is about to be consumed.
                    previous = this.clone();
                    octal += String.fromCodePoint(peek());
                    this.advanceState(this.internalState);
                    length++;
                }
                this.state.peek = parseInt(octal, 8);
                // Backup one char, so that the raw position is on the last octal digit
                this.internalState = previous.internalState;
            }
            else if (chars.isNewLine(this.internalState.peek)) {
                // Line continuation `\` followed by a new line
                this.advanceState(this.internalState); // advance over the newline
                // Nothing is emitted for a continuation: expose the character after the newline.
                this.state = this.internalState;
            }
            else {
                // If none of the `if` blocks were executed then we just have an escaped normal character.
                // In that case we just, effectively, skip the backslash from the character.
                this.state.peek = this.internalState.peek;
            }
        }
    }
    /**
     * Decodes `length` raw characters, starting at the raw position of `start`, as a
     * hexadecimal number.
     *
     * Throws a `CursorError` located at `start` when `parseInt` yields `NaN`.
     * NOTE(review): `parseInt` stops at the first invalid character, so digits such as `1G`
     * decode to `1` instead of raising an error - confirm that this leniency is intended.
     */
    decodeHexDigits(start, length) {
        const hex = this.input.substr(start.internalState.offset, length);
        const charCode = parseInt(hex, 16);
        if (!isNaN(charCode)) {
            return charCode;
        }
        else {
            // Report the error at the raw position of the first digit.
            start.state = start.internalState;
            throw new CursorError('Invalid hexadecimal escape sequence', start);
        }
    }
}
|
---|
/**
 * Error value thrown by the character cursors.
 *
 * It carries the message in `msg` and the cursor at which the problem was detected in
 * `cursor`. Note that it does not extend `Error`.
 */
export class CursorError {
    /**
     * @param msg Description of the problem.
     * @param cursor Cursor positioned where the problem was detected.
     */
    constructor(msg, cursor) {
        Object.assign(this, { msg, cursor });
    }
}
|
---|
996 | //# sourceMappingURL=data:application/json;base64, |
---|