/**
 * @license
 * Copyright Google LLC All Rights Reserved.
 *
 * Use of this source code is governed by an MIT-style license that can be
 * found in the LICENSE file at https://angular.io/license
 */
8 | import { ParseError, ParseSourceSpan } from '../parse_util';
|
---|
9 | import * as html from './ast';
|
---|
10 | import { NAMED_ENTITIES } from './entities';
|
---|
11 | import { tokenize } from './lexer';
|
---|
12 | import { getNsPrefix, mergeNsAndName, splitNsName } from './tags';
|
---|
/**
 * A parse error raised while building the node tree, tagged with the name of
 * the element it relates to.
 */
export class TreeError extends ParseError {
    /**
     * @param elementName Full name of the element the error relates to. Callers in
     *     this file pass `null` for errors that are not tied to an element (ICU messages).
     * @param span Source span forwarded to `ParseError`.
     * @param msg Error message forwarded to `ParseError`.
     */
    constructor(elementName, span, msg) {
        super(span, msg);
        this.elementName = elementName;
    }

    /**
     * Factory equivalent of `new TreeError(elementName, span, msg)`.
     */
    static create(elementName, span, msg) {
        return new TreeError(elementName, span, msg);
    }
}
/**
 * Result of a parse: the top-level nodes that were produced together with the
 * errors that were collected along the way.
 */
export class ParseTreeResult {
    /**
     * @param rootNodes Top-level nodes of the parsed tree.
     * @param errors Errors collected while parsing.
     */
    constructor(rootNodes, errors) {
        this.rootNodes = rootNodes;
        this.errors = errors;
    }
}
/**
 * Entry point of the parser: tokenizes a source string and builds a node tree
 * from the resulting tokens.
 */
export class Parser {
    /**
     * @param getTagDefinition Function mapping a tag name to its tag definition. It is
     *     handed to both the tokenizer and the tree builder.
     */
    constructor(getTagDefinition) {
        this.getTagDefinition = getTagDefinition;
    }

    /**
     * Tokenizes `source` and builds the tree.
     *
     * @param source Text to parse.
     * @param url Passed through to `tokenize` unchanged.
     * @param options Passed through to `tokenize` unchanged.
     * @returns A `ParseTreeResult` whose `errors` list holds the tokenizer errors
     *     followed by the tree-building errors.
     */
    parse(source, url, options) {
        const tokenizeResult = tokenize(source, url, this.getTagDefinition, options);
        const parser = new _TreeBuilder(tokenizeResult.tokens, this.getTagDefinition);
        parser.build();
        return new ParseTreeResult(parser.rootNodes, tokenizeResult.errors.concat(parser.errors));
    }
}
/**
 * Turns a flat list of lexer tokens into a tree of `html` nodes.
 *
 * Token types are compared against numeric literals; the trailing comment on
 * each literal names the token type it stands for.
 */
class _TreeBuilder {
    /**
     * @param tokens Tokens to consume. The list is expected to end with an EOF token.
     * @param getTagDefinition Function mapping a tag name to its tag definition.
     */
    constructor(tokens, getTagDefinition) {
        this.tokens = tokens;
        this.getTagDefinition = getTagDefinition;
        this._index = -1;
        // Elements that are currently open; the last entry is the innermost one.
        this._elementStack = [];
        this.rootNodes = [];
        this.errors = [];
        // Load the first token into `_peek`.
        this._advance();
    }

    /**
     * Consumes every token up to EOF, filling `rootNodes` and `errors`.
     */
    build() {
        while (this._peek.type !== 24 /* EOF */) {
            if (this._peek.type === 0 /* TAG_OPEN_START */ ||
                this._peek.type === 4 /* INCOMPLETE_TAG_OPEN */) {
                this._consumeStartTag(this._advance());
            }
            else if (this._peek.type === 3 /* TAG_CLOSE */) {
                this._consumeEndTag(this._advance());
            }
            else if (this._peek.type === 12 /* CDATA_START */) {
                this._closeVoidElement();
                this._consumeCdata(this._advance());
            }
            else if (this._peek.type === 10 /* COMMENT_START */) {
                this._closeVoidElement();
                this._consumeComment(this._advance());
            }
            else if (this._peek.type === 5 /* TEXT */ || this._peek.type === 7 /* RAW_TEXT */ ||
                this._peek.type === 6 /* ESCAPABLE_RAW_TEXT */) {
                this._closeVoidElement();
                this._consumeText(this._advance());
            }
            else if (this._peek.type === 19 /* EXPANSION_FORM_START */) {
                this._consumeExpansion(this._advance());
            }
            else {
                // Skip all other tokens...
                this._advance();
            }
        }
    }

    /**
     * Moves `_peek` to the next token, staying on the last token once it is reached.
     *
     * @returns The token that was current before the call.
     */
    _advance() {
        const prev = this._peek;
        if (this._index < this.tokens.length - 1) {
            // Note: there is always an EOF token at the end
            this._index++;
        }
        this._peek = this.tokens[this._index];
        return prev;
    }

    /**
     * Advances only when the current token has the given type.
     *
     * @returns The consumed token, or `null` when the type did not match.
     */
    _advanceIf(type) {
        if (this._peek.type === type) {
            return this._advance();
        }
        return null;
    }

    /**
     * Consumes the token after CDATA_START as text, then an optional CDATA_END.
     */
    _consumeCdata(_startToken) {
        this._consumeText(this._advance());
        this._advanceIf(13 /* CDATA_END */);
    }

    /**
     * Adds a comment node whose value is the trimmed RAW_TEXT content, or `null`
     * when no RAW_TEXT token follows the comment start.
     */
    _consumeComment(token) {
        const text = this._advanceIf(7 /* RAW_TEXT */);
        this._advanceIf(11 /* COMMENT_END */);
        const value = text != null ? text.parts[0].trim() : null;
        this._addToParent(new html.Comment(value, token.sourceSpan));
    }

    /**
     * Consumes an expansion form: the switch value, the type, the cases and the
     * closing token. Nothing is added to the tree when a case fails to parse or
     * the closing token is missing.
     */
    _consumeExpansion(token) {
        const switchValue = this._advance();
        const type = this._advance();
        const cases = [];
        // read =
        while (this._peek.type === 20 /* EXPANSION_CASE_VALUE */) {
            const expCase = this._parseExpansionCase();
            if (!expCase) {
                return; // error
            }
            cases.push(expCase);
        }
        // read the final }
        if (this._peek.type !== 23 /* EXPANSION_FORM_END */) {
            this.errors.push(TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '}'.`));
            return;
        }
        const sourceSpan = new ParseSourceSpan(token.sourceSpan.start, this._peek.sourceSpan.end, token.sourceSpan.fullStart);
        this._addToParent(new html.Expansion(switchValue.parts[0], type.parts[0], cases, sourceSpan, switchValue.sourceSpan));
        this._advance();
    }

    /**
     * Parses one expansion case, building its body with a nested `_TreeBuilder`.
     *
     * @returns The `html.ExpansionCase`, or `null` when an error was recorded.
     */
    _parseExpansionCase() {
        const value = this._advance();
        // read {
        if (this._peek.type !== 21 /* EXPANSION_CASE_EXP_START */) {
            this.errors.push(TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '{'.`));
            return null;
        }
        // read until }
        const start = this._advance();
        const exp = this._collectExpansionExpTokens(start);
        if (!exp) {
            return null;
        }
        const end = this._advance();
        // The nested builder needs its own terminating EOF token.
        exp.push({ type: 24 /* EOF */, parts: [], sourceSpan: end.sourceSpan });
        // parse everything in between { and }
        const expansionCaseParser = new _TreeBuilder(exp, this.getTagDefinition);
        expansionCaseParser.build();
        if (expansionCaseParser.errors.length > 0) {
            this.errors = this.errors.concat(expansionCaseParser.errors);
            return null;
        }
        const sourceSpan = new ParseSourceSpan(value.sourceSpan.start, end.sourceSpan.end, value.sourceSpan.fullStart);
        const expSourceSpan = new ParseSourceSpan(start.sourceSpan.start, end.sourceSpan.end, start.sourceSpan.fullStart);
        return new html.ExpansionCase(value.parts[0], expansionCaseParser.rootNodes, sourceSpan, value.sourceSpan, expSourceSpan);
    }

    /**
     * Collects the tokens of an expansion case body, tracking nested expansion
     * forms and cases so that only the matching closing token ends the body.
     * The closing token itself is left unconsumed.
     *
     * @param start The EXPANSION_CASE_EXP_START token; its span is used for errors.
     * @returns The collected tokens, or `null` when the nesting is unbalanced or
     *     EOF is reached (an error is recorded in both cases).
     */
    _collectExpansionExpTokens(start) {
        const exp = [];
        const expansionFormStack = [21 /* EXPANSION_CASE_EXP_START */];
        while (true) {
            if (this._peek.type === 19 /* EXPANSION_FORM_START */ ||
                this._peek.type === 21 /* EXPANSION_CASE_EXP_START */) {
                expansionFormStack.push(this._peek.type);
            }
            if (this._peek.type === 22 /* EXPANSION_CASE_EXP_END */) {
                if (lastOnStack(expansionFormStack, 21 /* EXPANSION_CASE_EXP_START */)) {
                    expansionFormStack.pop();
                    if (expansionFormStack.length === 0) {
                        return exp;
                    }
                }
                else {
                    this.errors.push(TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
                    return null;
                }
            }
            if (this._peek.type === 23 /* EXPANSION_FORM_END */) {
                if (lastOnStack(expansionFormStack, 19 /* EXPANSION_FORM_START */)) {
                    expansionFormStack.pop();
                }
                else {
                    this.errors.push(TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
                    return null;
                }
            }
            if (this._peek.type === 24 /* EOF */) {
                this.errors.push(TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
                return null;
            }
            exp.push(this._advance());
        }
    }

    /**
     * Merges `token` with the INTERPOLATION, TEXT and ENCODED_ENTITY tokens that
     * follow it into a single text node. No node is added when the resulting
     * text is empty.
     */
    _consumeText(token) {
        const tokens = [token];
        const startSpan = token.sourceSpan;
        let text = token.parts[0];
        if (text.length > 0 && text[0] === '\n') {
            const parent = this._getParentElement();
            // Drop a leading line feed when it is the first content of an element
            // whose tag definition asks for it.
            if (parent != null && parent.children.length === 0 &&
                this.getTagDefinition(parent.name).ignoreFirstLf) {
                text = text.substring(1);
                tokens[0] = { type: token.type, sourceSpan: token.sourceSpan, parts: [text] };
            }
        }
        // Tracks the last token consumed so the node span can end at it.
        let lastToken = token;
        while (this._peek.type === 8 /* INTERPOLATION */ || this._peek.type === 5 /* TEXT */ ||
            this._peek.type === 9 /* ENCODED_ENTITY */) {
            lastToken = this._advance();
            tokens.push(lastToken);
            if (lastToken.type === 8 /* INTERPOLATION */) {
                // For backward compatibility we decode HTML entities that appear in interpolation
                // expressions. This is arguably a bug, but it could be a considerable breaking change to
                // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
                // chain after View Engine has been removed.
                text += lastToken.parts.join('').replace(/&([^;]+);/g, decodeEntity);
            }
            else if (lastToken.type === 9 /* ENCODED_ENTITY */) {
                text += lastToken.parts[0];
            }
            else {
                text += lastToken.parts.join('');
            }
        }
        if (text.length > 0) {
            const endSpan = lastToken.sourceSpan;
            this._addToParent(new html.Text(text, new ParseSourceSpan(startSpan.start, endSpan.end, startSpan.fullStart, startSpan.details), tokens));
        }
    }

    /**
     * Pops the innermost open element when its tag definition marks it as void.
     */
    _closeVoidElement() {
        const el = this._getParentElement();
        if (el && this.getTagDefinition(el.name).isVoid) {
            this._elementStack.pop();
        }
    }

    /**
     * Consumes a start tag with its attributes and pushes the new element.
     * Self-closed elements and incomplete opening tags are popped again
     * immediately; the latter also record an error.
     */
    _consumeStartTag(startTagToken) {
        const [prefix, name] = startTagToken.parts;
        const attrs = [];
        while (this._peek.type === 14 /* ATTR_NAME */) {
            attrs.push(this._consumeAttr(this._advance()));
        }
        const fullName = this._getElementFullName(prefix, name, this._getParentElement());
        let selfClosing = false;
        // Note: There could have been a tokenizer error
        // so that we don't get a token for the end tag...
        if (this._peek.type === 2 /* TAG_OPEN_END_VOID */) {
            this._advance();
            selfClosing = true;
            const tagDef = this.getTagDefinition(fullName);
            if (!(tagDef.canSelfClose || getNsPrefix(fullName) !== null || tagDef.isVoid)) {
                this.errors.push(TreeError.create(fullName, startTagToken.sourceSpan, `Only void and foreign elements can be self closed "${startTagToken.parts[1]}"`));
            }
        }
        else if (this._peek.type === 1 /* TAG_OPEN_END */) {
            this._advance();
        }
        const end = this._peek.sourceSpan.fullStart;
        const span = new ParseSourceSpan(startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
        // Create a separate `startSpan` because `span` will be modified when there is an `end` span.
        const startSpan = new ParseSourceSpan(startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
        const el = new html.Element(fullName, attrs, [], span, startSpan, undefined);
        this._pushElement(el);
        if (selfClosing) {
            // Elements that are self-closed have their `endSourceSpan` set to the full span, as the
            // element start tag also represents the end tag.
            this._popElement(fullName, span);
        }
        else if (startTagToken.type === 4 /* INCOMPLETE_TAG_OPEN */) {
            // We already know the opening tag is not complete, so it is unlikely it has a corresponding
            // close tag. Let's optimistically parse it as a full element and emit an error.
            this._popElement(fullName, null);
            this.errors.push(TreeError.create(fullName, span, `Opening tag "${fullName}" not terminated.`));
        }
    }

    /**
     * Adds `el` to the tree and makes it the innermost open element. The current
     * parent is popped first when its tag definition says `el` closes it.
     */
    _pushElement(el) {
        const parentEl = this._getParentElement();
        if (parentEl && this.getTagDefinition(parentEl.name).isClosedByChild(el.name)) {
            this._elementStack.pop();
        }
        this._addToParent(el);
        this._elementStack.push(el);
    }

    /**
     * Handles a closing tag. Records an error for closing tags of void elements
     * and for closing tags that `_popElement` reports as unexpected.
     */
    _consumeEndTag(endTagToken) {
        const fullName = this._getElementFullName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
        if (this.getTagDefinition(fullName).isVoid) {
            this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, `Void elements do not have end tags "${endTagToken.parts[1]}"`));
        }
        else if (!this._popElement(fullName, endTagToken.sourceSpan)) {
            const errMsg = `Unexpected closing tag "${fullName}". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags`;
            this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg));
        }
    }

    /**
     * Closes the nearest element with the tag name `fullName` in the parse tree.
     * `endSourceSpan` is the span of the closing tag, or null if the element does
     * not have a closing tag (for example, this happens when an incomplete
     * opening tag is recovered).
     *
     * @returns `true` when a matching element was found and every element above
     *     it on the stack may be closed by its parent; `false` otherwise.
     */
    _popElement(fullName, endSourceSpan) {
        let unexpectedCloseTagDetected = false;
        for (let stackIndex = this._elementStack.length - 1; stackIndex >= 0; stackIndex--) {
            const el = this._elementStack[stackIndex];
            if (el.name === fullName) {
                // Record the parse span with the element that is being closed. Any elements that are
                // removed from the element stack at this point are closed implicitly, so they won't get
                // an end source span (as there is no explicit closing element).
                el.endSourceSpan = endSourceSpan;
                el.sourceSpan.end = endSourceSpan !== null ? endSourceSpan.end : el.sourceSpan.end;
                this._elementStack.splice(stackIndex, this._elementStack.length - stackIndex);
                return !unexpectedCloseTagDetected;
            }
            if (!this.getTagDefinition(el.name).closedByParent) {
                // Note that we encountered an unexpected close tag but continue processing the element
                // stack so we can assign an `endSourceSpan` if there is a corresponding start tag for this
                // end tag in the stack.
                unexpectedCloseTagDetected = true;
            }
        }
        return false;
    }

    /**
     * Consumes an attribute: optional opening quote, value tokens and optional
     * closing quote.
     *
     * @param attrName The already consumed ATTR_NAME token.
     * @returns The `html.Attribute` node. Its value span and value tokens are
     *     `undefined` when the attribute has no value.
     */
    _consumeAttr(attrName) {
        const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
        let attrEnd = attrName.sourceSpan.end;
        // Consume any quote
        if (this._peek.type === 15 /* ATTR_QUOTE */) {
            this._advance();
        }
        // Consume the attribute value
        let value = '';
        const valueTokens = [];
        let valueStartSpan = undefined;
        let valueEnd = undefined;
        // NOTE: We need to use a new variable `nextTokenType` here to hide the actual type of
        // `_peek.type` from TS. Otherwise TS will narrow the type of `_peek.type` preventing it from
        // being able to consider `ATTR_VALUE_INTERPOLATION` as an option. This is because TS is not
        // able to see that `_advance()` will actually mutate `_peek`.
        const nextTokenType = this._peek.type;
        if (nextTokenType === 16 /* ATTR_VALUE_TEXT */) {
            valueStartSpan = this._peek.sourceSpan;
            valueEnd = this._peek.sourceSpan.end;
            while (this._peek.type === 16 /* ATTR_VALUE_TEXT */ ||
                this._peek.type === 17 /* ATTR_VALUE_INTERPOLATION */ ||
                this._peek.type === 9 /* ENCODED_ENTITY */) {
                const valueToken = this._advance();
                valueTokens.push(valueToken);
                if (valueToken.type === 17 /* ATTR_VALUE_INTERPOLATION */) {
                    // For backward compatibility we decode HTML entities that appear in interpolation
                    // expressions. This is arguably a bug, but it could be a considerable breaking change to
                    // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
                    // chain after View Engine has been removed.
                    value += valueToken.parts.join('').replace(/&([^;]+);/g, decodeEntity);
                }
                else if (valueToken.type === 9 /* ENCODED_ENTITY */) {
                    value += valueToken.parts[0];
                }
                else {
                    value += valueToken.parts.join('');
                }
                valueEnd = attrEnd = valueToken.sourceSpan.end;
            }
        }
        // Consume any quote
        if (this._peek.type === 15 /* ATTR_QUOTE */) {
            const quoteToken = this._advance();
            attrEnd = quoteToken.sourceSpan.end;
        }
        const valueSpan = valueStartSpan && valueEnd &&
            new ParseSourceSpan(valueStartSpan.start, valueEnd, valueStartSpan.fullStart);
        return new html.Attribute(fullName, value, new ParseSourceSpan(attrName.sourceSpan.start, attrEnd, attrName.sourceSpan.fullStart), attrName.sourceSpan, valueSpan, valueTokens.length > 0 ? valueTokens : undefined, undefined);
    }

    /**
     * @returns The innermost open element, or `null` when no element is open.
     */
    _getParentElement() {
        return this._elementStack.length > 0 ? this._elementStack[this._elementStack.length - 1] : null;
    }

    /**
     * Appends `node` to the innermost open element, or to `rootNodes` when no
     * element is open.
     */
    _addToParent(node) {
        const parent = this._getParentElement();
        if (parent != null) {
            parent.children.push(node);
        }
        else {
            this.rootNodes.push(node);
        }
    }

    /**
     * Resolves the full element name. When no explicit prefix is given, the
     * implicit prefix of the tag definition is used; failing that, the prefix of
     * the parent element is inherited unless the parent's tag definition
     * prevents namespace inheritance.
     */
    _getElementFullName(prefix, localName, parentElement) {
        if (prefix === '') {
            prefix = this.getTagDefinition(localName).implicitNamespacePrefix || '';
            if (prefix === '' && parentElement != null) {
                const parentTagName = splitNsName(parentElement.name)[1];
                const parentTagDefinition = this.getTagDefinition(parentTagName);
                if (!parentTagDefinition.preventNamespaceInheritance) {
                    prefix = getNsPrefix(parentElement.name);
                }
            }
        }
        return mergeNsAndName(prefix, localName);
    }
}
/**
 * Tells whether `element` is the last (top) entry of `stack`.
 *
 * @param stack Array used as a stack.
 * @param element Value compared against the top entry with `===`.
 * @returns `true` when the stack is non-empty and its last entry is `element`.
 */
function lastOnStack(stack, element) {
    return stack.length > 0 && stack[stack.length - 1] === element;
}
/**
 * Decode the `entity` string, which we believe is the contents of an HTML entity.
 *
 * If the string is not actually a valid/known entity then just return the original `match` string.
 *
 * The signature matches a `String.prototype.replace` callback for a pattern with one
 * capture group.
 *
 * @param match The full matched text, including the leading `&` and trailing `;`.
 * @param entity The text between `&` and `;`.
 * @returns The decoded character(s), or `match` when `entity` is not a known name and
 *     not a numeric reference to a valid Unicode code point.
 */
function decodeEntity(match, entity) {
    // Only own keys count as entity names; otherwise names inherited from
    // `Object.prototype` (e.g. `constructor`) would be "decoded" into function source text.
    const named = Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, entity) ?
        NAMED_ENTITIES[entity] :
        undefined;
    if (named !== undefined) {
        return named || match;
    }
    let codePoint = NaN;
    if (/^#x[a-f0-9]+$/i.test(entity)) {
        codePoint = parseInt(entity.slice(2), 16);
    }
    else if (/^#\d+$/.test(entity)) {
        codePoint = parseInt(entity.slice(1), 10);
    }
    // `String.fromCodePoint` throws a `RangeError` above U+10FFFF, so out-of-range
    // references are left undecoded. `NaN` (not a numeric reference) also fails this check.
    return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
}
408 | //# sourceMappingURL=data:application/json;base64, |
---|