1 | 'use strict';
|
---|
2 |
|
---|
3 | const Preprocessor = require('./preprocessor');
|
---|
4 | const unicode = require('../common/unicode');
|
---|
5 | const neTree = require('./named-entity-data');
|
---|
6 | const ERR = require('../common/error-codes');
|
---|
7 |
|
---|
8 | //Aliases
|
---|
9 | const $ = unicode.CODE_POINTS;
|
---|
10 | const $$ = unicode.CODE_POINT_SEQUENCES;
|
---|
11 |
|
---|
12 | //C1 Unicode control character reference replacements
|
---|
//Maps a code point in the 0x80-0x9f range to the code point that replaces it.
//NOTE: 0x81, 0x8d, 0x8f, 0x90 and 0x9d have no entry in this table.
const C1_CONTROLS_REFERENCE_REPLACEMENTS = {
    0x80: 0x20ac, // EURO SIGN
    0x82: 0x201a, // SINGLE LOW-9 QUOTATION MARK
    0x83: 0x0192, // LATIN SMALL LETTER F WITH HOOK
    0x84: 0x201e, // DOUBLE LOW-9 QUOTATION MARK
    0x85: 0x2026, // HORIZONTAL ELLIPSIS
    0x86: 0x2020, // DAGGER
    0x87: 0x2021, // DOUBLE DAGGER
    0x88: 0x02c6, // MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: 0x2030, // PER MILLE SIGN
    0x8a: 0x0160, // LATIN CAPITAL LETTER S WITH CARON
    0x8b: 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: 0x0152, // LATIN CAPITAL LIGATURE OE
    0x8e: 0x017d, // LATIN CAPITAL LETTER Z WITH CARON
    0x91: 0x2018, // LEFT SINGLE QUOTATION MARK
    0x92: 0x2019, // RIGHT SINGLE QUOTATION MARK
    0x93: 0x201c, // LEFT DOUBLE QUOTATION MARK
    0x94: 0x201d, // RIGHT DOUBLE QUOTATION MARK
    0x95: 0x2022, // BULLET
    0x96: 0x2013, // EN DASH
    0x97: 0x2014, // EM DASH
    0x98: 0x02dc, // SMALL TILDE
    0x99: 0x2122, // TRADE MARK SIGN
    0x9a: 0x0161, // LATIN SMALL LETTER S WITH CARON
    0x9b: 0x203a, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: 0x0153, // LATIN SMALL LIGATURE OE
    0x9e: 0x017e, // LATIN SMALL LETTER Z WITH CARON
    0x9f: 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
};
|
---|
42 |
|
---|
43 | // Named entity tree flags
|
---|
//Bit flags that can be combined into a single node marker value.
const HAS_DATA_FLAG = 0b001;
const DATA_DUPLET_FLAG = 0b010;
const HAS_BRANCHES_FLAG = 0b100;
//All three flags set: the largest value a marker can take.
const MAX_BRANCH_MARKER_VALUE = HAS_BRANCHES_FLAG | DATA_DUPLET_FLAG | HAS_DATA_FLAG;
|
---|
48 |
|
---|
49 | //States
|
---|
//NOTE: each constant holds the name of a tokenizer state. The same string is used as the
//key of the state handler method, so the tokenizer can dispatch with this[this.state](cp).

//Text content states
const DATA_STATE = 'DATA_STATE';
const RCDATA_STATE = 'RCDATA_STATE';
const RAWTEXT_STATE = 'RAWTEXT_STATE';
const SCRIPT_DATA_STATE = 'SCRIPT_DATA_STATE';
const PLAINTEXT_STATE = 'PLAINTEXT_STATE';

//Tag states
const TAG_OPEN_STATE = 'TAG_OPEN_STATE';
const END_TAG_OPEN_STATE = 'END_TAG_OPEN_STATE';
const TAG_NAME_STATE = 'TAG_NAME_STATE';

//RCDATA end tag states
const RCDATA_LESS_THAN_SIGN_STATE = 'RCDATA_LESS_THAN_SIGN_STATE';
const RCDATA_END_TAG_OPEN_STATE = 'RCDATA_END_TAG_OPEN_STATE';
const RCDATA_END_TAG_NAME_STATE = 'RCDATA_END_TAG_NAME_STATE';

//RAWTEXT end tag states
const RAWTEXT_LESS_THAN_SIGN_STATE = 'RAWTEXT_LESS_THAN_SIGN_STATE';
const RAWTEXT_END_TAG_OPEN_STATE = 'RAWTEXT_END_TAG_OPEN_STATE';
const RAWTEXT_END_TAG_NAME_STATE = 'RAWTEXT_END_TAG_NAME_STATE';

//Script data states
const SCRIPT_DATA_LESS_THAN_SIGN_STATE = 'SCRIPT_DATA_LESS_THAN_SIGN_STATE';
const SCRIPT_DATA_END_TAG_OPEN_STATE = 'SCRIPT_DATA_END_TAG_OPEN_STATE';
const SCRIPT_DATA_END_TAG_NAME_STATE = 'SCRIPT_DATA_END_TAG_NAME_STATE';
const SCRIPT_DATA_ESCAPE_START_STATE = 'SCRIPT_DATA_ESCAPE_START_STATE';
const SCRIPT_DATA_ESCAPE_START_DASH_STATE = 'SCRIPT_DATA_ESCAPE_START_DASH_STATE';
const SCRIPT_DATA_ESCAPED_STATE = 'SCRIPT_DATA_ESCAPED_STATE';
const SCRIPT_DATA_ESCAPED_DASH_STATE = 'SCRIPT_DATA_ESCAPED_DASH_STATE';
const SCRIPT_DATA_ESCAPED_DASH_DASH_STATE = 'SCRIPT_DATA_ESCAPED_DASH_DASH_STATE';
const SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE = 'SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE';
const SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE = 'SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE';
const SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE = 'SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE';
const SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE = 'SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE';
const SCRIPT_DATA_DOUBLE_ESCAPED_STATE = 'SCRIPT_DATA_DOUBLE_ESCAPED_STATE';
const SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE = 'SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE';
const SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE = 'SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE';
const SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE = 'SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE';
const SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE = 'SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE';

//Attribute states
const BEFORE_ATTRIBUTE_NAME_STATE = 'BEFORE_ATTRIBUTE_NAME_STATE';
const ATTRIBUTE_NAME_STATE = 'ATTRIBUTE_NAME_STATE';
const AFTER_ATTRIBUTE_NAME_STATE = 'AFTER_ATTRIBUTE_NAME_STATE';
const BEFORE_ATTRIBUTE_VALUE_STATE = 'BEFORE_ATTRIBUTE_VALUE_STATE';
const ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE = 'ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE';
const ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE = 'ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE';
const ATTRIBUTE_VALUE_UNQUOTED_STATE = 'ATTRIBUTE_VALUE_UNQUOTED_STATE';
const AFTER_ATTRIBUTE_VALUE_QUOTED_STATE = 'AFTER_ATTRIBUTE_VALUE_QUOTED_STATE';
const SELF_CLOSING_START_TAG_STATE = 'SELF_CLOSING_START_TAG_STATE';

//Comment states
const BOGUS_COMMENT_STATE = 'BOGUS_COMMENT_STATE';
const MARKUP_DECLARATION_OPEN_STATE = 'MARKUP_DECLARATION_OPEN_STATE';
const COMMENT_START_STATE = 'COMMENT_START_STATE';
const COMMENT_START_DASH_STATE = 'COMMENT_START_DASH_STATE';
const COMMENT_STATE = 'COMMENT_STATE';
const COMMENT_LESS_THAN_SIGN_STATE = 'COMMENT_LESS_THAN_SIGN_STATE';
const COMMENT_LESS_THAN_SIGN_BANG_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_STATE';
const COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE';
const COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE';
const COMMENT_END_DASH_STATE = 'COMMENT_END_DASH_STATE';
const COMMENT_END_STATE = 'COMMENT_END_STATE';
const COMMENT_END_BANG_STATE = 'COMMENT_END_BANG_STATE';

//DOCTYPE states
const DOCTYPE_STATE = 'DOCTYPE_STATE';
const BEFORE_DOCTYPE_NAME_STATE = 'BEFORE_DOCTYPE_NAME_STATE';
const DOCTYPE_NAME_STATE = 'DOCTYPE_NAME_STATE';
const AFTER_DOCTYPE_NAME_STATE = 'AFTER_DOCTYPE_NAME_STATE';
const AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE = 'AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE';
const BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE = 'BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE';
const DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE = 'DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE';
const DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE = 'DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE';
const AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE = 'AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE';
const BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE = 'BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE';
const AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE = 'AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE';
const BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE = 'BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE';
const DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE = 'DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE';
const DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE = 'DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE';
const AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE = 'AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE';
const BOGUS_DOCTYPE_STATE = 'BOGUS_DOCTYPE_STATE';

//CDATA section states
const CDATA_SECTION_STATE = 'CDATA_SECTION_STATE';
const CDATA_SECTION_BRACKET_STATE = 'CDATA_SECTION_BRACKET_STATE';
const CDATA_SECTION_END_STATE = 'CDATA_SECTION_END_STATE';

//Character reference states
const CHARACTER_REFERENCE_STATE = 'CHARACTER_REFERENCE_STATE';
const NAMED_CHARACTER_REFERENCE_STATE = 'NAMED_CHARACTER_REFERENCE_STATE';
//NOTE: the string value is spelled 'AMBIGUOS'; it is kept as is because it is a runtime value.
const AMBIGUOUS_AMPERSAND_STATE = 'AMBIGUOS_AMPERSAND_STATE';
const NUMERIC_CHARACTER_REFERENCE_STATE = 'NUMERIC_CHARACTER_REFERENCE_STATE';
const HEXADEMICAL_CHARACTER_REFERENCE_START_STATE = 'HEXADEMICAL_CHARACTER_REFERENCE_START_STATE';
const DECIMAL_CHARACTER_REFERENCE_START_STATE = 'DECIMAL_CHARACTER_REFERENCE_START_STATE';
const HEXADEMICAL_CHARACTER_REFERENCE_STATE = 'HEXADEMICAL_CHARACTER_REFERENCE_STATE';
const DECIMAL_CHARACTER_REFERENCE_STATE = 'DECIMAL_CHARACTER_REFERENCE_STATE';
const NUMERIC_CHARACTER_REFERENCE_END_STATE = 'NUMERIC_CHARACTER_REFERENCE_END_STATE';
|
---|
130 |
|
---|
131 | //Utils
|
---|
132 |
|
---|
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
|
---|
//Returns true when cp is SPACE, LINE FEED, TABULATION or FORM FEED.
function isWhitespace(cp) {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED:
            return true;

        default:
            return false;
    }
}
|
---|
139 |
|
---|
//Returns true when cp lies in the inclusive range DIGIT_0..DIGIT_9.
function isAsciiDigit(cp) {
    if (cp >= $.DIGIT_0) {
        return cp <= $.DIGIT_9;
    }

    return false;
}
|
---|
143 |
|
---|
//Returns true when cp lies in the inclusive range LATIN_CAPITAL_A..LATIN_CAPITAL_Z.
function isAsciiUpper(cp) {
    if (cp >= $.LATIN_CAPITAL_A) {
        return cp <= $.LATIN_CAPITAL_Z;
    }

    return false;
}
|
---|
147 |
|
---|
//Returns true when cp lies in the inclusive range LATIN_SMALL_A..LATIN_SMALL_Z.
function isAsciiLower(cp) {
    if (cp >= $.LATIN_SMALL_A) {
        return cp <= $.LATIN_SMALL_Z;
    }

    return false;
}
|
---|
151 |
|
---|
//Returns true when cp is an ASCII letter of either case.
function isAsciiLetter(cp) {
    if (isAsciiLower(cp)) {
        return true;
    }

    return isAsciiUpper(cp);
}
|
---|
155 |
|
---|
//Returns true when cp is an ASCII letter or an ASCII digit.
function isAsciiAlphaNumeric(cp) {
    if (isAsciiLetter(cp)) {
        return true;
    }

    return isAsciiDigit(cp);
}
|
---|
159 |
|
---|
//Returns true when cp lies in the inclusive range LATIN_CAPITAL_A..LATIN_CAPITAL_F.
function isAsciiUpperHexDigit(cp) {
    if (cp >= $.LATIN_CAPITAL_A) {
        return cp <= $.LATIN_CAPITAL_F;
    }

    return false;
}
|
---|
163 |
|
---|
//Returns true when cp lies in the inclusive range LATIN_SMALL_A..LATIN_SMALL_F.
function isAsciiLowerHexDigit(cp) {
    if (cp >= $.LATIN_SMALL_A) {
        return cp <= $.LATIN_SMALL_F;
    }

    return false;
}
|
---|
167 |
|
---|
//Returns true when cp is an ASCII digit or a hex letter (A-F, a-f).
function isAsciiHexDigit(cp) {
    if (isAsciiDigit(cp)) {
        return true;
    }

    if (isAsciiUpperHexDigit(cp)) {
        return true;
    }

    return isAsciiLowerHexDigit(cp);
}
|
---|
171 |
|
---|
//Adds 0x20 to cp, which turns an ASCII upper-case letter into its lower-case counterpart.
//NOTE: no range check is made here; the addition is applied to whatever cp is passed.
function toAsciiLowerCodePoint(cp) {
    const caseOffset = 0x0020;

    return cp + caseOffset;
}
|
---|
175 |
|
---|
//NOTE: String.fromCharCode() can only handle characters from the BMP subset,
//so code points above 0xffff are split into a surrogate pair manually.
//(see: https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/String/fromCharCode#Getting_it_to_work_with_higher_values)
function toChar(cp) {
    if (cp <= 0xffff) {
        return String.fromCharCode(cp);
    }

    const offset = cp - 0x10000;
    const highSurrogate = ((offset >>> 10) & 0x3ff) | 0xd800;
    const lowSurrogate = 0xdc00 | (offset & 0x3ff);

    return String.fromCharCode(highSurrogate, lowSurrogate);
}
|
---|
187 |
|
---|
//Returns the one-character string for cp shifted by toAsciiLowerCodePoint().
function toAsciiLowerChar(cp) {
    const loweredCp = toAsciiLowerCodePoint(cp);

    return String.fromCharCode(loweredCp);
}
|
---|
191 |
|
---|
//Binary search over the branch keys stored after nodeIx in neTree.
//Layout read here: neTree[nodeIx + 1] is the branch count, followed by that many
//keys, followed by the same number of values; the value paired with the key equal
//to cp is returned, or -1 when no key matches.
function findNamedEntityTreeBranch(nodeIx, cp) {
    const branchCount = neTree[nodeIx + 1];
    const firstKeyIx = nodeIx + 2;
    let left = firstKeyIx;
    let right = firstKeyIx + branchCount - 1;

    while (left <= right) {
        const middle = (left + right) >>> 1;
        const key = neTree[middle];

        if (key < cp) {
            left = middle + 1;
            continue;
        }

        if (key > cp) {
            right = middle - 1;
            continue;
        }

        //Values are stored branchCount slots after their keys.
        return neTree[middle + branchCount];
    }

    return -1;
}
|
---|
212 |
|
---|
213 | //Tokenizer
|
---|
214 | class Tokenizer {
|
---|
215 | constructor() {
|
---|
216 | this.preprocessor = new Preprocessor();
|
---|
217 |
|
---|
218 | this.tokenQueue = [];
|
---|
219 |
|
---|
220 | this.allowCDATA = false;
|
---|
221 |
|
---|
222 | this.state = DATA_STATE;
|
---|
223 | this.returnState = '';
|
---|
224 |
|
---|
225 | this.charRefCode = -1;
|
---|
226 | this.tempBuff = [];
|
---|
227 | this.lastStartTagName = '';
|
---|
228 |
|
---|
229 | this.consumedAfterSnapshot = -1;
|
---|
230 | this.active = false;
|
---|
231 |
|
---|
232 | this.currentCharacterToken = null;
|
---|
233 | this.currentToken = null;
|
---|
234 | this.currentAttr = null;
|
---|
235 | }
|
---|
236 |
|
---|
237 | //Errors
|
---|
238 | _err() {
|
---|
239 | // NOTE: err reporting is noop by default. Enabled by mixin.
|
---|
240 | }
|
---|
241 |
|
---|
242 | _errOnNextCodePoint(err) {
|
---|
243 | this._consume();
|
---|
244 | this._err(err);
|
---|
245 | this._unconsume();
|
---|
246 | }
|
---|
247 |
|
---|
248 | //API
|
---|
249 | getNextToken() {
|
---|
250 | while (!this.tokenQueue.length && this.active) {
|
---|
251 | this.consumedAfterSnapshot = 0;
|
---|
252 |
|
---|
253 | const cp = this._consume();
|
---|
254 |
|
---|
255 | if (!this._ensureHibernation()) {
|
---|
256 | this[this.state](cp);
|
---|
257 | }
|
---|
258 | }
|
---|
259 |
|
---|
260 | return this.tokenQueue.shift();
|
---|
261 | }
|
---|
262 |
|
---|
263 | write(chunk, isLastChunk) {
|
---|
264 | this.active = true;
|
---|
265 | this.preprocessor.write(chunk, isLastChunk);
|
---|
266 | }
|
---|
267 |
|
---|
268 | insertHtmlAtCurrentPos(chunk) {
|
---|
269 | this.active = true;
|
---|
270 | this.preprocessor.insertHtmlAtCurrentPos(chunk);
|
---|
271 | }
|
---|
272 |
|
---|
273 | //Hibernation
|
---|
274 | _ensureHibernation() {
|
---|
275 | if (this.preprocessor.endOfChunkHit) {
|
---|
276 | for (; this.consumedAfterSnapshot > 0; this.consumedAfterSnapshot--) {
|
---|
277 | this.preprocessor.retreat();
|
---|
278 | }
|
---|
279 |
|
---|
280 | this.active = false;
|
---|
281 | this.tokenQueue.push({ type: Tokenizer.HIBERNATION_TOKEN });
|
---|
282 |
|
---|
283 | return true;
|
---|
284 | }
|
---|
285 |
|
---|
286 | return false;
|
---|
287 | }
|
---|
288 |
|
---|
289 | //Consumption
|
---|
290 | _consume() {
|
---|
291 | this.consumedAfterSnapshot++;
|
---|
292 | return this.preprocessor.advance();
|
---|
293 | }
|
---|
294 |
|
---|
295 | _unconsume() {
|
---|
296 | this.consumedAfterSnapshot--;
|
---|
297 | this.preprocessor.retreat();
|
---|
298 | }
|
---|
299 |
|
---|
300 | _reconsumeInState(state) {
|
---|
301 | this.state = state;
|
---|
302 | this._unconsume();
|
---|
303 | }
|
---|
304 |
|
---|
305 | _consumeSequenceIfMatch(pattern, startCp, caseSensitive) {
|
---|
306 | let consumedCount = 0;
|
---|
307 | let isMatch = true;
|
---|
308 | const patternLength = pattern.length;
|
---|
309 | let patternPos = 0;
|
---|
310 | let cp = startCp;
|
---|
311 | let patternCp = void 0;
|
---|
312 |
|
---|
313 | for (; patternPos < patternLength; patternPos++) {
|
---|
314 | if (patternPos > 0) {
|
---|
315 | cp = this._consume();
|
---|
316 | consumedCount++;
|
---|
317 | }
|
---|
318 |
|
---|
319 | if (cp === $.EOF) {
|
---|
320 | isMatch = false;
|
---|
321 | break;
|
---|
322 | }
|
---|
323 |
|
---|
324 | patternCp = pattern[patternPos];
|
---|
325 |
|
---|
326 | if (cp !== patternCp && (caseSensitive || cp !== toAsciiLowerCodePoint(patternCp))) {
|
---|
327 | isMatch = false;
|
---|
328 | break;
|
---|
329 | }
|
---|
330 | }
|
---|
331 |
|
---|
332 | if (!isMatch) {
|
---|
333 | while (consumedCount--) {
|
---|
334 | this._unconsume();
|
---|
335 | }
|
---|
336 | }
|
---|
337 |
|
---|
338 | return isMatch;
|
---|
339 | }
|
---|
340 |
|
---|
341 | //Temp buffer
|
---|
342 | _isTempBufferEqualToScriptString() {
|
---|
343 | if (this.tempBuff.length !== $$.SCRIPT_STRING.length) {
|
---|
344 | return false;
|
---|
345 | }
|
---|
346 |
|
---|
347 | for (let i = 0; i < this.tempBuff.length; i++) {
|
---|
348 | if (this.tempBuff[i] !== $$.SCRIPT_STRING[i]) {
|
---|
349 | return false;
|
---|
350 | }
|
---|
351 | }
|
---|
352 |
|
---|
353 | return true;
|
---|
354 | }
|
---|
355 |
|
---|
356 | //Token creation
|
---|
357 | _createStartTagToken() {
|
---|
358 | this.currentToken = {
|
---|
359 | type: Tokenizer.START_TAG_TOKEN,
|
---|
360 | tagName: '',
|
---|
361 | selfClosing: false,
|
---|
362 | ackSelfClosing: false,
|
---|
363 | attrs: []
|
---|
364 | };
|
---|
365 | }
|
---|
366 |
|
---|
367 | _createEndTagToken() {
|
---|
368 | this.currentToken = {
|
---|
369 | type: Tokenizer.END_TAG_TOKEN,
|
---|
370 | tagName: '',
|
---|
371 | selfClosing: false,
|
---|
372 | attrs: []
|
---|
373 | };
|
---|
374 | }
|
---|
375 |
|
---|
376 | _createCommentToken() {
|
---|
377 | this.currentToken = {
|
---|
378 | type: Tokenizer.COMMENT_TOKEN,
|
---|
379 | data: ''
|
---|
380 | };
|
---|
381 | }
|
---|
382 |
|
---|
383 | _createDoctypeToken(initialName) {
|
---|
384 | this.currentToken = {
|
---|
385 | type: Tokenizer.DOCTYPE_TOKEN,
|
---|
386 | name: initialName,
|
---|
387 | forceQuirks: false,
|
---|
388 | publicId: null,
|
---|
389 | systemId: null
|
---|
390 | };
|
---|
391 | }
|
---|
392 |
|
---|
393 | _createCharacterToken(type, ch) {
|
---|
394 | this.currentCharacterToken = {
|
---|
395 | type: type,
|
---|
396 | chars: ch
|
---|
397 | };
|
---|
398 | }
|
---|
399 |
|
---|
400 | _createEOFToken() {
|
---|
401 | this.currentToken = { type: Tokenizer.EOF_TOKEN };
|
---|
402 | }
|
---|
403 |
|
---|
404 | //Tag attributes
|
---|
405 | _createAttr(attrNameFirstCh) {
|
---|
406 | this.currentAttr = {
|
---|
407 | name: attrNameFirstCh,
|
---|
408 | value: ''
|
---|
409 | };
|
---|
410 | }
|
---|
411 |
|
---|
412 | _leaveAttrName(toState) {
|
---|
413 | if (Tokenizer.getTokenAttr(this.currentToken, this.currentAttr.name) === null) {
|
---|
414 | this.currentToken.attrs.push(this.currentAttr);
|
---|
415 | } else {
|
---|
416 | this._err(ERR.duplicateAttribute);
|
---|
417 | }
|
---|
418 |
|
---|
419 | this.state = toState;
|
---|
420 | }
|
---|
421 |
|
---|
422 | _leaveAttrValue(toState) {
|
---|
423 | this.state = toState;
|
---|
424 | }
|
---|
425 |
|
---|
426 | //Token emission
|
---|
427 | _emitCurrentToken() {
|
---|
428 | this._emitCurrentCharacterToken();
|
---|
429 |
|
---|
430 | const ct = this.currentToken;
|
---|
431 |
|
---|
432 | this.currentToken = null;
|
---|
433 |
|
---|
434 | //NOTE: store emited start tag's tagName to determine is the following end tag token is appropriate.
|
---|
435 | if (ct.type === Tokenizer.START_TAG_TOKEN) {
|
---|
436 | this.lastStartTagName = ct.tagName;
|
---|
437 | } else if (ct.type === Tokenizer.END_TAG_TOKEN) {
|
---|
438 | if (ct.attrs.length > 0) {
|
---|
439 | this._err(ERR.endTagWithAttributes);
|
---|
440 | }
|
---|
441 |
|
---|
442 | if (ct.selfClosing) {
|
---|
443 | this._err(ERR.endTagWithTrailingSolidus);
|
---|
444 | }
|
---|
445 | }
|
---|
446 |
|
---|
447 | this.tokenQueue.push(ct);
|
---|
448 | }
|
---|
449 |
|
---|
450 | _emitCurrentCharacterToken() {
|
---|
451 | if (this.currentCharacterToken) {
|
---|
452 | this.tokenQueue.push(this.currentCharacterToken);
|
---|
453 | this.currentCharacterToken = null;
|
---|
454 | }
|
---|
455 | }
|
---|
456 |
|
---|
457 | _emitEOFToken() {
|
---|
458 | this._createEOFToken();
|
---|
459 | this._emitCurrentToken();
|
---|
460 | }
|
---|
461 |
|
---|
462 | //Characters emission
|
---|
463 |
|
---|
464 | //OPTIMIZATION: specification uses only one type of character tokens (one token per character).
|
---|
465 | //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
|
---|
466 | //If we have a sequence of characters that belong to the same group, parser can process it
|
---|
467 | //as a single solid character token.
|
---|
468 | //So, there are 3 types of character tokens in parse5:
|
---|
469 | //1)NULL_CHARACTER_TOKEN - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
|
---|
470 | //2)WHITESPACE_CHARACTER_TOKEN - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
|
---|
471 | //3)CHARACTER_TOKEN - any character sequence which don't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
|
---|
472 | _appendCharToCurrentCharacterToken(type, ch) {
|
---|
473 | if (this.currentCharacterToken && this.currentCharacterToken.type !== type) {
|
---|
474 | this._emitCurrentCharacterToken();
|
---|
475 | }
|
---|
476 |
|
---|
477 | if (this.currentCharacterToken) {
|
---|
478 | this.currentCharacterToken.chars += ch;
|
---|
479 | } else {
|
---|
480 | this._createCharacterToken(type, ch);
|
---|
481 | }
|
---|
482 | }
|
---|
483 |
|
---|
484 | _emitCodePoint(cp) {
|
---|
485 | let type = Tokenizer.CHARACTER_TOKEN;
|
---|
486 |
|
---|
487 | if (isWhitespace(cp)) {
|
---|
488 | type = Tokenizer.WHITESPACE_CHARACTER_TOKEN;
|
---|
489 | } else if (cp === $.NULL) {
|
---|
490 | type = Tokenizer.NULL_CHARACTER_TOKEN;
|
---|
491 | }
|
---|
492 |
|
---|
493 | this._appendCharToCurrentCharacterToken(type, toChar(cp));
|
---|
494 | }
|
---|
495 |
|
---|
496 | _emitSeveralCodePoints(codePoints) {
|
---|
497 | for (let i = 0; i < codePoints.length; i++) {
|
---|
498 | this._emitCodePoint(codePoints[i]);
|
---|
499 | }
|
---|
500 | }
|
---|
501 |
|
---|
502 | //NOTE: used then we emit character explicitly. This is always a non-whitespace and a non-null character.
|
---|
503 | //So we can avoid additional checks here.
|
---|
504 | _emitChars(ch) {
|
---|
505 | this._appendCharToCurrentCharacterToken(Tokenizer.CHARACTER_TOKEN, ch);
|
---|
506 | }
|
---|
507 |
|
---|
508 | // Character reference helpers
|
---|
509 | _matchNamedCharacterReference(startCp) {
|
---|
510 | let result = null;
|
---|
511 | let excess = 1;
|
---|
512 | let i = findNamedEntityTreeBranch(0, startCp);
|
---|
513 |
|
---|
514 | this.tempBuff.push(startCp);
|
---|
515 |
|
---|
516 | while (i > -1) {
|
---|
517 | const current = neTree[i];
|
---|
518 | const inNode = current < MAX_BRANCH_MARKER_VALUE;
|
---|
519 | const nodeWithData = inNode && current & HAS_DATA_FLAG;
|
---|
520 |
|
---|
521 | if (nodeWithData) {
|
---|
522 | //NOTE: we use greedy search, so we continue lookup at this point
|
---|
523 | result = current & DATA_DUPLET_FLAG ? [neTree[++i], neTree[++i]] : [neTree[++i]];
|
---|
524 | excess = 0;
|
---|
525 | }
|
---|
526 |
|
---|
527 | const cp = this._consume();
|
---|
528 |
|
---|
529 | this.tempBuff.push(cp);
|
---|
530 | excess++;
|
---|
531 |
|
---|
532 | if (cp === $.EOF) {
|
---|
533 | break;
|
---|
534 | }
|
---|
535 |
|
---|
536 | if (inNode) {
|
---|
537 | i = current & HAS_BRANCHES_FLAG ? findNamedEntityTreeBranch(i, cp) : -1;
|
---|
538 | } else {
|
---|
539 | i = cp === current ? ++i : -1;
|
---|
540 | }
|
---|
541 | }
|
---|
542 |
|
---|
543 | while (excess--) {
|
---|
544 | this.tempBuff.pop();
|
---|
545 | this._unconsume();
|
---|
546 | }
|
---|
547 |
|
---|
548 | return result;
|
---|
549 | }
|
---|
550 |
|
---|
551 | _isCharacterReferenceInAttribute() {
|
---|
552 | return (
|
---|
553 | this.returnState === ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ||
|
---|
554 | this.returnState === ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ||
|
---|
555 | this.returnState === ATTRIBUTE_VALUE_UNQUOTED_STATE
|
---|
556 | );
|
---|
557 | }
|
---|
558 |
|
---|
559 | _isCharacterReferenceAttributeQuirk(withSemicolon) {
|
---|
560 | if (!withSemicolon && this._isCharacterReferenceInAttribute()) {
|
---|
561 | const nextCp = this._consume();
|
---|
562 |
|
---|
563 | this._unconsume();
|
---|
564 |
|
---|
565 | return nextCp === $.EQUALS_SIGN || isAsciiAlphaNumeric(nextCp);
|
---|
566 | }
|
---|
567 |
|
---|
568 | return false;
|
---|
569 | }
|
---|
570 |
|
---|
571 | _flushCodePointsConsumedAsCharacterReference() {
|
---|
572 | if (this._isCharacterReferenceInAttribute()) {
|
---|
573 | for (let i = 0; i < this.tempBuff.length; i++) {
|
---|
574 | this.currentAttr.value += toChar(this.tempBuff[i]);
|
---|
575 | }
|
---|
576 | } else {
|
---|
577 | this._emitSeveralCodePoints(this.tempBuff);
|
---|
578 | }
|
---|
579 |
|
---|
580 | this.tempBuff = [];
|
---|
581 | }
|
---|
582 |
|
---|
583 | // State machine
|
---|
584 |
|
---|
585 | // Data state
|
---|
586 | //------------------------------------------------------------------
|
---|
587 | [DATA_STATE](cp) {
|
---|
588 | this.preprocessor.dropParsedChunk();
|
---|
589 |
|
---|
590 | if (cp === $.LESS_THAN_SIGN) {
|
---|
591 | this.state = TAG_OPEN_STATE;
|
---|
592 | } else if (cp === $.AMPERSAND) {
|
---|
593 | this.returnState = DATA_STATE;
|
---|
594 | this.state = CHARACTER_REFERENCE_STATE;
|
---|
595 | } else if (cp === $.NULL) {
|
---|
596 | this._err(ERR.unexpectedNullCharacter);
|
---|
597 | this._emitCodePoint(cp);
|
---|
598 | } else if (cp === $.EOF) {
|
---|
599 | this._emitEOFToken();
|
---|
600 | } else {
|
---|
601 | this._emitCodePoint(cp);
|
---|
602 | }
|
---|
603 | }
|
---|
604 |
|
---|
605 | // RCDATA state
|
---|
606 | //------------------------------------------------------------------
|
---|
607 | [RCDATA_STATE](cp) {
|
---|
608 | this.preprocessor.dropParsedChunk();
|
---|
609 |
|
---|
610 | if (cp === $.AMPERSAND) {
|
---|
611 | this.returnState = RCDATA_STATE;
|
---|
612 | this.state = CHARACTER_REFERENCE_STATE;
|
---|
613 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
614 | this.state = RCDATA_LESS_THAN_SIGN_STATE;
|
---|
615 | } else if (cp === $.NULL) {
|
---|
616 | this._err(ERR.unexpectedNullCharacter);
|
---|
617 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
618 | } else if (cp === $.EOF) {
|
---|
619 | this._emitEOFToken();
|
---|
620 | } else {
|
---|
621 | this._emitCodePoint(cp);
|
---|
622 | }
|
---|
623 | }
|
---|
624 |
|
---|
625 | // RAWTEXT state
|
---|
626 | //------------------------------------------------------------------
|
---|
627 | [RAWTEXT_STATE](cp) {
|
---|
628 | this.preprocessor.dropParsedChunk();
|
---|
629 |
|
---|
630 | if (cp === $.LESS_THAN_SIGN) {
|
---|
631 | this.state = RAWTEXT_LESS_THAN_SIGN_STATE;
|
---|
632 | } else if (cp === $.NULL) {
|
---|
633 | this._err(ERR.unexpectedNullCharacter);
|
---|
634 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
635 | } else if (cp === $.EOF) {
|
---|
636 | this._emitEOFToken();
|
---|
637 | } else {
|
---|
638 | this._emitCodePoint(cp);
|
---|
639 | }
|
---|
640 | }
|
---|
641 |
|
---|
642 | // Script data state
|
---|
643 | //------------------------------------------------------------------
|
---|
644 | [SCRIPT_DATA_STATE](cp) {
|
---|
645 | this.preprocessor.dropParsedChunk();
|
---|
646 |
|
---|
647 | if (cp === $.LESS_THAN_SIGN) {
|
---|
648 | this.state = SCRIPT_DATA_LESS_THAN_SIGN_STATE;
|
---|
649 | } else if (cp === $.NULL) {
|
---|
650 | this._err(ERR.unexpectedNullCharacter);
|
---|
651 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
652 | } else if (cp === $.EOF) {
|
---|
653 | this._emitEOFToken();
|
---|
654 | } else {
|
---|
655 | this._emitCodePoint(cp);
|
---|
656 | }
|
---|
657 | }
|
---|
658 |
|
---|
659 | // PLAINTEXT state
|
---|
660 | //------------------------------------------------------------------
|
---|
661 | [PLAINTEXT_STATE](cp) {
|
---|
662 | this.preprocessor.dropParsedChunk();
|
---|
663 |
|
---|
664 | if (cp === $.NULL) {
|
---|
665 | this._err(ERR.unexpectedNullCharacter);
|
---|
666 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
667 | } else if (cp === $.EOF) {
|
---|
668 | this._emitEOFToken();
|
---|
669 | } else {
|
---|
670 | this._emitCodePoint(cp);
|
---|
671 | }
|
---|
672 | }
|
---|
673 |
|
---|
674 | // Tag open state
|
---|
675 | //------------------------------------------------------------------
|
---|
676 | [TAG_OPEN_STATE](cp) {
|
---|
677 | if (cp === $.EXCLAMATION_MARK) {
|
---|
678 | this.state = MARKUP_DECLARATION_OPEN_STATE;
|
---|
679 | } else if (cp === $.SOLIDUS) {
|
---|
680 | this.state = END_TAG_OPEN_STATE;
|
---|
681 | } else if (isAsciiLetter(cp)) {
|
---|
682 | this._createStartTagToken();
|
---|
683 | this._reconsumeInState(TAG_NAME_STATE);
|
---|
684 | } else if (cp === $.QUESTION_MARK) {
|
---|
685 | this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
|
---|
686 | this._createCommentToken();
|
---|
687 | this._reconsumeInState(BOGUS_COMMENT_STATE);
|
---|
688 | } else if (cp === $.EOF) {
|
---|
689 | this._err(ERR.eofBeforeTagName);
|
---|
690 | this._emitChars('<');
|
---|
691 | this._emitEOFToken();
|
---|
692 | } else {
|
---|
693 | this._err(ERR.invalidFirstCharacterOfTagName);
|
---|
694 | this._emitChars('<');
|
---|
695 | this._reconsumeInState(DATA_STATE);
|
---|
696 | }
|
---|
697 | }
|
---|
698 |
|
---|
699 | // End tag open state
|
---|
700 | //------------------------------------------------------------------
|
---|
701 | [END_TAG_OPEN_STATE](cp) {
|
---|
702 | if (isAsciiLetter(cp)) {
|
---|
703 | this._createEndTagToken();
|
---|
704 | this._reconsumeInState(TAG_NAME_STATE);
|
---|
705 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
706 | this._err(ERR.missingEndTagName);
|
---|
707 | this.state = DATA_STATE;
|
---|
708 | } else if (cp === $.EOF) {
|
---|
709 | this._err(ERR.eofBeforeTagName);
|
---|
710 | this._emitChars('</');
|
---|
711 | this._emitEOFToken();
|
---|
712 | } else {
|
---|
713 | this._err(ERR.invalidFirstCharacterOfTagName);
|
---|
714 | this._createCommentToken();
|
---|
715 | this._reconsumeInState(BOGUS_COMMENT_STATE);
|
---|
716 | }
|
---|
717 | }
|
---|
718 |
|
---|
719 | // Tag name state
|
---|
720 | //------------------------------------------------------------------
|
---|
721 | [TAG_NAME_STATE](cp) {
|
---|
722 | if (isWhitespace(cp)) {
|
---|
723 | this.state = BEFORE_ATTRIBUTE_NAME_STATE;
|
---|
724 | } else if (cp === $.SOLIDUS) {
|
---|
725 | this.state = SELF_CLOSING_START_TAG_STATE;
|
---|
726 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
727 | this.state = DATA_STATE;
|
---|
728 | this._emitCurrentToken();
|
---|
729 | } else if (isAsciiUpper(cp)) {
|
---|
730 | this.currentToken.tagName += toAsciiLowerChar(cp);
|
---|
731 | } else if (cp === $.NULL) {
|
---|
732 | this._err(ERR.unexpectedNullCharacter);
|
---|
733 | this.currentToken.tagName += unicode.REPLACEMENT_CHARACTER;
|
---|
734 | } else if (cp === $.EOF) {
|
---|
735 | this._err(ERR.eofInTag);
|
---|
736 | this._emitEOFToken();
|
---|
737 | } else {
|
---|
738 | this.currentToken.tagName += toChar(cp);
|
---|
739 | }
|
---|
740 | }
|
---|
741 |
|
---|
742 | // RCDATA less-than sign state
|
---|
743 | //------------------------------------------------------------------
|
---|
744 | [RCDATA_LESS_THAN_SIGN_STATE](cp) {
|
---|
745 | if (cp === $.SOLIDUS) {
|
---|
746 | this.tempBuff = [];
|
---|
747 | this.state = RCDATA_END_TAG_OPEN_STATE;
|
---|
748 | } else {
|
---|
749 | this._emitChars('<');
|
---|
750 | this._reconsumeInState(RCDATA_STATE);
|
---|
751 | }
|
---|
752 | }
|
---|
753 |
|
---|
754 | // RCDATA end tag open state
|
---|
755 | //------------------------------------------------------------------
|
---|
756 | [RCDATA_END_TAG_OPEN_STATE](cp) {
|
---|
757 | if (isAsciiLetter(cp)) {
|
---|
758 | this._createEndTagToken();
|
---|
759 | this._reconsumeInState(RCDATA_END_TAG_NAME_STATE);
|
---|
760 | } else {
|
---|
761 | this._emitChars('</');
|
---|
762 | this._reconsumeInState(RCDATA_STATE);
|
---|
763 | }
|
---|
764 | }
|
---|
765 |
|
---|
766 | // RCDATA end tag name state
|
---|
767 | //------------------------------------------------------------------
|
---|
768 | [RCDATA_END_TAG_NAME_STATE](cp) {
|
---|
769 | if (isAsciiUpper(cp)) {
|
---|
770 | this.currentToken.tagName += toAsciiLowerChar(cp);
|
---|
771 | this.tempBuff.push(cp);
|
---|
772 | } else if (isAsciiLower(cp)) {
|
---|
773 | this.currentToken.tagName += toChar(cp);
|
---|
774 | this.tempBuff.push(cp);
|
---|
775 | } else {
|
---|
776 | if (this.lastStartTagName === this.currentToken.tagName) {
|
---|
777 | if (isWhitespace(cp)) {
|
---|
778 | this.state = BEFORE_ATTRIBUTE_NAME_STATE;
|
---|
779 | return;
|
---|
780 | }
|
---|
781 |
|
---|
782 | if (cp === $.SOLIDUS) {
|
---|
783 | this.state = SELF_CLOSING_START_TAG_STATE;
|
---|
784 | return;
|
---|
785 | }
|
---|
786 |
|
---|
787 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
788 | this.state = DATA_STATE;
|
---|
789 | this._emitCurrentToken();
|
---|
790 | return;
|
---|
791 | }
|
---|
792 | }
|
---|
793 |
|
---|
794 | this._emitChars('</');
|
---|
795 | this._emitSeveralCodePoints(this.tempBuff);
|
---|
796 | this._reconsumeInState(RCDATA_STATE);
|
---|
797 | }
|
---|
798 | }
|
---|
799 |
|
---|
800 | // RAWTEXT less-than sign state
|
---|
801 | //------------------------------------------------------------------
|
---|
802 | [RAWTEXT_LESS_THAN_SIGN_STATE](cp) {
|
---|
803 | if (cp === $.SOLIDUS) {
|
---|
804 | this.tempBuff = [];
|
---|
805 | this.state = RAWTEXT_END_TAG_OPEN_STATE;
|
---|
806 | } else {
|
---|
807 | this._emitChars('<');
|
---|
808 | this._reconsumeInState(RAWTEXT_STATE);
|
---|
809 | }
|
---|
810 | }
|
---|
811 |
|
---|
812 | // RAWTEXT end tag open state
|
---|
813 | //------------------------------------------------------------------
|
---|
814 | [RAWTEXT_END_TAG_OPEN_STATE](cp) {
|
---|
815 | if (isAsciiLetter(cp)) {
|
---|
816 | this._createEndTagToken();
|
---|
817 | this._reconsumeInState(RAWTEXT_END_TAG_NAME_STATE);
|
---|
818 | } else {
|
---|
819 | this._emitChars('</');
|
---|
820 | this._reconsumeInState(RAWTEXT_STATE);
|
---|
821 | }
|
---|
822 | }
|
---|
823 |
|
---|
824 | // RAWTEXT end tag name state
|
---|
825 | //------------------------------------------------------------------
|
---|
826 | [RAWTEXT_END_TAG_NAME_STATE](cp) {
|
---|
827 | if (isAsciiUpper(cp)) {
|
---|
828 | this.currentToken.tagName += toAsciiLowerChar(cp);
|
---|
829 | this.tempBuff.push(cp);
|
---|
830 | } else if (isAsciiLower(cp)) {
|
---|
831 | this.currentToken.tagName += toChar(cp);
|
---|
832 | this.tempBuff.push(cp);
|
---|
833 | } else {
|
---|
834 | if (this.lastStartTagName === this.currentToken.tagName) {
|
---|
835 | if (isWhitespace(cp)) {
|
---|
836 | this.state = BEFORE_ATTRIBUTE_NAME_STATE;
|
---|
837 | return;
|
---|
838 | }
|
---|
839 |
|
---|
840 | if (cp === $.SOLIDUS) {
|
---|
841 | this.state = SELF_CLOSING_START_TAG_STATE;
|
---|
842 | return;
|
---|
843 | }
|
---|
844 |
|
---|
845 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
846 | this._emitCurrentToken();
|
---|
847 | this.state = DATA_STATE;
|
---|
848 | return;
|
---|
849 | }
|
---|
850 | }
|
---|
851 |
|
---|
852 | this._emitChars('</');
|
---|
853 | this._emitSeveralCodePoints(this.tempBuff);
|
---|
854 | this._reconsumeInState(RAWTEXT_STATE);
|
---|
855 | }
|
---|
856 | }
|
---|
857 |
|
---|
858 | // Script data less-than sign state
|
---|
859 | //------------------------------------------------------------------
|
---|
860 | [SCRIPT_DATA_LESS_THAN_SIGN_STATE](cp) {
|
---|
861 | if (cp === $.SOLIDUS) {
|
---|
862 | this.tempBuff = [];
|
---|
863 | this.state = SCRIPT_DATA_END_TAG_OPEN_STATE;
|
---|
864 | } else if (cp === $.EXCLAMATION_MARK) {
|
---|
865 | this.state = SCRIPT_DATA_ESCAPE_START_STATE;
|
---|
866 | this._emitChars('<!');
|
---|
867 | } else {
|
---|
868 | this._emitChars('<');
|
---|
869 | this._reconsumeInState(SCRIPT_DATA_STATE);
|
---|
870 | }
|
---|
871 | }
|
---|
872 |
|
---|
873 | // Script data end tag open state
|
---|
874 | //------------------------------------------------------------------
|
---|
875 | [SCRIPT_DATA_END_TAG_OPEN_STATE](cp) {
|
---|
876 | if (isAsciiLetter(cp)) {
|
---|
877 | this._createEndTagToken();
|
---|
878 | this._reconsumeInState(SCRIPT_DATA_END_TAG_NAME_STATE);
|
---|
879 | } else {
|
---|
880 | this._emitChars('</');
|
---|
881 | this._reconsumeInState(SCRIPT_DATA_STATE);
|
---|
882 | }
|
---|
883 | }
|
---|
884 |
|
---|
885 | // Script data end tag name state
|
---|
886 | //------------------------------------------------------------------
|
---|
887 | [SCRIPT_DATA_END_TAG_NAME_STATE](cp) {
|
---|
888 | if (isAsciiUpper(cp)) {
|
---|
889 | this.currentToken.tagName += toAsciiLowerChar(cp);
|
---|
890 | this.tempBuff.push(cp);
|
---|
891 | } else if (isAsciiLower(cp)) {
|
---|
892 | this.currentToken.tagName += toChar(cp);
|
---|
893 | this.tempBuff.push(cp);
|
---|
894 | } else {
|
---|
895 | if (this.lastStartTagName === this.currentToken.tagName) {
|
---|
896 | if (isWhitespace(cp)) {
|
---|
897 | this.state = BEFORE_ATTRIBUTE_NAME_STATE;
|
---|
898 | return;
|
---|
899 | } else if (cp === $.SOLIDUS) {
|
---|
900 | this.state = SELF_CLOSING_START_TAG_STATE;
|
---|
901 | return;
|
---|
902 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
903 | this._emitCurrentToken();
|
---|
904 | this.state = DATA_STATE;
|
---|
905 | return;
|
---|
906 | }
|
---|
907 | }
|
---|
908 |
|
---|
909 | this._emitChars('</');
|
---|
910 | this._emitSeveralCodePoints(this.tempBuff);
|
---|
911 | this._reconsumeInState(SCRIPT_DATA_STATE);
|
---|
912 | }
|
---|
913 | }
|
---|
914 |
|
---|
915 | // Script data escape start state
|
---|
916 | //------------------------------------------------------------------
|
---|
917 | [SCRIPT_DATA_ESCAPE_START_STATE](cp) {
|
---|
918 | if (cp === $.HYPHEN_MINUS) {
|
---|
919 | this.state = SCRIPT_DATA_ESCAPE_START_DASH_STATE;
|
---|
920 | this._emitChars('-');
|
---|
921 | } else {
|
---|
922 | this._reconsumeInState(SCRIPT_DATA_STATE);
|
---|
923 | }
|
---|
924 | }
|
---|
925 |
|
---|
926 | // Script data escape start dash state
|
---|
927 | //------------------------------------------------------------------
|
---|
928 | [SCRIPT_DATA_ESCAPE_START_DASH_STATE](cp) {
|
---|
929 | if (cp === $.HYPHEN_MINUS) {
|
---|
930 | this.state = SCRIPT_DATA_ESCAPED_DASH_DASH_STATE;
|
---|
931 | this._emitChars('-');
|
---|
932 | } else {
|
---|
933 | this._reconsumeInState(SCRIPT_DATA_STATE);
|
---|
934 | }
|
---|
935 | }
|
---|
936 |
|
---|
937 | // Script data escaped state
|
---|
938 | //------------------------------------------------------------------
|
---|
939 | [SCRIPT_DATA_ESCAPED_STATE](cp) {
|
---|
940 | if (cp === $.HYPHEN_MINUS) {
|
---|
941 | this.state = SCRIPT_DATA_ESCAPED_DASH_STATE;
|
---|
942 | this._emitChars('-');
|
---|
943 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
944 | this.state = SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE;
|
---|
945 | } else if (cp === $.NULL) {
|
---|
946 | this._err(ERR.unexpectedNullCharacter);
|
---|
947 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
948 | } else if (cp === $.EOF) {
|
---|
949 | this._err(ERR.eofInScriptHtmlCommentLikeText);
|
---|
950 | this._emitEOFToken();
|
---|
951 | } else {
|
---|
952 | this._emitCodePoint(cp);
|
---|
953 | }
|
---|
954 | }
|
---|
955 |
|
---|
956 | // Script data escaped dash state
|
---|
957 | //------------------------------------------------------------------
|
---|
958 | [SCRIPT_DATA_ESCAPED_DASH_STATE](cp) {
|
---|
959 | if (cp === $.HYPHEN_MINUS) {
|
---|
960 | this.state = SCRIPT_DATA_ESCAPED_DASH_DASH_STATE;
|
---|
961 | this._emitChars('-');
|
---|
962 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
963 | this.state = SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE;
|
---|
964 | } else if (cp === $.NULL) {
|
---|
965 | this._err(ERR.unexpectedNullCharacter);
|
---|
966 | this.state = SCRIPT_DATA_ESCAPED_STATE;
|
---|
967 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
968 | } else if (cp === $.EOF) {
|
---|
969 | this._err(ERR.eofInScriptHtmlCommentLikeText);
|
---|
970 | this._emitEOFToken();
|
---|
971 | } else {
|
---|
972 | this.state = SCRIPT_DATA_ESCAPED_STATE;
|
---|
973 | this._emitCodePoint(cp);
|
---|
974 | }
|
---|
975 | }
|
---|
976 |
|
---|
977 | // Script data escaped dash dash state
|
---|
978 | //------------------------------------------------------------------
|
---|
979 | [SCRIPT_DATA_ESCAPED_DASH_DASH_STATE](cp) {
|
---|
980 | if (cp === $.HYPHEN_MINUS) {
|
---|
981 | this._emitChars('-');
|
---|
982 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
983 | this.state = SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE;
|
---|
984 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
985 | this.state = SCRIPT_DATA_STATE;
|
---|
986 | this._emitChars('>');
|
---|
987 | } else if (cp === $.NULL) {
|
---|
988 | this._err(ERR.unexpectedNullCharacter);
|
---|
989 | this.state = SCRIPT_DATA_ESCAPED_STATE;
|
---|
990 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
991 | } else if (cp === $.EOF) {
|
---|
992 | this._err(ERR.eofInScriptHtmlCommentLikeText);
|
---|
993 | this._emitEOFToken();
|
---|
994 | } else {
|
---|
995 | this.state = SCRIPT_DATA_ESCAPED_STATE;
|
---|
996 | this._emitCodePoint(cp);
|
---|
997 | }
|
---|
998 | }
|
---|
999 |
|
---|
1000 | // Script data escaped less-than sign state
|
---|
1001 | //------------------------------------------------------------------
|
---|
1002 | [SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN_STATE](cp) {
|
---|
1003 | if (cp === $.SOLIDUS) {
|
---|
1004 | this.tempBuff = [];
|
---|
1005 | this.state = SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE;
|
---|
1006 | } else if (isAsciiLetter(cp)) {
|
---|
1007 | this.tempBuff = [];
|
---|
1008 | this._emitChars('<');
|
---|
1009 | this._reconsumeInState(SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE);
|
---|
1010 | } else {
|
---|
1011 | this._emitChars('<');
|
---|
1012 | this._reconsumeInState(SCRIPT_DATA_ESCAPED_STATE);
|
---|
1013 | }
|
---|
1014 | }
|
---|
1015 |
|
---|
1016 | // Script data escaped end tag open state
|
---|
1017 | //------------------------------------------------------------------
|
---|
1018 | [SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE](cp) {
|
---|
1019 | if (isAsciiLetter(cp)) {
|
---|
1020 | this._createEndTagToken();
|
---|
1021 | this._reconsumeInState(SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE);
|
---|
1022 | } else {
|
---|
1023 | this._emitChars('</');
|
---|
1024 | this._reconsumeInState(SCRIPT_DATA_ESCAPED_STATE);
|
---|
1025 | }
|
---|
1026 | }
|
---|
1027 |
|
---|
1028 | // Script data escaped end tag name state
|
---|
1029 | //------------------------------------------------------------------
|
---|
1030 | [SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE](cp) {
|
---|
1031 | if (isAsciiUpper(cp)) {
|
---|
1032 | this.currentToken.tagName += toAsciiLowerChar(cp);
|
---|
1033 | this.tempBuff.push(cp);
|
---|
1034 | } else if (isAsciiLower(cp)) {
|
---|
1035 | this.currentToken.tagName += toChar(cp);
|
---|
1036 | this.tempBuff.push(cp);
|
---|
1037 | } else {
|
---|
1038 | if (this.lastStartTagName === this.currentToken.tagName) {
|
---|
1039 | if (isWhitespace(cp)) {
|
---|
1040 | this.state = BEFORE_ATTRIBUTE_NAME_STATE;
|
---|
1041 | return;
|
---|
1042 | }
|
---|
1043 |
|
---|
1044 | if (cp === $.SOLIDUS) {
|
---|
1045 | this.state = SELF_CLOSING_START_TAG_STATE;
|
---|
1046 | return;
|
---|
1047 | }
|
---|
1048 |
|
---|
1049 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1050 | this._emitCurrentToken();
|
---|
1051 | this.state = DATA_STATE;
|
---|
1052 | return;
|
---|
1053 | }
|
---|
1054 | }
|
---|
1055 |
|
---|
1056 | this._emitChars('</');
|
---|
1057 | this._emitSeveralCodePoints(this.tempBuff);
|
---|
1058 | this._reconsumeInState(SCRIPT_DATA_ESCAPED_STATE);
|
---|
1059 | }
|
---|
1060 | }
|
---|
1061 |
|
---|
1062 | // Script data double escape start state
|
---|
1063 | //------------------------------------------------------------------
|
---|
1064 | [SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE](cp) {
|
---|
1065 | if (isWhitespace(cp) || cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN) {
|
---|
1066 | this.state = this._isTempBufferEqualToScriptString()
|
---|
1067 | ? SCRIPT_DATA_DOUBLE_ESCAPED_STATE
|
---|
1068 | : SCRIPT_DATA_ESCAPED_STATE;
|
---|
1069 | this._emitCodePoint(cp);
|
---|
1070 | } else if (isAsciiUpper(cp)) {
|
---|
1071 | this.tempBuff.push(toAsciiLowerCodePoint(cp));
|
---|
1072 | this._emitCodePoint(cp);
|
---|
1073 | } else if (isAsciiLower(cp)) {
|
---|
1074 | this.tempBuff.push(cp);
|
---|
1075 | this._emitCodePoint(cp);
|
---|
1076 | } else {
|
---|
1077 | this._reconsumeInState(SCRIPT_DATA_ESCAPED_STATE);
|
---|
1078 | }
|
---|
1079 | }
|
---|
1080 |
|
---|
1081 | // Script data double escaped state
|
---|
1082 | //------------------------------------------------------------------
|
---|
1083 | [SCRIPT_DATA_DOUBLE_ESCAPED_STATE](cp) {
|
---|
1084 | if (cp === $.HYPHEN_MINUS) {
|
---|
1085 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE;
|
---|
1086 | this._emitChars('-');
|
---|
1087 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
1088 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE;
|
---|
1089 | this._emitChars('<');
|
---|
1090 | } else if (cp === $.NULL) {
|
---|
1091 | this._err(ERR.unexpectedNullCharacter);
|
---|
1092 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
1093 | } else if (cp === $.EOF) {
|
---|
1094 | this._err(ERR.eofInScriptHtmlCommentLikeText);
|
---|
1095 | this._emitEOFToken();
|
---|
1096 | } else {
|
---|
1097 | this._emitCodePoint(cp);
|
---|
1098 | }
|
---|
1099 | }
|
---|
1100 |
|
---|
1101 | // Script data double escaped dash state
|
---|
1102 | //------------------------------------------------------------------
|
---|
1103 | [SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE](cp) {
|
---|
1104 | if (cp === $.HYPHEN_MINUS) {
|
---|
1105 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE;
|
---|
1106 | this._emitChars('-');
|
---|
1107 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
1108 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE;
|
---|
1109 | this._emitChars('<');
|
---|
1110 | } else if (cp === $.NULL) {
|
---|
1111 | this._err(ERR.unexpectedNullCharacter);
|
---|
1112 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_STATE;
|
---|
1113 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
1114 | } else if (cp === $.EOF) {
|
---|
1115 | this._err(ERR.eofInScriptHtmlCommentLikeText);
|
---|
1116 | this._emitEOFToken();
|
---|
1117 | } else {
|
---|
1118 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_STATE;
|
---|
1119 | this._emitCodePoint(cp);
|
---|
1120 | }
|
---|
1121 | }
|
---|
1122 |
|
---|
1123 | // Script data double escaped dash dash state
|
---|
1124 | //------------------------------------------------------------------
|
---|
1125 | [SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE](cp) {
|
---|
1126 | if (cp === $.HYPHEN_MINUS) {
|
---|
1127 | this._emitChars('-');
|
---|
1128 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
1129 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE;
|
---|
1130 | this._emitChars('<');
|
---|
1131 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1132 | this.state = SCRIPT_DATA_STATE;
|
---|
1133 | this._emitChars('>');
|
---|
1134 | } else if (cp === $.NULL) {
|
---|
1135 | this._err(ERR.unexpectedNullCharacter);
|
---|
1136 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_STATE;
|
---|
1137 | this._emitChars(unicode.REPLACEMENT_CHARACTER);
|
---|
1138 | } else if (cp === $.EOF) {
|
---|
1139 | this._err(ERR.eofInScriptHtmlCommentLikeText);
|
---|
1140 | this._emitEOFToken();
|
---|
1141 | } else {
|
---|
1142 | this.state = SCRIPT_DATA_DOUBLE_ESCAPED_STATE;
|
---|
1143 | this._emitCodePoint(cp);
|
---|
1144 | }
|
---|
1145 | }
|
---|
1146 |
|
---|
1147 | // Script data double escaped less-than sign state
|
---|
1148 | //------------------------------------------------------------------
|
---|
1149 | [SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN_STATE](cp) {
|
---|
1150 | if (cp === $.SOLIDUS) {
|
---|
1151 | this.tempBuff = [];
|
---|
1152 | this.state = SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE;
|
---|
1153 | this._emitChars('/');
|
---|
1154 | } else {
|
---|
1155 | this._reconsumeInState(SCRIPT_DATA_DOUBLE_ESCAPED_STATE);
|
---|
1156 | }
|
---|
1157 | }
|
---|
1158 |
|
---|
1159 | // Script data double escape end state
|
---|
1160 | //------------------------------------------------------------------
|
---|
1161 | [SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE](cp) {
|
---|
1162 | if (isWhitespace(cp) || cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN) {
|
---|
1163 | this.state = this._isTempBufferEqualToScriptString()
|
---|
1164 | ? SCRIPT_DATA_ESCAPED_STATE
|
---|
1165 | : SCRIPT_DATA_DOUBLE_ESCAPED_STATE;
|
---|
1166 |
|
---|
1167 | this._emitCodePoint(cp);
|
---|
1168 | } else if (isAsciiUpper(cp)) {
|
---|
1169 | this.tempBuff.push(toAsciiLowerCodePoint(cp));
|
---|
1170 | this._emitCodePoint(cp);
|
---|
1171 | } else if (isAsciiLower(cp)) {
|
---|
1172 | this.tempBuff.push(cp);
|
---|
1173 | this._emitCodePoint(cp);
|
---|
1174 | } else {
|
---|
1175 | this._reconsumeInState(SCRIPT_DATA_DOUBLE_ESCAPED_STATE);
|
---|
1176 | }
|
---|
1177 | }
|
---|
1178 |
|
---|
1179 | // Before attribute name state
|
---|
1180 | //------------------------------------------------------------------
|
---|
1181 | [BEFORE_ATTRIBUTE_NAME_STATE](cp) {
|
---|
1182 | if (isWhitespace(cp)) {
|
---|
1183 | return;
|
---|
1184 | }
|
---|
1185 |
|
---|
1186 | if (cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN || cp === $.EOF) {
|
---|
1187 | this._reconsumeInState(AFTER_ATTRIBUTE_NAME_STATE);
|
---|
1188 | } else if (cp === $.EQUALS_SIGN) {
|
---|
1189 | this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
|
---|
1190 | this._createAttr('=');
|
---|
1191 | this.state = ATTRIBUTE_NAME_STATE;
|
---|
1192 | } else {
|
---|
1193 | this._createAttr('');
|
---|
1194 | this._reconsumeInState(ATTRIBUTE_NAME_STATE);
|
---|
1195 | }
|
---|
1196 | }
|
---|
1197 |
|
---|
1198 | // Attribute name state
|
---|
1199 | //------------------------------------------------------------------
|
---|
1200 | [ATTRIBUTE_NAME_STATE](cp) {
|
---|
1201 | if (isWhitespace(cp) || cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN || cp === $.EOF) {
|
---|
1202 | this._leaveAttrName(AFTER_ATTRIBUTE_NAME_STATE);
|
---|
1203 | this._unconsume();
|
---|
1204 | } else if (cp === $.EQUALS_SIGN) {
|
---|
1205 | this._leaveAttrName(BEFORE_ATTRIBUTE_VALUE_STATE);
|
---|
1206 | } else if (isAsciiUpper(cp)) {
|
---|
1207 | this.currentAttr.name += toAsciiLowerChar(cp);
|
---|
1208 | } else if (cp === $.QUOTATION_MARK || cp === $.APOSTROPHE || cp === $.LESS_THAN_SIGN) {
|
---|
1209 | this._err(ERR.unexpectedCharacterInAttributeName);
|
---|
1210 | this.currentAttr.name += toChar(cp);
|
---|
1211 | } else if (cp === $.NULL) {
|
---|
1212 | this._err(ERR.unexpectedNullCharacter);
|
---|
1213 | this.currentAttr.name += unicode.REPLACEMENT_CHARACTER;
|
---|
1214 | } else {
|
---|
1215 | this.currentAttr.name += toChar(cp);
|
---|
1216 | }
|
---|
1217 | }
|
---|
1218 |
|
---|
1219 | // After attribute name state
|
---|
1220 | //------------------------------------------------------------------
|
---|
1221 | [AFTER_ATTRIBUTE_NAME_STATE](cp) {
|
---|
1222 | if (isWhitespace(cp)) {
|
---|
1223 | return;
|
---|
1224 | }
|
---|
1225 |
|
---|
1226 | if (cp === $.SOLIDUS) {
|
---|
1227 | this.state = SELF_CLOSING_START_TAG_STATE;
|
---|
1228 | } else if (cp === $.EQUALS_SIGN) {
|
---|
1229 | this.state = BEFORE_ATTRIBUTE_VALUE_STATE;
|
---|
1230 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1231 | this.state = DATA_STATE;
|
---|
1232 | this._emitCurrentToken();
|
---|
1233 | } else if (cp === $.EOF) {
|
---|
1234 | this._err(ERR.eofInTag);
|
---|
1235 | this._emitEOFToken();
|
---|
1236 | } else {
|
---|
1237 | this._createAttr('');
|
---|
1238 | this._reconsumeInState(ATTRIBUTE_NAME_STATE);
|
---|
1239 | }
|
---|
1240 | }
|
---|
1241 |
|
---|
1242 | // Before attribute value state
|
---|
1243 | //------------------------------------------------------------------
|
---|
1244 | [BEFORE_ATTRIBUTE_VALUE_STATE](cp) {
|
---|
1245 | if (isWhitespace(cp)) {
|
---|
1246 | return;
|
---|
1247 | }
|
---|
1248 |
|
---|
1249 | if (cp === $.QUOTATION_MARK) {
|
---|
1250 | this.state = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
|
---|
1251 | } else if (cp === $.APOSTROPHE) {
|
---|
1252 | this.state = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
|
---|
1253 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1254 | this._err(ERR.missingAttributeValue);
|
---|
1255 | this.state = DATA_STATE;
|
---|
1256 | this._emitCurrentToken();
|
---|
1257 | } else {
|
---|
1258 | this._reconsumeInState(ATTRIBUTE_VALUE_UNQUOTED_STATE);
|
---|
1259 | }
|
---|
1260 | }
|
---|
1261 |
|
---|
1262 | // Attribute value (double-quoted) state
|
---|
1263 | //------------------------------------------------------------------
|
---|
1264 | [ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE](cp) {
|
---|
1265 | if (cp === $.QUOTATION_MARK) {
|
---|
1266 | this.state = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
|
---|
1267 | } else if (cp === $.AMPERSAND) {
|
---|
1268 | this.returnState = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
|
---|
1269 | this.state = CHARACTER_REFERENCE_STATE;
|
---|
1270 | } else if (cp === $.NULL) {
|
---|
1271 | this._err(ERR.unexpectedNullCharacter);
|
---|
1272 | this.currentAttr.value += unicode.REPLACEMENT_CHARACTER;
|
---|
1273 | } else if (cp === $.EOF) {
|
---|
1274 | this._err(ERR.eofInTag);
|
---|
1275 | this._emitEOFToken();
|
---|
1276 | } else {
|
---|
1277 | this.currentAttr.value += toChar(cp);
|
---|
1278 | }
|
---|
1279 | }
|
---|
1280 |
|
---|
1281 | // Attribute value (single-quoted) state
|
---|
1282 | //------------------------------------------------------------------
|
---|
1283 | [ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE](cp) {
|
---|
1284 | if (cp === $.APOSTROPHE) {
|
---|
1285 | this.state = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
|
---|
1286 | } else if (cp === $.AMPERSAND) {
|
---|
1287 | this.returnState = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
|
---|
1288 | this.state = CHARACTER_REFERENCE_STATE;
|
---|
1289 | } else if (cp === $.NULL) {
|
---|
1290 | this._err(ERR.unexpectedNullCharacter);
|
---|
1291 | this.currentAttr.value += unicode.REPLACEMENT_CHARACTER;
|
---|
1292 | } else if (cp === $.EOF) {
|
---|
1293 | this._err(ERR.eofInTag);
|
---|
1294 | this._emitEOFToken();
|
---|
1295 | } else {
|
---|
1296 | this.currentAttr.value += toChar(cp);
|
---|
1297 | }
|
---|
1298 | }
|
---|
1299 |
|
---|
1300 | // Attribute value (unquoted) state
|
---|
1301 | //------------------------------------------------------------------
|
---|
1302 | [ATTRIBUTE_VALUE_UNQUOTED_STATE](cp) {
|
---|
1303 | if (isWhitespace(cp)) {
|
---|
1304 | this._leaveAttrValue(BEFORE_ATTRIBUTE_NAME_STATE);
|
---|
1305 | } else if (cp === $.AMPERSAND) {
|
---|
1306 | this.returnState = ATTRIBUTE_VALUE_UNQUOTED_STATE;
|
---|
1307 | this.state = CHARACTER_REFERENCE_STATE;
|
---|
1308 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1309 | this._leaveAttrValue(DATA_STATE);
|
---|
1310 | this._emitCurrentToken();
|
---|
1311 | } else if (cp === $.NULL) {
|
---|
1312 | this._err(ERR.unexpectedNullCharacter);
|
---|
1313 | this.currentAttr.value += unicode.REPLACEMENT_CHARACTER;
|
---|
1314 | } else if (
|
---|
1315 | cp === $.QUOTATION_MARK ||
|
---|
1316 | cp === $.APOSTROPHE ||
|
---|
1317 | cp === $.LESS_THAN_SIGN ||
|
---|
1318 | cp === $.EQUALS_SIGN ||
|
---|
1319 | cp === $.GRAVE_ACCENT
|
---|
1320 | ) {
|
---|
1321 | this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
|
---|
1322 | this.currentAttr.value += toChar(cp);
|
---|
1323 | } else if (cp === $.EOF) {
|
---|
1324 | this._err(ERR.eofInTag);
|
---|
1325 | this._emitEOFToken();
|
---|
1326 | } else {
|
---|
1327 | this.currentAttr.value += toChar(cp);
|
---|
1328 | }
|
---|
1329 | }
|
---|
1330 |
|
---|
1331 | // After attribute value (quoted) state
|
---|
1332 | //------------------------------------------------------------------
|
---|
1333 | [AFTER_ATTRIBUTE_VALUE_QUOTED_STATE](cp) {
|
---|
1334 | if (isWhitespace(cp)) {
|
---|
1335 | this._leaveAttrValue(BEFORE_ATTRIBUTE_NAME_STATE);
|
---|
1336 | } else if (cp === $.SOLIDUS) {
|
---|
1337 | this._leaveAttrValue(SELF_CLOSING_START_TAG_STATE);
|
---|
1338 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1339 | this._leaveAttrValue(DATA_STATE);
|
---|
1340 | this._emitCurrentToken();
|
---|
1341 | } else if (cp === $.EOF) {
|
---|
1342 | this._err(ERR.eofInTag);
|
---|
1343 | this._emitEOFToken();
|
---|
1344 | } else {
|
---|
1345 | this._err(ERR.missingWhitespaceBetweenAttributes);
|
---|
1346 | this._reconsumeInState(BEFORE_ATTRIBUTE_NAME_STATE);
|
---|
1347 | }
|
---|
1348 | }
|
---|
1349 |
|
---|
1350 | // Self-closing start tag state
|
---|
1351 | //------------------------------------------------------------------
|
---|
1352 | [SELF_CLOSING_START_TAG_STATE](cp) {
|
---|
1353 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1354 | this.currentToken.selfClosing = true;
|
---|
1355 | this.state = DATA_STATE;
|
---|
1356 | this._emitCurrentToken();
|
---|
1357 | } else if (cp === $.EOF) {
|
---|
1358 | this._err(ERR.eofInTag);
|
---|
1359 | this._emitEOFToken();
|
---|
1360 | } else {
|
---|
1361 | this._err(ERR.unexpectedSolidusInTag);
|
---|
1362 | this._reconsumeInState(BEFORE_ATTRIBUTE_NAME_STATE);
|
---|
1363 | }
|
---|
1364 | }
|
---|
1365 |
|
---|
1366 | // Bogus comment state
|
---|
1367 | //------------------------------------------------------------------
|
---|
1368 | [BOGUS_COMMENT_STATE](cp) {
|
---|
1369 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1370 | this.state = DATA_STATE;
|
---|
1371 | this._emitCurrentToken();
|
---|
1372 | } else if (cp === $.EOF) {
|
---|
1373 | this._emitCurrentToken();
|
---|
1374 | this._emitEOFToken();
|
---|
1375 | } else if (cp === $.NULL) {
|
---|
1376 | this._err(ERR.unexpectedNullCharacter);
|
---|
1377 | this.currentToken.data += unicode.REPLACEMENT_CHARACTER;
|
---|
1378 | } else {
|
---|
1379 | this.currentToken.data += toChar(cp);
|
---|
1380 | }
|
---|
1381 | }
|
---|
1382 |
|
---|
1383 | // Markup declaration open state
|
---|
1384 | //------------------------------------------------------------------
|
---|
1385 | [MARKUP_DECLARATION_OPEN_STATE](cp) {
|
---|
1386 | if (this._consumeSequenceIfMatch($$.DASH_DASH_STRING, cp, true)) {
|
---|
1387 | this._createCommentToken();
|
---|
1388 | this.state = COMMENT_START_STATE;
|
---|
1389 | } else if (this._consumeSequenceIfMatch($$.DOCTYPE_STRING, cp, false)) {
|
---|
1390 | this.state = DOCTYPE_STATE;
|
---|
1391 | } else if (this._consumeSequenceIfMatch($$.CDATA_START_STRING, cp, true)) {
|
---|
1392 | if (this.allowCDATA) {
|
---|
1393 | this.state = CDATA_SECTION_STATE;
|
---|
1394 | } else {
|
---|
1395 | this._err(ERR.cdataInHtmlContent);
|
---|
1396 | this._createCommentToken();
|
---|
1397 | this.currentToken.data = '[CDATA[';
|
---|
1398 | this.state = BOGUS_COMMENT_STATE;
|
---|
1399 | }
|
---|
1400 | }
|
---|
1401 |
|
---|
1402 | //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
|
---|
1403 | //results are no longer valid and we will need to start over.
|
---|
1404 | else if (!this._ensureHibernation()) {
|
---|
1405 | this._err(ERR.incorrectlyOpenedComment);
|
---|
1406 | this._createCommentToken();
|
---|
1407 | this._reconsumeInState(BOGUS_COMMENT_STATE);
|
---|
1408 | }
|
---|
1409 | }
|
---|
1410 |
|
---|
1411 | // Comment start state
|
---|
1412 | //------------------------------------------------------------------
|
---|
1413 | [COMMENT_START_STATE](cp) {
|
---|
1414 | if (cp === $.HYPHEN_MINUS) {
|
---|
1415 | this.state = COMMENT_START_DASH_STATE;
|
---|
1416 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1417 | this._err(ERR.abruptClosingOfEmptyComment);
|
---|
1418 | this.state = DATA_STATE;
|
---|
1419 | this._emitCurrentToken();
|
---|
1420 | } else {
|
---|
1421 | this._reconsumeInState(COMMENT_STATE);
|
---|
1422 | }
|
---|
1423 | }
|
---|
1424 |
|
---|
1425 | // Comment start dash state
|
---|
1426 | //------------------------------------------------------------------
|
---|
1427 | [COMMENT_START_DASH_STATE](cp) {
|
---|
1428 | if (cp === $.HYPHEN_MINUS) {
|
---|
1429 | this.state = COMMENT_END_STATE;
|
---|
1430 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1431 | this._err(ERR.abruptClosingOfEmptyComment);
|
---|
1432 | this.state = DATA_STATE;
|
---|
1433 | this._emitCurrentToken();
|
---|
1434 | } else if (cp === $.EOF) {
|
---|
1435 | this._err(ERR.eofInComment);
|
---|
1436 | this._emitCurrentToken();
|
---|
1437 | this._emitEOFToken();
|
---|
1438 | } else {
|
---|
1439 | this.currentToken.data += '-';
|
---|
1440 | this._reconsumeInState(COMMENT_STATE);
|
---|
1441 | }
|
---|
1442 | }
|
---|
1443 |
|
---|
1444 | // Comment state
|
---|
1445 | //------------------------------------------------------------------
|
---|
1446 | [COMMENT_STATE](cp) {
|
---|
1447 | if (cp === $.HYPHEN_MINUS) {
|
---|
1448 | this.state = COMMENT_END_DASH_STATE;
|
---|
1449 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
1450 | this.currentToken.data += '<';
|
---|
1451 | this.state = COMMENT_LESS_THAN_SIGN_STATE;
|
---|
1452 | } else if (cp === $.NULL) {
|
---|
1453 | this._err(ERR.unexpectedNullCharacter);
|
---|
1454 | this.currentToken.data += unicode.REPLACEMENT_CHARACTER;
|
---|
1455 | } else if (cp === $.EOF) {
|
---|
1456 | this._err(ERR.eofInComment);
|
---|
1457 | this._emitCurrentToken();
|
---|
1458 | this._emitEOFToken();
|
---|
1459 | } else {
|
---|
1460 | this.currentToken.data += toChar(cp);
|
---|
1461 | }
|
---|
1462 | }
|
---|
1463 |
|
---|
1464 | // Comment less-than sign state
|
---|
1465 | //------------------------------------------------------------------
|
---|
1466 | [COMMENT_LESS_THAN_SIGN_STATE](cp) {
|
---|
1467 | if (cp === $.EXCLAMATION_MARK) {
|
---|
1468 | this.currentToken.data += '!';
|
---|
1469 | this.state = COMMENT_LESS_THAN_SIGN_BANG_STATE;
|
---|
1470 | } else if (cp === $.LESS_THAN_SIGN) {
|
---|
1471 | this.currentToken.data += '!';
|
---|
1472 | } else {
|
---|
1473 | this._reconsumeInState(COMMENT_STATE);
|
---|
1474 | }
|
---|
1475 | }
|
---|
1476 |
|
---|
1477 | // Comment less-than sign bang state
|
---|
1478 | //------------------------------------------------------------------
|
---|
1479 | [COMMENT_LESS_THAN_SIGN_BANG_STATE](cp) {
|
---|
1480 | if (cp === $.HYPHEN_MINUS) {
|
---|
1481 | this.state = COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE;
|
---|
1482 | } else {
|
---|
1483 | this._reconsumeInState(COMMENT_STATE);
|
---|
1484 | }
|
---|
1485 | }
|
---|
1486 |
|
---|
1487 | // Comment less-than sign bang dash state
|
---|
1488 | //------------------------------------------------------------------
|
---|
1489 | [COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE](cp) {
|
---|
1490 | if (cp === $.HYPHEN_MINUS) {
|
---|
1491 | this.state = COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE;
|
---|
1492 | } else {
|
---|
1493 | this._reconsumeInState(COMMENT_END_DASH_STATE);
|
---|
1494 | }
|
---|
1495 | }
|
---|
1496 |
|
---|
1497 | // Comment less-than sign bang dash dash state
|
---|
1498 | //------------------------------------------------------------------
|
---|
1499 | [COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE](cp) {
|
---|
1500 | if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) {
|
---|
1501 | this._err(ERR.nestedComment);
|
---|
1502 | }
|
---|
1503 |
|
---|
1504 | this._reconsumeInState(COMMENT_END_STATE);
|
---|
1505 | }
|
---|
1506 |
|
---|
1507 | // Comment end dash state
|
---|
1508 | //------------------------------------------------------------------
|
---|
1509 | [COMMENT_END_DASH_STATE](cp) {
|
---|
1510 | if (cp === $.HYPHEN_MINUS) {
|
---|
1511 | this.state = COMMENT_END_STATE;
|
---|
1512 | } else if (cp === $.EOF) {
|
---|
1513 | this._err(ERR.eofInComment);
|
---|
1514 | this._emitCurrentToken();
|
---|
1515 | this._emitEOFToken();
|
---|
1516 | } else {
|
---|
1517 | this.currentToken.data += '-';
|
---|
1518 | this._reconsumeInState(COMMENT_STATE);
|
---|
1519 | }
|
---|
1520 | }
|
---|
1521 |
|
---|
1522 | // Comment end state
|
---|
1523 | //------------------------------------------------------------------
|
---|
1524 | [COMMENT_END_STATE](cp) {
|
---|
1525 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1526 | this.state = DATA_STATE;
|
---|
1527 | this._emitCurrentToken();
|
---|
1528 | } else if (cp === $.EXCLAMATION_MARK) {
|
---|
1529 | this.state = COMMENT_END_BANG_STATE;
|
---|
1530 | } else if (cp === $.HYPHEN_MINUS) {
|
---|
1531 | this.currentToken.data += '-';
|
---|
1532 | } else if (cp === $.EOF) {
|
---|
1533 | this._err(ERR.eofInComment);
|
---|
1534 | this._emitCurrentToken();
|
---|
1535 | this._emitEOFToken();
|
---|
1536 | } else {
|
---|
1537 | this.currentToken.data += '--';
|
---|
1538 | this._reconsumeInState(COMMENT_STATE);
|
---|
1539 | }
|
---|
1540 | }
|
---|
1541 |
|
---|
1542 | // Comment end bang state
|
---|
1543 | //------------------------------------------------------------------
|
---|
1544 | [COMMENT_END_BANG_STATE](cp) {
|
---|
1545 | if (cp === $.HYPHEN_MINUS) {
|
---|
1546 | this.currentToken.data += '--!';
|
---|
1547 | this.state = COMMENT_END_DASH_STATE;
|
---|
1548 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1549 | this._err(ERR.incorrectlyClosedComment);
|
---|
1550 | this.state = DATA_STATE;
|
---|
1551 | this._emitCurrentToken();
|
---|
1552 | } else if (cp === $.EOF) {
|
---|
1553 | this._err(ERR.eofInComment);
|
---|
1554 | this._emitCurrentToken();
|
---|
1555 | this._emitEOFToken();
|
---|
1556 | } else {
|
---|
1557 | this.currentToken.data += '--!';
|
---|
1558 | this._reconsumeInState(COMMENT_STATE);
|
---|
1559 | }
|
---|
1560 | }
|
---|
1561 |
|
---|
1562 | // DOCTYPE state
|
---|
1563 | //------------------------------------------------------------------
|
---|
1564 | [DOCTYPE_STATE](cp) {
|
---|
1565 | if (isWhitespace(cp)) {
|
---|
1566 | this.state = BEFORE_DOCTYPE_NAME_STATE;
|
---|
1567 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1568 | this._reconsumeInState(BEFORE_DOCTYPE_NAME_STATE);
|
---|
1569 | } else if (cp === $.EOF) {
|
---|
1570 | this._err(ERR.eofInDoctype);
|
---|
1571 | this._createDoctypeToken(null);
|
---|
1572 | this.currentToken.forceQuirks = true;
|
---|
1573 | this._emitCurrentToken();
|
---|
1574 | this._emitEOFToken();
|
---|
1575 | } else {
|
---|
1576 | this._err(ERR.missingWhitespaceBeforeDoctypeName);
|
---|
1577 | this._reconsumeInState(BEFORE_DOCTYPE_NAME_STATE);
|
---|
1578 | }
|
---|
1579 | }
|
---|
1580 |
|
---|
1581 | // Before DOCTYPE name state
|
---|
1582 | //------------------------------------------------------------------
|
---|
1583 | [BEFORE_DOCTYPE_NAME_STATE](cp) {
|
---|
1584 | if (isWhitespace(cp)) {
|
---|
1585 | return;
|
---|
1586 | }
|
---|
1587 |
|
---|
1588 | if (isAsciiUpper(cp)) {
|
---|
1589 | this._createDoctypeToken(toAsciiLowerChar(cp));
|
---|
1590 | this.state = DOCTYPE_NAME_STATE;
|
---|
1591 | } else if (cp === $.NULL) {
|
---|
1592 | this._err(ERR.unexpectedNullCharacter);
|
---|
1593 | this._createDoctypeToken(unicode.REPLACEMENT_CHARACTER);
|
---|
1594 | this.state = DOCTYPE_NAME_STATE;
|
---|
1595 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1596 | this._err(ERR.missingDoctypeName);
|
---|
1597 | this._createDoctypeToken(null);
|
---|
1598 | this.currentToken.forceQuirks = true;
|
---|
1599 | this._emitCurrentToken();
|
---|
1600 | this.state = DATA_STATE;
|
---|
1601 | } else if (cp === $.EOF) {
|
---|
1602 | this._err(ERR.eofInDoctype);
|
---|
1603 | this._createDoctypeToken(null);
|
---|
1604 | this.currentToken.forceQuirks = true;
|
---|
1605 | this._emitCurrentToken();
|
---|
1606 | this._emitEOFToken();
|
---|
1607 | } else {
|
---|
1608 | this._createDoctypeToken(toChar(cp));
|
---|
1609 | this.state = DOCTYPE_NAME_STATE;
|
---|
1610 | }
|
---|
1611 | }
|
---|
1612 |
|
---|
1613 | // DOCTYPE name state
|
---|
1614 | //------------------------------------------------------------------
|
---|
1615 | [DOCTYPE_NAME_STATE](cp) {
|
---|
1616 | if (isWhitespace(cp)) {
|
---|
1617 | this.state = AFTER_DOCTYPE_NAME_STATE;
|
---|
1618 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1619 | this.state = DATA_STATE;
|
---|
1620 | this._emitCurrentToken();
|
---|
1621 | } else if (isAsciiUpper(cp)) {
|
---|
1622 | this.currentToken.name += toAsciiLowerChar(cp);
|
---|
1623 | } else if (cp === $.NULL) {
|
---|
1624 | this._err(ERR.unexpectedNullCharacter);
|
---|
1625 | this.currentToken.name += unicode.REPLACEMENT_CHARACTER;
|
---|
1626 | } else if (cp === $.EOF) {
|
---|
1627 | this._err(ERR.eofInDoctype);
|
---|
1628 | this.currentToken.forceQuirks = true;
|
---|
1629 | this._emitCurrentToken();
|
---|
1630 | this._emitEOFToken();
|
---|
1631 | } else {
|
---|
1632 | this.currentToken.name += toChar(cp);
|
---|
1633 | }
|
---|
1634 | }
|
---|
1635 |
|
---|
1636 | // After DOCTYPE name state
|
---|
1637 | //------------------------------------------------------------------
|
---|
1638 | [AFTER_DOCTYPE_NAME_STATE](cp) {
|
---|
1639 | if (isWhitespace(cp)) {
|
---|
1640 | return;
|
---|
1641 | }
|
---|
1642 |
|
---|
1643 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1644 | this.state = DATA_STATE;
|
---|
1645 | this._emitCurrentToken();
|
---|
1646 | } else if (cp === $.EOF) {
|
---|
1647 | this._err(ERR.eofInDoctype);
|
---|
1648 | this.currentToken.forceQuirks = true;
|
---|
1649 | this._emitCurrentToken();
|
---|
1650 | this._emitEOFToken();
|
---|
1651 | } else if (this._consumeSequenceIfMatch($$.PUBLIC_STRING, cp, false)) {
|
---|
1652 | this.state = AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE;
|
---|
1653 | } else if (this._consumeSequenceIfMatch($$.SYSTEM_STRING, cp, false)) {
|
---|
1654 | this.state = AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE;
|
---|
1655 | }
|
---|
1656 | //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
|
---|
1657 | //results are no longer valid and we will need to start over.
|
---|
1658 | else if (!this._ensureHibernation()) {
|
---|
1659 | this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
|
---|
1660 | this.currentToken.forceQuirks = true;
|
---|
1661 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1662 | }
|
---|
1663 | }
|
---|
1664 |
|
---|
1665 | // After DOCTYPE public keyword state
|
---|
1666 | //------------------------------------------------------------------
|
---|
1667 | [AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE](cp) {
|
---|
1668 | if (isWhitespace(cp)) {
|
---|
1669 | this.state = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
|
---|
1670 | } else if (cp === $.QUOTATION_MARK) {
|
---|
1671 | this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
|
---|
1672 | this.currentToken.publicId = '';
|
---|
1673 | this.state = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
|
---|
1674 | } else if (cp === $.APOSTROPHE) {
|
---|
1675 | this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
|
---|
1676 | this.currentToken.publicId = '';
|
---|
1677 | this.state = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
|
---|
1678 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1679 | this._err(ERR.missingDoctypePublicIdentifier);
|
---|
1680 | this.currentToken.forceQuirks = true;
|
---|
1681 | this.state = DATA_STATE;
|
---|
1682 | this._emitCurrentToken();
|
---|
1683 | } else if (cp === $.EOF) {
|
---|
1684 | this._err(ERR.eofInDoctype);
|
---|
1685 | this.currentToken.forceQuirks = true;
|
---|
1686 | this._emitCurrentToken();
|
---|
1687 | this._emitEOFToken();
|
---|
1688 | } else {
|
---|
1689 | this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
|
---|
1690 | this.currentToken.forceQuirks = true;
|
---|
1691 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1692 | }
|
---|
1693 | }
|
---|
1694 |
|
---|
1695 | // Before DOCTYPE public identifier state
|
---|
1696 | //------------------------------------------------------------------
|
---|
1697 | [BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE](cp) {
|
---|
1698 | if (isWhitespace(cp)) {
|
---|
1699 | return;
|
---|
1700 | }
|
---|
1701 |
|
---|
1702 | if (cp === $.QUOTATION_MARK) {
|
---|
1703 | this.currentToken.publicId = '';
|
---|
1704 | this.state = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
|
---|
1705 | } else if (cp === $.APOSTROPHE) {
|
---|
1706 | this.currentToken.publicId = '';
|
---|
1707 | this.state = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
|
---|
1708 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1709 | this._err(ERR.missingDoctypePublicIdentifier);
|
---|
1710 | this.currentToken.forceQuirks = true;
|
---|
1711 | this.state = DATA_STATE;
|
---|
1712 | this._emitCurrentToken();
|
---|
1713 | } else if (cp === $.EOF) {
|
---|
1714 | this._err(ERR.eofInDoctype);
|
---|
1715 | this.currentToken.forceQuirks = true;
|
---|
1716 | this._emitCurrentToken();
|
---|
1717 | this._emitEOFToken();
|
---|
1718 | } else {
|
---|
1719 | this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
|
---|
1720 | this.currentToken.forceQuirks = true;
|
---|
1721 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1722 | }
|
---|
1723 | }
|
---|
1724 |
|
---|
1725 | // DOCTYPE public identifier (double-quoted) state
|
---|
1726 | //------------------------------------------------------------------
|
---|
1727 | [DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE](cp) {
|
---|
1728 | if (cp === $.QUOTATION_MARK) {
|
---|
1729 | this.state = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
|
---|
1730 | } else if (cp === $.NULL) {
|
---|
1731 | this._err(ERR.unexpectedNullCharacter);
|
---|
1732 | this.currentToken.publicId += unicode.REPLACEMENT_CHARACTER;
|
---|
1733 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1734 | this._err(ERR.abruptDoctypePublicIdentifier);
|
---|
1735 | this.currentToken.forceQuirks = true;
|
---|
1736 | this._emitCurrentToken();
|
---|
1737 | this.state = DATA_STATE;
|
---|
1738 | } else if (cp === $.EOF) {
|
---|
1739 | this._err(ERR.eofInDoctype);
|
---|
1740 | this.currentToken.forceQuirks = true;
|
---|
1741 | this._emitCurrentToken();
|
---|
1742 | this._emitEOFToken();
|
---|
1743 | } else {
|
---|
1744 | this.currentToken.publicId += toChar(cp);
|
---|
1745 | }
|
---|
1746 | }
|
---|
1747 |
|
---|
1748 | // DOCTYPE public identifier (single-quoted) state
|
---|
1749 | //------------------------------------------------------------------
|
---|
1750 | [DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE](cp) {
|
---|
1751 | if (cp === $.APOSTROPHE) {
|
---|
1752 | this.state = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
|
---|
1753 | } else if (cp === $.NULL) {
|
---|
1754 | this._err(ERR.unexpectedNullCharacter);
|
---|
1755 | this.currentToken.publicId += unicode.REPLACEMENT_CHARACTER;
|
---|
1756 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1757 | this._err(ERR.abruptDoctypePublicIdentifier);
|
---|
1758 | this.currentToken.forceQuirks = true;
|
---|
1759 | this._emitCurrentToken();
|
---|
1760 | this.state = DATA_STATE;
|
---|
1761 | } else if (cp === $.EOF) {
|
---|
1762 | this._err(ERR.eofInDoctype);
|
---|
1763 | this.currentToken.forceQuirks = true;
|
---|
1764 | this._emitCurrentToken();
|
---|
1765 | this._emitEOFToken();
|
---|
1766 | } else {
|
---|
1767 | this.currentToken.publicId += toChar(cp);
|
---|
1768 | }
|
---|
1769 | }
|
---|
1770 |
|
---|
1771 | // After DOCTYPE public identifier state
|
---|
1772 | //------------------------------------------------------------------
|
---|
1773 | [AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE](cp) {
|
---|
1774 | if (isWhitespace(cp)) {
|
---|
1775 | this.state = BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE;
|
---|
1776 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1777 | this.state = DATA_STATE;
|
---|
1778 | this._emitCurrentToken();
|
---|
1779 | } else if (cp === $.QUOTATION_MARK) {
|
---|
1780 | this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
|
---|
1781 | this.currentToken.systemId = '';
|
---|
1782 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
|
---|
1783 | } else if (cp === $.APOSTROPHE) {
|
---|
1784 | this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
|
---|
1785 | this.currentToken.systemId = '';
|
---|
1786 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
|
---|
1787 | } else if (cp === $.EOF) {
|
---|
1788 | this._err(ERR.eofInDoctype);
|
---|
1789 | this.currentToken.forceQuirks = true;
|
---|
1790 | this._emitCurrentToken();
|
---|
1791 | this._emitEOFToken();
|
---|
1792 | } else {
|
---|
1793 | this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
---|
1794 | this.currentToken.forceQuirks = true;
|
---|
1795 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1796 | }
|
---|
1797 | }
|
---|
1798 |
|
---|
1799 | // Between DOCTYPE public and system identifiers state
|
---|
1800 | //------------------------------------------------------------------
|
---|
1801 | [BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE](cp) {
|
---|
1802 | if (isWhitespace(cp)) {
|
---|
1803 | return;
|
---|
1804 | }
|
---|
1805 |
|
---|
1806 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1807 | this._emitCurrentToken();
|
---|
1808 | this.state = DATA_STATE;
|
---|
1809 | } else if (cp === $.QUOTATION_MARK) {
|
---|
1810 | this.currentToken.systemId = '';
|
---|
1811 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
|
---|
1812 | } else if (cp === $.APOSTROPHE) {
|
---|
1813 | this.currentToken.systemId = '';
|
---|
1814 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
|
---|
1815 | } else if (cp === $.EOF) {
|
---|
1816 | this._err(ERR.eofInDoctype);
|
---|
1817 | this.currentToken.forceQuirks = true;
|
---|
1818 | this._emitCurrentToken();
|
---|
1819 | this._emitEOFToken();
|
---|
1820 | } else {
|
---|
1821 | this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
---|
1822 | this.currentToken.forceQuirks = true;
|
---|
1823 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1824 | }
|
---|
1825 | }
|
---|
1826 |
|
---|
1827 | // After DOCTYPE system keyword state
|
---|
1828 | //------------------------------------------------------------------
|
---|
1829 | [AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE](cp) {
|
---|
1830 | if (isWhitespace(cp)) {
|
---|
1831 | this.state = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
|
---|
1832 | } else if (cp === $.QUOTATION_MARK) {
|
---|
1833 | this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
|
---|
1834 | this.currentToken.systemId = '';
|
---|
1835 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
|
---|
1836 | } else if (cp === $.APOSTROPHE) {
|
---|
1837 | this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
|
---|
1838 | this.currentToken.systemId = '';
|
---|
1839 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
|
---|
1840 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1841 | this._err(ERR.missingDoctypeSystemIdentifier);
|
---|
1842 | this.currentToken.forceQuirks = true;
|
---|
1843 | this.state = DATA_STATE;
|
---|
1844 | this._emitCurrentToken();
|
---|
1845 | } else if (cp === $.EOF) {
|
---|
1846 | this._err(ERR.eofInDoctype);
|
---|
1847 | this.currentToken.forceQuirks = true;
|
---|
1848 | this._emitCurrentToken();
|
---|
1849 | this._emitEOFToken();
|
---|
1850 | } else {
|
---|
1851 | this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
---|
1852 | this.currentToken.forceQuirks = true;
|
---|
1853 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1854 | }
|
---|
1855 | }
|
---|
1856 |
|
---|
1857 | // Before DOCTYPE system identifier state
|
---|
1858 | //------------------------------------------------------------------
|
---|
1859 | [BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE](cp) {
|
---|
1860 | if (isWhitespace(cp)) {
|
---|
1861 | return;
|
---|
1862 | }
|
---|
1863 |
|
---|
1864 | if (cp === $.QUOTATION_MARK) {
|
---|
1865 | this.currentToken.systemId = '';
|
---|
1866 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
|
---|
1867 | } else if (cp === $.APOSTROPHE) {
|
---|
1868 | this.currentToken.systemId = '';
|
---|
1869 | this.state = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
|
---|
1870 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1871 | this._err(ERR.missingDoctypeSystemIdentifier);
|
---|
1872 | this.currentToken.forceQuirks = true;
|
---|
1873 | this.state = DATA_STATE;
|
---|
1874 | this._emitCurrentToken();
|
---|
1875 | } else if (cp === $.EOF) {
|
---|
1876 | this._err(ERR.eofInDoctype);
|
---|
1877 | this.currentToken.forceQuirks = true;
|
---|
1878 | this._emitCurrentToken();
|
---|
1879 | this._emitEOFToken();
|
---|
1880 | } else {
|
---|
1881 | this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
---|
1882 | this.currentToken.forceQuirks = true;
|
---|
1883 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1884 | }
|
---|
1885 | }
|
---|
1886 |
|
---|
1887 | // DOCTYPE system identifier (double-quoted) state
|
---|
1888 | //------------------------------------------------------------------
|
---|
1889 | [DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE](cp) {
|
---|
1890 | if (cp === $.QUOTATION_MARK) {
|
---|
1891 | this.state = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
|
---|
1892 | } else if (cp === $.NULL) {
|
---|
1893 | this._err(ERR.unexpectedNullCharacter);
|
---|
1894 | this.currentToken.systemId += unicode.REPLACEMENT_CHARACTER;
|
---|
1895 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1896 | this._err(ERR.abruptDoctypeSystemIdentifier);
|
---|
1897 | this.currentToken.forceQuirks = true;
|
---|
1898 | this._emitCurrentToken();
|
---|
1899 | this.state = DATA_STATE;
|
---|
1900 | } else if (cp === $.EOF) {
|
---|
1901 | this._err(ERR.eofInDoctype);
|
---|
1902 | this.currentToken.forceQuirks = true;
|
---|
1903 | this._emitCurrentToken();
|
---|
1904 | this._emitEOFToken();
|
---|
1905 | } else {
|
---|
1906 | this.currentToken.systemId += toChar(cp);
|
---|
1907 | }
|
---|
1908 | }
|
---|
1909 |
|
---|
1910 | // DOCTYPE system identifier (single-quoted) state
|
---|
1911 | //------------------------------------------------------------------
|
---|
1912 | [DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE](cp) {
|
---|
1913 | if (cp === $.APOSTROPHE) {
|
---|
1914 | this.state = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
|
---|
1915 | } else if (cp === $.NULL) {
|
---|
1916 | this._err(ERR.unexpectedNullCharacter);
|
---|
1917 | this.currentToken.systemId += unicode.REPLACEMENT_CHARACTER;
|
---|
1918 | } else if (cp === $.GREATER_THAN_SIGN) {
|
---|
1919 | this._err(ERR.abruptDoctypeSystemIdentifier);
|
---|
1920 | this.currentToken.forceQuirks = true;
|
---|
1921 | this._emitCurrentToken();
|
---|
1922 | this.state = DATA_STATE;
|
---|
1923 | } else if (cp === $.EOF) {
|
---|
1924 | this._err(ERR.eofInDoctype);
|
---|
1925 | this.currentToken.forceQuirks = true;
|
---|
1926 | this._emitCurrentToken();
|
---|
1927 | this._emitEOFToken();
|
---|
1928 | } else {
|
---|
1929 | this.currentToken.systemId += toChar(cp);
|
---|
1930 | }
|
---|
1931 | }
|
---|
1932 |
|
---|
1933 | // After DOCTYPE system identifier state
|
---|
1934 | //------------------------------------------------------------------
|
---|
1935 | [AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE](cp) {
|
---|
1936 | if (isWhitespace(cp)) {
|
---|
1937 | return;
|
---|
1938 | }
|
---|
1939 |
|
---|
1940 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1941 | this._emitCurrentToken();
|
---|
1942 | this.state = DATA_STATE;
|
---|
1943 | } else if (cp === $.EOF) {
|
---|
1944 | this._err(ERR.eofInDoctype);
|
---|
1945 | this.currentToken.forceQuirks = true;
|
---|
1946 | this._emitCurrentToken();
|
---|
1947 | this._emitEOFToken();
|
---|
1948 | } else {
|
---|
1949 | this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
|
---|
1950 | this._reconsumeInState(BOGUS_DOCTYPE_STATE);
|
---|
1951 | }
|
---|
1952 | }
|
---|
1953 |
|
---|
1954 | // Bogus DOCTYPE state
|
---|
1955 | //------------------------------------------------------------------
|
---|
1956 | [BOGUS_DOCTYPE_STATE](cp) {
|
---|
1957 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1958 | this._emitCurrentToken();
|
---|
1959 | this.state = DATA_STATE;
|
---|
1960 | } else if (cp === $.NULL) {
|
---|
1961 | this._err(ERR.unexpectedNullCharacter);
|
---|
1962 | } else if (cp === $.EOF) {
|
---|
1963 | this._emitCurrentToken();
|
---|
1964 | this._emitEOFToken();
|
---|
1965 | }
|
---|
1966 | }
|
---|
1967 |
|
---|
1968 | // CDATA section state
|
---|
1969 | //------------------------------------------------------------------
|
---|
1970 | [CDATA_SECTION_STATE](cp) {
|
---|
1971 | if (cp === $.RIGHT_SQUARE_BRACKET) {
|
---|
1972 | this.state = CDATA_SECTION_BRACKET_STATE;
|
---|
1973 | } else if (cp === $.EOF) {
|
---|
1974 | this._err(ERR.eofInCdata);
|
---|
1975 | this._emitEOFToken();
|
---|
1976 | } else {
|
---|
1977 | this._emitCodePoint(cp);
|
---|
1978 | }
|
---|
1979 | }
|
---|
1980 |
|
---|
1981 | // CDATA section bracket state
|
---|
1982 | //------------------------------------------------------------------
|
---|
1983 | [CDATA_SECTION_BRACKET_STATE](cp) {
|
---|
1984 | if (cp === $.RIGHT_SQUARE_BRACKET) {
|
---|
1985 | this.state = CDATA_SECTION_END_STATE;
|
---|
1986 | } else {
|
---|
1987 | this._emitChars(']');
|
---|
1988 | this._reconsumeInState(CDATA_SECTION_STATE);
|
---|
1989 | }
|
---|
1990 | }
|
---|
1991 |
|
---|
1992 | // CDATA section end state
|
---|
1993 | //------------------------------------------------------------------
|
---|
1994 | [CDATA_SECTION_END_STATE](cp) {
|
---|
1995 | if (cp === $.GREATER_THAN_SIGN) {
|
---|
1996 | this.state = DATA_STATE;
|
---|
1997 | } else if (cp === $.RIGHT_SQUARE_BRACKET) {
|
---|
1998 | this._emitChars(']');
|
---|
1999 | } else {
|
---|
2000 | this._emitChars(']]');
|
---|
2001 | this._reconsumeInState(CDATA_SECTION_STATE);
|
---|
2002 | }
|
---|
2003 | }
|
---|
2004 |
|
---|
2005 | // Character reference state
|
---|
2006 | //------------------------------------------------------------------
|
---|
2007 | [CHARACTER_REFERENCE_STATE](cp) {
|
---|
2008 | this.tempBuff = [$.AMPERSAND];
|
---|
2009 |
|
---|
2010 | if (cp === $.NUMBER_SIGN) {
|
---|
2011 | this.tempBuff.push(cp);
|
---|
2012 | this.state = NUMERIC_CHARACTER_REFERENCE_STATE;
|
---|
2013 | } else if (isAsciiAlphaNumeric(cp)) {
|
---|
2014 | this._reconsumeInState(NAMED_CHARACTER_REFERENCE_STATE);
|
---|
2015 | } else {
|
---|
2016 | this._flushCodePointsConsumedAsCharacterReference();
|
---|
2017 | this._reconsumeInState(this.returnState);
|
---|
2018 | }
|
---|
2019 | }
|
---|
2020 |
|
---|
2021 | // Named character reference state
|
---|
2022 | //------------------------------------------------------------------
|
---|
2023 | [NAMED_CHARACTER_REFERENCE_STATE](cp) {
|
---|
2024 | const matchResult = this._matchNamedCharacterReference(cp);
|
---|
2025 |
|
---|
2026 | //NOTE: matching can be abrupted by hibernation. In that case match
|
---|
2027 | //results are no longer valid and we will need to start over.
|
---|
2028 | if (this._ensureHibernation()) {
|
---|
2029 | this.tempBuff = [$.AMPERSAND];
|
---|
2030 | } else if (matchResult) {
|
---|
2031 | const withSemicolon = this.tempBuff[this.tempBuff.length - 1] === $.SEMICOLON;
|
---|
2032 |
|
---|
2033 | if (!this._isCharacterReferenceAttributeQuirk(withSemicolon)) {
|
---|
2034 | if (!withSemicolon) {
|
---|
2035 | this._errOnNextCodePoint(ERR.missingSemicolonAfterCharacterReference);
|
---|
2036 | }
|
---|
2037 |
|
---|
2038 | this.tempBuff = matchResult;
|
---|
2039 | }
|
---|
2040 |
|
---|
2041 | this._flushCodePointsConsumedAsCharacterReference();
|
---|
2042 | this.state = this.returnState;
|
---|
2043 | } else {
|
---|
2044 | this._flushCodePointsConsumedAsCharacterReference();
|
---|
2045 | this.state = AMBIGUOUS_AMPERSAND_STATE;
|
---|
2046 | }
|
---|
2047 | }
|
---|
2048 |
|
---|
2049 | // Ambiguos ampersand state
|
---|
2050 | //------------------------------------------------------------------
|
---|
2051 | [AMBIGUOUS_AMPERSAND_STATE](cp) {
|
---|
2052 | if (isAsciiAlphaNumeric(cp)) {
|
---|
2053 | if (this._isCharacterReferenceInAttribute()) {
|
---|
2054 | this.currentAttr.value += toChar(cp);
|
---|
2055 | } else {
|
---|
2056 | this._emitCodePoint(cp);
|
---|
2057 | }
|
---|
2058 | } else {
|
---|
2059 | if (cp === $.SEMICOLON) {
|
---|
2060 | this._err(ERR.unknownNamedCharacterReference);
|
---|
2061 | }
|
---|
2062 |
|
---|
2063 | this._reconsumeInState(this.returnState);
|
---|
2064 | }
|
---|
2065 | }
|
---|
2066 |
|
---|
2067 | // Numeric character reference state
|
---|
2068 | //------------------------------------------------------------------
|
---|
2069 | [NUMERIC_CHARACTER_REFERENCE_STATE](cp) {
|
---|
2070 | this.charRefCode = 0;
|
---|
2071 |
|
---|
2072 | if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) {
|
---|
2073 | this.tempBuff.push(cp);
|
---|
2074 | this.state = HEXADEMICAL_CHARACTER_REFERENCE_START_STATE;
|
---|
2075 | } else {
|
---|
2076 | this._reconsumeInState(DECIMAL_CHARACTER_REFERENCE_START_STATE);
|
---|
2077 | }
|
---|
2078 | }
|
---|
2079 |
|
---|
2080 | // Hexademical character reference start state
|
---|
2081 | //------------------------------------------------------------------
|
---|
2082 | [HEXADEMICAL_CHARACTER_REFERENCE_START_STATE](cp) {
|
---|
2083 | if (isAsciiHexDigit(cp)) {
|
---|
2084 | this._reconsumeInState(HEXADEMICAL_CHARACTER_REFERENCE_STATE);
|
---|
2085 | } else {
|
---|
2086 | this._err(ERR.absenceOfDigitsInNumericCharacterReference);
|
---|
2087 | this._flushCodePointsConsumedAsCharacterReference();
|
---|
2088 | this._reconsumeInState(this.returnState);
|
---|
2089 | }
|
---|
2090 | }
|
---|
2091 |
|
---|
2092 | // Decimal character reference start state
|
---|
2093 | //------------------------------------------------------------------
|
---|
2094 | [DECIMAL_CHARACTER_REFERENCE_START_STATE](cp) {
|
---|
2095 | if (isAsciiDigit(cp)) {
|
---|
2096 | this._reconsumeInState(DECIMAL_CHARACTER_REFERENCE_STATE);
|
---|
2097 | } else {
|
---|
2098 | this._err(ERR.absenceOfDigitsInNumericCharacterReference);
|
---|
2099 | this._flushCodePointsConsumedAsCharacterReference();
|
---|
2100 | this._reconsumeInState(this.returnState);
|
---|
2101 | }
|
---|
2102 | }
|
---|
2103 |
|
---|
2104 | // Hexademical character reference state
|
---|
2105 | //------------------------------------------------------------------
|
---|
2106 | [HEXADEMICAL_CHARACTER_REFERENCE_STATE](cp) {
|
---|
2107 | if (isAsciiUpperHexDigit(cp)) {
|
---|
2108 | this.charRefCode = this.charRefCode * 16 + cp - 0x37;
|
---|
2109 | } else if (isAsciiLowerHexDigit(cp)) {
|
---|
2110 | this.charRefCode = this.charRefCode * 16 + cp - 0x57;
|
---|
2111 | } else if (isAsciiDigit(cp)) {
|
---|
2112 | this.charRefCode = this.charRefCode * 16 + cp - 0x30;
|
---|
2113 | } else if (cp === $.SEMICOLON) {
|
---|
2114 | this.state = NUMERIC_CHARACTER_REFERENCE_END_STATE;
|
---|
2115 | } else {
|
---|
2116 | this._err(ERR.missingSemicolonAfterCharacterReference);
|
---|
2117 | this._reconsumeInState(NUMERIC_CHARACTER_REFERENCE_END_STATE);
|
---|
2118 | }
|
---|
2119 | }
|
---|
2120 |
|
---|
2121 | // Decimal character reference state
|
---|
2122 | //------------------------------------------------------------------
|
---|
2123 | [DECIMAL_CHARACTER_REFERENCE_STATE](cp) {
|
---|
2124 | if (isAsciiDigit(cp)) {
|
---|
2125 | this.charRefCode = this.charRefCode * 10 + cp - 0x30;
|
---|
2126 | } else if (cp === $.SEMICOLON) {
|
---|
2127 | this.state = NUMERIC_CHARACTER_REFERENCE_END_STATE;
|
---|
2128 | } else {
|
---|
2129 | this._err(ERR.missingSemicolonAfterCharacterReference);
|
---|
2130 | this._reconsumeInState(NUMERIC_CHARACTER_REFERENCE_END_STATE);
|
---|
2131 | }
|
---|
2132 | }
|
---|
2133 |
|
---|
2134 | // Numeric character reference end state
|
---|
2135 | //------------------------------------------------------------------
|
---|
2136 | [NUMERIC_CHARACTER_REFERENCE_END_STATE]() {
|
---|
2137 | if (this.charRefCode === $.NULL) {
|
---|
2138 | this._err(ERR.nullCharacterReference);
|
---|
2139 | this.charRefCode = $.REPLACEMENT_CHARACTER;
|
---|
2140 | } else if (this.charRefCode > 0x10ffff) {
|
---|
2141 | this._err(ERR.characterReferenceOutsideUnicodeRange);
|
---|
2142 | this.charRefCode = $.REPLACEMENT_CHARACTER;
|
---|
2143 | } else if (unicode.isSurrogate(this.charRefCode)) {
|
---|
2144 | this._err(ERR.surrogateCharacterReference);
|
---|
2145 | this.charRefCode = $.REPLACEMENT_CHARACTER;
|
---|
2146 | } else if (unicode.isUndefinedCodePoint(this.charRefCode)) {
|
---|
2147 | this._err(ERR.noncharacterCharacterReference);
|
---|
2148 | } else if (unicode.isControlCodePoint(this.charRefCode) || this.charRefCode === $.CARRIAGE_RETURN) {
|
---|
2149 | this._err(ERR.controlCharacterReference);
|
---|
2150 |
|
---|
2151 | const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS[this.charRefCode];
|
---|
2152 |
|
---|
2153 | if (replacement) {
|
---|
2154 | this.charRefCode = replacement;
|
---|
2155 | }
|
---|
2156 | }
|
---|
2157 |
|
---|
2158 | this.tempBuff = [this.charRefCode];
|
---|
2159 |
|
---|
2160 | this._flushCodePointsConsumedAsCharacterReference();
|
---|
2161 | this._reconsumeInState(this.returnState);
|
---|
2162 | }
|
---|
2163 | }
|
---|
2164 |
|
---|
//Token types
// Static token-type identifiers; each constant's value is its own name.
Object.assign(Tokenizer, {
    CHARACTER_TOKEN: 'CHARACTER_TOKEN',
    NULL_CHARACTER_TOKEN: 'NULL_CHARACTER_TOKEN',
    WHITESPACE_CHARACTER_TOKEN: 'WHITESPACE_CHARACTER_TOKEN',
    START_TAG_TOKEN: 'START_TAG_TOKEN',
    END_TAG_TOKEN: 'END_TAG_TOKEN',
    COMMENT_TOKEN: 'COMMENT_TOKEN',
    DOCTYPE_TOKEN: 'DOCTYPE_TOKEN',
    EOF_TOKEN: 'EOF_TOKEN',
    HIBERNATION_TOKEN: 'HIBERNATION_TOKEN'
});
2175 |
|
---|
//Tokenizer initial states for different modes
// Maps each mode name to the state constant the tokenizer starts in.
Tokenizer.MODE = {};
Tokenizer.MODE.DATA = DATA_STATE;
Tokenizer.MODE.RCDATA = RCDATA_STATE;
Tokenizer.MODE.RAWTEXT = RAWTEXT_STATE;
Tokenizer.MODE.SCRIPT_DATA = SCRIPT_DATA_STATE;
Tokenizer.MODE.PLAINTEXT = PLAINTEXT_STATE;
2184 |
|
---|
//Static
/**
 * Looks up an attribute value on a token by attribute name.
 *
 * The attribute list is scanned from the end, so when several attributes
 * share a name the last one wins.
 *
 * @param {Object} token - Token whose `attrs` array is searched; entries are read as `{ name, value }`.
 * @param {string} attrName - Attribute name, compared with strict equality.
 * @returns {*} The matching attribute's `value`, or `null` when no attribute matches.
 */
Tokenizer.getTokenAttr = function(token, attrName) {
    const attrs = token.attrs;
    let idx = attrs.length;

    while (idx-- > 0) {
        const attr = attrs[idx];

        if (attr.name === attrName) {
            return attr.value;
        }
    }

    return null;
};
2195 |
|
---|
2196 | module.exports = Tokenizer;
|
---|