source: imaps-frontend/node_modules/htmlparser2/lib/Tokenizer.js@ 79a0317

main
Last change on this file since 79a0317 was 79a0317, checked in by stefan toskovski <stefantoska84@…>, 4 days ago

F4 Finalna Verzija

  • Property mode set to 100644
File size: 34.8 KB
RevLine 
[79a0317]1"use strict";
2var __importDefault = (this && this.__importDefault) || function (mod) {
3 return (mod && mod.__esModule) ? mod : { "default": mod };
4};
5Object.defineProperty(exports, "__esModule", { value: true });
6var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint"));
7var entities_json_1 = __importDefault(require("entities/lib/maps/entities.json"));
8var legacy_json_1 = __importDefault(require("entities/lib/maps/legacy.json"));
9var xml_json_1 = __importDefault(require("entities/lib/maps/xml.json"));
/**
 * Tells whether `c` is one of the five characters the tokenizer treats as
 * whitespace: space, line feed, tab, form feed or carriage return.
 *
 * @param {string} c Character to classify.
 * @returns {boolean} `true` for a whitespace character, `false` otherwise.
 */
function whitespace(c) {
    switch (c) {
        case " ":
        case "\n":
        case "\t":
        case "\f":
        case "\r":
            return true;
        default:
            return false;
    }
}
/**
 * Tells whether `c` falls in the range `a`-`z` or `A`-`Z`.
 *
 * @param {string} c Character to classify.
 * @returns {boolean} `true` when `c` is an ASCII letter.
 */
function isASCIIAlpha(c) {
    var isLowerCase = c >= "a" && c <= "z";
    var isUpperCase = c >= "A" && c <= "Z";
    return isLowerCase || isUpperCase;
}
/**
 * Builds a state handler that compares the current character against one
 * expected letter, case-insensitively.
 *
 * On a match the tokenizer moves to `SUCCESS`. Otherwise it moves to `FAILURE`
 * and steps `_index` back by one, so the same character is looked at again.
 *
 * @param {string} upper Expected character, given in its upper-case form.
 * @param {number} SUCCESS State to enter when the character matches.
 * @param {number} FAILURE State to enter when it does not match.
 * @returns {function(Object, string): void} Handler taking the tokenizer and the character.
 */
function ifElseState(upper, SUCCESS, FAILURE) {
    var lower = upper.toLowerCase();
    // Characters without a distinct lower-case form need one comparison only.
    var matches = upper === lower
        ? function (c) { return c === lower; }
        : function (c) { return c === lower || c === upper; };
    return function (t, c) {
        if (matches(c)) {
            t._state = SUCCESS;
            return;
        }
        t._state = FAILURE;
        t._index--;
    };
}
/**
 * Builds a state handler for one letter of a special tag name.
 *
 * If the character matches `upper` in either case, the tokenizer advances to
 * `NEXT_STATE`. Otherwise it enters state 3 (InTagName) and steps `_index`
 * back by one so the character is consumed again in that state.
 *
 * @param {string} upper Expected character, given in its upper-case form.
 * @param {number} NEXT_STATE State to enter on a match.
 * @returns {function(Object, string): void} Handler taking the tokenizer and the character.
 */
function consumeSpecialNameChar(upper, NEXT_STATE) {
    var lower = upper.toLowerCase();
    return function (t, c) {
        var matched = c === upper || c === lower;
        if (!matched) {
            t._state = 3 /* InTagName */;
            t._index--; // Consume the token again
            return;
        }
        t._state = NEXT_STATE;
    };
}
// Fallback targets shared by the letter-by-letter matchers below.
var FALLBACK_DECLARATION = 16 /* InDeclaration */;
var FALLBACK_TEXT = 1 /* Text */;

// "CDATA" after "<![": each matching letter advances one state, anything else is a declaration.
var stateBeforeCdata1 = ifElseState("C", 24 /* BeforeCdata2 */, FALLBACK_DECLARATION);
var stateBeforeCdata2 = ifElseState("D", 25 /* BeforeCdata3 */, FALLBACK_DECLARATION);
var stateBeforeCdata3 = ifElseState("A", 26 /* BeforeCdata4 */, FALLBACK_DECLARATION);
var stateBeforeCdata4 = ifElseState("T", 27 /* BeforeCdata5 */, FALLBACK_DECLARATION);
var stateBeforeCdata5 = ifElseState("A", 28 /* BeforeCdata6 */, FALLBACK_DECLARATION);

// Opening tag names: a mismatch turns the tag into an ordinary tag name.
var stateBeforeScript1 = consumeSpecialNameChar("R", 35 /* BeforeScript2 */);
var stateBeforeScript2 = consumeSpecialNameChar("I", 36 /* BeforeScript3 */);
var stateBeforeScript3 = consumeSpecialNameChar("P", 37 /* BeforeScript4 */);
var stateBeforeScript4 = consumeSpecialNameChar("T", 38 /* BeforeScript5 */);
var stateBeforeStyle1 = consumeSpecialNameChar("Y", 45 /* BeforeStyle2 */);
var stateBeforeStyle2 = consumeSpecialNameChar("L", 46 /* BeforeStyle3 */);
var stateBeforeStyle3 = consumeSpecialNameChar("E", 47 /* BeforeStyle4 */);
var stateBeforeSpecialT = consumeSpecialNameChar("I", 54 /* BeforeTitle1 */);
var stateBeforeTitle1 = consumeSpecialNameChar("T", 55 /* BeforeTitle2 */);
var stateBeforeTitle2 = consumeSpecialNameChar("L", 56 /* BeforeTitle3 */);
var stateBeforeTitle3 = consumeSpecialNameChar("E", 57 /* BeforeTitle4 */);

// Closing tag names: a mismatch drops back to text.
var stateAfterScript1 = ifElseState("R", 40 /* AfterScript2 */, FALLBACK_TEXT);
var stateAfterScript2 = ifElseState("I", 41 /* AfterScript3 */, FALLBACK_TEXT);
var stateAfterScript3 = ifElseState("P", 42 /* AfterScript4 */, FALLBACK_TEXT);
var stateAfterScript4 = ifElseState("T", 43 /* AfterScript5 */, FALLBACK_TEXT);
var stateAfterStyle1 = ifElseState("Y", 49 /* AfterStyle2 */, FALLBACK_TEXT);
var stateAfterStyle2 = ifElseState("L", 50 /* AfterStyle3 */, FALLBACK_TEXT);
var stateAfterStyle3 = ifElseState("E", 51 /* AfterStyle4 */, FALLBACK_TEXT);
var stateAfterSpecialTEnd = ifElseState("I", 58 /* AfterTitle1 */, FALLBACK_TEXT);
var stateAfterTitle1 = ifElseState("T", 59 /* AfterTitle2 */, FALLBACK_TEXT);
var stateAfterTitle2 = ifElseState("L", 60 /* AfterTitle3 */, FALLBACK_TEXT);
var stateAfterTitle3 = ifElseState("E", 61 /* AfterTitle4 */, FALLBACK_TEXT);

// Entities: "#" introduces a numeric entity, and "x"/"X" after it selects hexadecimal.
var stateBeforeEntity = ifElseState("#", 63 /* BeforeNumericEntity */, 64 /* InNamedEntity */);
var stateBeforeNumericEntity = ifElseState("X", 66 /* InHexEntity */, 65 /* InNumericEntity */);
80var Tokenizer = /** @class */ (function () {
/**
 * Creates a tokenizer in its initial state.
 *
 * @param {Object} [options] Optional settings. `xmlMode` is coerced to a
 *     boolean; `decodeEntities` defaults to `true` when it is `null` or `undefined`.
 * @param {Object} cbs Callback object that receives the tokenizer events.
 */
function Tokenizer(options, cbs) {
    var hasOptions = options != null;
    /** The current state the tokenizer is in. */
    this._state = 1 /* Text */;
    /** The read buffer. */
    this.buffer = "";
    /** The beginning of the section that is currently being read. */
    this.sectionStart = 0;
    /** The index within the buffer that we are currently looking at. */
    this._index = 0;
    /**
     * Number of characters already dropped from the front of the buffer;
     * added to `_index` to report absolute positions.
     */
    this.bufferOffset = 0;
    /** The state to return to once an entity, which is decoded in its own states, is finished. */
    this.baseState = 1 /* Text */;
    /** For special parsing behavior inside of script and style tags. */
    this.special = 1 /* None */;
    /** Indicates whether the tokenizer has been paused. */
    this.running = true;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    this.ended = false;
    this.cbs = cbs;
    this.xmlMode = hasOptions ? !!options.xmlMode : false;
    var decodeSetting = hasOptions ? options.decodeEntities : undefined;
    this.decodeEntities = decodeSetting != null ? decodeSetting : true;
}
/**
 * Restores the tokenizer to the state it has right after construction.
 * The callbacks and the `xmlMode` / `decodeEntities` settings are left untouched.
 */
Tokenizer.prototype.reset = function () {
    // Parsing state.
    this._state = 1 /* Text */;
    this.baseState = 1 /* Text */;
    this.special = 1 /* None */;
    // Buffer and position bookkeeping.
    this.buffer = "";
    this.sectionStart = 0;
    this._index = 0;
    this.bufferOffset = 0;
    // Lifecycle flags.
    this.running = true;
    this.ended = false;
};
/**
 * Appends `chunk` to the buffer and parses as much of it as possible.
 *
 * If the tokenizer has already ended, the error is reported through
 * `cbs.onerror` and the chunk is dropped: nothing is buffered or parsed, so a
 * late write can no longer emit events after the end of the document.
 *
 * @param {string} chunk Data to tokenize.
 */
Tokenizer.prototype.write = function (chunk) {
    if (this.ended) {
        this.cbs.onerror(Error(".write() after done!"));
        return;
    }
    this.buffer += chunk;
    this.parse();
};
/**
 * Marks the input as complete, optionally writing one last chunk first.
 *
 * If the tokenizer has already ended, the error is reported through
 * `cbs.onerror` and nothing else happens, so `finish()` (and with it
 * `cbs.onend`) is not run a second time. While the tokenizer is paused,
 * finishing is left to `resume()`.
 *
 * @param {string} [chunk] Final piece of data; skipped when falsy.
 */
Tokenizer.prototype.end = function (chunk) {
    if (this.ended) {
        this.cbs.onerror(Error(".end() after done!"));
        return;
    }
    if (chunk) {
        this.write(chunk);
    }
    this.ended = true;
    if (this.running) {
        this.finish();
    }
};
/** Stops the parse loop; buffered input stays in place until `resume()` is called. */
Tokenizer.prototype.pause = function () {
    this.running = false;
};
/**
 * Restarts a paused tokenizer: parses whatever is still unread in the buffer
 * and, if `end()` was already called, runs `finish()`.
 */
Tokenizer.prototype.resume = function () {
    this.running = true;
    var hasUnreadInput = this._index < this.buffer.length;
    if (hasUnreadInput) {
        this.parse();
    }
    if (this.ended) {
        this.finish();
    }
};
/**
 * The current index within all of the written data: the position inside the
 * buffer plus the number of characters recorded in `bufferOffset`.
 *
 * @returns {number} Absolute index.
 */
Tokenizer.prototype.getAbsoluteIndex = function () {
    var alreadyRemoved = this.bufferOffset;
    var positionInBuffer = this._index;
    return alreadyRemoved + positionInBuffer;
};
/**
 * Text state. "<" opens a tag. "&" opens an entity when entity decoding is on
 * and `special` is 1 (None) or 4 (Title). In both cases any text collected so
 * far is emitted first and a new section starts at the current character.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateText = function (c) {
    var opensTag = c === "<";
    var opensEntity = !opensTag &&
        this.decodeEntities &&
        c === "&" &&
        (this.special === 1 /* None */ || this.special === 4 /* Title */);
    if (!opensTag && !opensEntity) {
        return;
    }
    // Flush the text that precedes the tag or entity.
    if (this._index > this.sectionStart) {
        this.cbs.ontext(this.getSection());
    }
    if (opensTag) {
        this._state = 2 /* BeforeTagName */;
    }
    else {
        this.baseState = 1 /* Text */;
        this._state = 62 /* BeforeEntity */;
    }
    this.sectionStart = this._index;
};
/**
 * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
 *
 * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
 * In XML mode anything that would not end the tag (whitespace, "/" or ">") is accepted.
 *
 * @param {string} c Candidate first character of a tag name.
 * @returns {boolean} Truthy when `c` may start a tag name.
 */
Tokenizer.prototype.isTagStartChar = function (c) {
    if (isASCIIAlpha(c)) {
        return true;
    }
    return this.xmlMode && !whitespace(c) && c !== "/" && c !== ">";
};
/**
 * Handles the character that directly follows "<" and decides what kind of
 * construct starts here: closing tag, declaration, processing instruction,
 * opening tag, or plain text after all.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeTagName = function (c) {
    if (c === "/") {
        this._state = 5 /* BeforeClosingTagName */;
        return;
    }
    if (c === "<") {
        // The previous "<" turned out to be text; restart the section at this one.
        this.cbs.ontext(this.getSection());
        this.sectionStart = this._index;
        return;
    }
    if (c === ">" || this.special !== 1 /* None */ || whitespace(c)) {
        this._state = 1 /* Text */;
        return;
    }
    if (c === "!" || c === "?") {
        this._state = c === "!"
            ? 15 /* BeforeDeclaration */
            : 17 /* InProcessingInstruction */;
        this.sectionStart = this._index + 1;
        return;
    }
    if (!this.isTagStartChar(c)) {
        this._state = 1 /* Text */;
        return;
    }
    // Outside XML mode, names starting with "s" or "t" take the special-tag detection path.
    var next = 3 /* InTagName */;
    if (!this.xmlMode) {
        if (c === "s" || c === "S") {
            next = 32 /* BeforeSpecialS */;
        }
        else if (c === "t" || c === "T") {
            next = 52 /* BeforeSpecialT */;
        }
    }
    this._state = next;
    this.sectionStart = this._index;
};
/**
 * Inside an opening tag name. "/", ">" or whitespace ends the name: it is
 * emitted as `onopentagname` and the terminating character is handed to
 * state 8 (BeforeAttributeName) by stepping `_index` back.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInTagName = function (c) {
    var endsName = c === "/" || c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this.emitToken("onopentagname");
    this._state = 8 /* BeforeAttributeName */;
    this._index--;
};
/**
 * Handles the character after "</". Leading whitespace is skipped and ">"
 * returns to text. While a special tag is open, only the first letter of a
 * possible matching end tag is followed. Otherwise a closing tag name starts,
 * or, for a character that cannot start a tag name, a special comment.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeClosingTagName = function (c) {
    if (whitespace(c)) {
        return;
    }
    if (c === ">") {
        this._state = 1 /* Text */;
        return;
    }
    if (this.special !== 1 /* None */) {
        var inTitle = this.special === 4 /* Title */;
        if (!inTitle && (c === "s" || c === "S")) {
            this._state = 33 /* BeforeSpecialSEnd */;
        }
        else if (inTitle && (c === "t" || c === "T")) {
            this._state = 53 /* BeforeSpecialTEnd */;
        }
        else {
            // Not the end tag we are waiting for: look at this character again as text.
            this._state = 1 /* Text */;
            this._index--;
        }
        return;
    }
    this._state = this.isTagStartChar(c)
        ? 6 /* InClosingTagName */
        : 20 /* InSpecialComment */;
    this.sectionStart = this._index;
};
/**
 * Inside a closing tag name. ">" or whitespace ends the name: it is emitted
 * as `onclosetag` and the terminating character is handed to state 7
 * (AfterClosingTagName) by stepping `_index` back.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInClosingTagName = function (c) {
    var endsName = c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this.emitToken("onclosetag");
    this._state = 7 /* AfterClosingTagName */;
    this._index--;
};
/**
 * After a closing tag name: everything up to ">" is skipped, then text
 * resumes with the section starting right after the ">".
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateAfterClosingTagName = function (c) {
    if (c !== ">") {
        return;
    }
    this._state = 1 /* Text */;
    this.sectionStart = this._index + 1;
};
/**
 * Between attributes inside an opening tag. ">" closes the tag, "/" may start
 * a self-closing tag, whitespace is skipped, and any other character begins
 * an attribute name.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeAttributeName = function (c) {
    switch (c) {
        case ">":
            this.cbs.onopentagend();
            this._state = 1 /* Text */;
            this.sectionStart = this._index + 1;
            break;
        case "/":
            this._state = 4 /* InSelfClosingTag */;
            break;
        default:
            if (!whitespace(c)) {
                this._state = 9 /* InAttributeName */;
                this.sectionStart = this._index;
            }
    }
};
/**
 * After "/" inside an opening tag. ">" completes a self-closing tag;
 * whitespace is skipped; any other character is handed back to state 8
 * (BeforeAttributeName) by stepping `_index` back.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInSelfClosingTag = function (c) {
    if (c !== ">") {
        if (!whitespace(c)) {
            this._state = 8 /* BeforeAttributeName */;
            this._index--;
        }
        return;
    }
    this.cbs.onselfclosingtag();
    this._state = 1 /* Text */;
    this.sectionStart = this._index + 1;
    this.special = 1 /* None */; // Reset special state, in case of self-closing special tags
};
/**
 * Inside an attribute name. "=", "/", ">" or whitespace ends the name: it is
 * reported through `onattribname` and the terminating character is handed to
 * state 10 (AfterAttributeName) by stepping `_index` back.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInAttributeName = function (c) {
    var endsName = c === "=" || c === "/" || c === ">" || whitespace(c);
    if (!endsName) {
        return;
    }
    this.cbs.onattribname(this.getSection());
    this.sectionStart = -1;
    this._state = 10 /* AfterAttributeName */;
    this._index--;
};
/**
 * After an attribute name. "=" announces a value and whitespace is skipped.
 * Any other character means the attribute has no value, so `onattribend` is
 * called with `undefined`. "/" and ">" are then handed back to state 8
 * (BeforeAttributeName); anything else starts the next attribute name.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateAfterAttributeName = function (c) {
    if (c === "=") {
        this._state = 11 /* BeforeAttributeValue */;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this.cbs.onattribend(undefined);
    if (c === "/" || c === ">") {
        this._state = 8 /* BeforeAttributeName */;
        this._index--;
    }
    else {
        this._state = 9 /* InAttributeName */;
        this.sectionStart = this._index;
    }
};
/**
 * After "=" in an attribute. A quote opens a quoted value whose section
 * starts after the quote; whitespace is skipped; any other character starts
 * an unquoted value and is reconsumed in that state.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeAttributeValue = function (c) {
    if (c === '"' || c === "'") {
        this._state = c === '"'
            ? 12 /* InAttributeValueDq */
            : 13 /* InAttributeValueSq */;
        this.sectionStart = this._index + 1;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._state = 14 /* InAttributeValueNq */;
    this.sectionStart = this._index;
    this._index--; // Reconsume token
};
/**
 * Shared handler for quoted attribute values. The closing quote emits the
 * collected data and ends the attribute. "&", when entity decoding is on,
 * emits the data so far and switches to entity parsing, remembering the
 * current state in `baseState`.
 *
 * @param {string} c Current character.
 * @param {string} quote Quote character that delimits the value.
 */
Tokenizer.prototype.handleInAttributeValue = function (c, quote) {
    var closesValue = c === quote;
    if (!closesValue && !(this.decodeEntities && c === "&")) {
        return;
    }
    this.emitToken("onattribdata");
    if (closesValue) {
        this.cbs.onattribend(quote);
        this._state = 8 /* BeforeAttributeName */;
        return;
    }
    this.baseState = this._state;
    this._state = 62 /* BeforeEntity */;
    this.sectionStart = this._index;
};
/**
 * Inside a double-quoted attribute value.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) {
    var closingQuote = '"';
    this.handleInAttributeValue(c, closingQuote);
};
/**
 * Inside a single-quoted attribute value.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) {
    var closingQuote = "'";
    this.handleInAttributeValue(c, closingQuote);
};
/**
 * Inside an unquoted attribute value. Whitespace or ">" ends the value
 * (`onattribend` receives `null`) and is handed back to state 8
 * (BeforeAttributeName). "&", when entity decoding is on, emits the data so
 * far and switches to entity parsing.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) {
    var endsValue = whitespace(c) || c === ">";
    if (!endsValue && !(this.decodeEntities && c === "&")) {
        return;
    }
    this.emitToken("onattribdata");
    if (endsValue) {
        this.cbs.onattribend(null);
        this._state = 8 /* BeforeAttributeName */;
        this._index--;
        return;
    }
    this.baseState = this._state;
    this._state = 62 /* BeforeEntity */;
    this.sectionStart = this._index;
};
/**
 * Handles the character after "<!": "[" may start a CDATA section, "-" may
 * start a comment, and anything else is a declaration.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeDeclaration = function (c) {
    if (c === "[") {
        this._state = 23 /* BeforeCdata1 */;
    }
    else if (c === "-") {
        this._state = 18 /* BeforeComment */;
    }
    else {
        this._state = 16 /* InDeclaration */;
    }
};
/**
 * Inside a declaration: ">" ends it, the section is reported through
 * `ondeclaration`, and text resumes after the ">".
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInDeclaration = function (c) {
    if (c !== ">") {
        return;
    }
    this.cbs.ondeclaration(this.getSection());
    this._state = 1 /* Text */;
    this.sectionStart = this._index + 1;
};
/**
 * Inside a processing instruction: ">" ends it, the section is reported
 * through `onprocessinginstruction`, and text resumes after the ">".
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInProcessingInstruction = function (c) {
    if (c !== ">") {
        return;
    }
    this.cbs.onprocessinginstruction(this.getSection());
    this._state = 1 /* Text */;
    this.sectionStart = this._index + 1;
};
/**
 * After "<!-": a second "-" opens a comment whose section starts after it;
 * anything else is treated as a declaration.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeComment = function (c) {
    if (c !== "-") {
        this._state = 16 /* InDeclaration */;
        return;
    }
    this._state = 19 /* InComment */;
    this.sectionStart = this._index + 1;
};
/**
 * Inside a comment: a "-" may be the start of the closing "-->".
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInComment = function (c) {
    if (c === "-") {
        this._state = 21 /* AfterComment1 */;
    }
};
/**
 * Inside a special comment (entered from a "</" followed by a character that
 * cannot start a tag name): ">" ends it, the section is reported through
 * `oncomment`, and text resumes after the ">".
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInSpecialComment = function (c) {
    if (c !== ">") {
        return;
    }
    var comment = this.buffer.substring(this.sectionStart, this._index);
    this.cbs.oncomment(comment);
    this._state = 1 /* Text */;
    this.sectionStart = this._index + 1;
};
/**
 * After one "-" inside a comment: a second "-" moves towards the end of the
 * comment, anything else goes back to the comment body.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateAfterComment1 = function (c) {
    this._state = c === "-"
        ? 22 /* AfterComment2 */
        : 19 /* InComment */;
};
/**
 * After "--" inside a comment. ">" ends the comment, which is reported
 * without the two trailing dashes. A further "-" keeps waiting (`--->`);
 * anything else goes back to the comment body.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateAfterComment2 = function (c) {
    if (c === "-") {
        // Stay in AFTER_COMMENT_2 (`--->`)
        return;
    }
    if (c !== ">") {
        this._state = 19 /* InComment */;
        return;
    }
    // Remove 2 trailing chars
    var comment = this.buffer.substring(this.sectionStart, this._index - 2);
    this.cbs.oncomment(comment);
    this._state = 1 /* Text */;
    this.sectionStart = this._index + 1;
};
/**
 * Last step of recognising a CDATA opener: "[" starts the CDATA content,
 * anything else is reconsumed as part of a declaration.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeCdata6 = function (c) {
    if (c !== "[") {
        this._state = 16 /* InDeclaration */;
        this._index--;
        return;
    }
    this._state = 29 /* InCdata */;
    this.sectionStart = this._index + 1;
};
/**
 * Inside CDATA content: a "]" may be the start of the closing "]]>".
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInCdata = function (c) {
    if (c === "]") {
        this._state = 30 /* AfterCdata1 */;
    }
};
/**
 * After one "]" inside CDATA: a second "]" moves towards the end of the
 * section, anything else goes back to the CDATA content.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateAfterCdata1 = function (c) {
    this._state = c === "]"
        ? 31 /* AfterCdata2 */
        : 29 /* InCdata */;
};
/**
 * After "]]" inside CDATA. ">" ends the section, which is reported without
 * the two trailing brackets. A further "]" keeps waiting (`]]]>`); anything
 * else goes back to the CDATA content.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateAfterCdata2 = function (c) {
    if (c === "]") {
        // Stay in AFTER_CDATA_2 (`]]]>`)
        return;
    }
    if (c !== ">") {
        this._state = 29 /* InCdata */;
        return;
    }
    // Remove 2 trailing chars
    var cdata = this.buffer.substring(this.sectionStart, this._index - 2);
    this.cbs.oncdata(cdata);
    this._state = 1 /* Text */;
    this.sectionStart = this._index + 1;
};
/**
 * Second letter of an opening tag name that began with "s": "c" continues
 * towards "script", "t" towards "style", and anything else is reconsumed as
 * part of an ordinary tag name.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeSpecialS = function (c) {
    switch (c) {
        case "c":
        case "C":
            this._state = 34 /* BeforeScript1 */;
            break;
        case "t":
        case "T":
            this._state = 44 /* BeforeStyle1 */;
            break;
        default:
            this._state = 3 /* InTagName */;
            this._index--; // Consume the token again
    }
};
/**
 * Second letter of a possible closing tag that began with "s" while a special
 * tag is open: only the letter that belongs to the open tag ("c" for script,
 * "t" for style) keeps matching; everything else returns to text.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateBeforeSpecialSEnd = function (c) {
    var next = 1 /* Text */;
    if (this.special === 2 /* Script */ && (c === "c" || c === "C")) {
        next = 39 /* AfterScript1 */;
    }
    else if (this.special === 3 /* Style */ && (c === "t" || c === "T")) {
        next = 48 /* AfterStyle1 */;
    }
    this._state = next;
};
/**
 * Runs on the character that follows a fully matched special tag name. If
 * that character ends the name ("/", ">" or whitespace), `special` is set to
 * the given mode. In every case the character is reconsumed in state 3
 * (InTagName).
 *
 * @param {string} c Current character.
 * @param {number} special Special mode to activate when the name is complete.
 */
Tokenizer.prototype.stateBeforeSpecialLast = function (c, special) {
    var nameIsComplete = c === "/" || c === ">" || whitespace(c);
    if (nameIsComplete) {
        this.special = special;
    }
    this._state = 3 /* InTagName */;
    this._index--; // Consume the token again
};
/**
 * Runs on the character that follows a fully matched special closing tag
 * name. If ">" or whitespace follows, special mode is left, the section is
 * rewound by `sectionStartOffset` characters and the character is reconsumed
 * in state 6 (InClosingTagName). Otherwise the tokenizer returns to text.
 *
 * @param {string} c Current character.
 * @param {number} sectionStartOffset Distance from the current index back to the section start.
 */
Tokenizer.prototype.stateAfterSpecialLast = function (c, sectionStartOffset) {
    if (c !== ">" && !whitespace(c)) {
        this._state = 1 /* Text */;
        return;
    }
    this.special = 1 /* None */;
    this._state = 6 /* InClosingTagName */;
    this.sectionStart = this._index - sectionStartOffset;
    this._index--; // Reconsume the token
};
/**
 * For entities terminated with a semicolon. Looks up the text between the
 * "&" at `sectionStart` and the current index in `map`; on a hit the decoded
 * value is emitted and the section restarts after the current character.
 *
 * @param {Object} [map] Entity table; defaults to the XML table in XML mode
 *     and to the full HTML table otherwise.
 */
Tokenizer.prototype.parseFixedEntity = function (map) {
    if (map === undefined) {
        map = this.xmlMode ? xml_json_1.default : entities_json_1.default;
    }
    // Skip the leading "&" (offset = 1).
    var nameStart = this.sectionStart + 1;
    if (nameStart >= this._index) {
        return;
    }
    var name = this.buffer.substring(nameStart, this._index);
    if (!Object.prototype.hasOwnProperty.call(map, name)) {
        return;
    }
    this.emitPartial(map[name]);
    this.sectionStart = this._index + 1;
};
/**
 * Parses legacy entities (without trailing semicolon). Tries the longest
 * candidate first, 6 characters at most, and shortens it down to 2
 * characters. On the first hit the decoded value is emitted and the section
 * start moves past the "&" and the matched name.
 */
Tokenizer.prototype.parseLegacyEntity = function () {
    var nameStart = this.sectionStart + 1;
    // The max length of legacy entities is 6, the min length is 2.
    var longest = Math.min(this._index - nameStart, 6);
    for (var length = longest; length >= 2; length--) {
        var candidate = this.buffer.substring(nameStart, nameStart + length);
        if (Object.prototype.hasOwnProperty.call(legacy_json_1.default, candidate)) {
            this.emitPartial(legacy_json_1.default[candidate]);
            this.sectionStart += length + 1;
            return;
        }
    }
};
/**
 * Inside a named entity. ";" ends the entity; digits and ASCII letters extend
 * the name; any other character ends the name without a semicolon and is
 * reconsumed in the base state.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInNamedEntity = function (c) {
    if (c === ";") {
        this.parseFixedEntity();
        // Retry as legacy entity if entity wasn't parsed
        if (this.baseState === 1 /* Text */ &&
            this.sectionStart + 1 < this._index &&
            !this.xmlMode) {
            this.parseLegacyEntity();
        }
        this._state = this.baseState;
        return;
    }
    var isDigit = c >= "0" && c <= "9";
    if (isDigit || isASCIIAlpha(c)) {
        return;
    }
    var hasName = this.sectionStart + 1 !== this._index;
    // XML mode and an empty name are left undecoded.
    if (!this.xmlMode && hasName) {
        if (this.baseState === 1 /* Text */) {
            this.parseLegacyEntity();
        }
        else if (c !== "=") {
            // Parse as legacy entity, without allowing additional characters.
            this.parseFixedEntity(legacy_json_1.default);
        }
    }
    this._state = this.baseState;
    this._index--;
};
/**
 * Decodes the digits of a numeric entity and emits the resulting character,
 * then returns to the base state. Nothing is emitted when there are no digits.
 *
 * @param {number} offset Number of prefix characters to skip after
 *     `sectionStart` (callers in this file pass 2 for decimal and 3 for hex).
 * @param {number} base Radix passed to `parseInt`.
 * @param {boolean} strict When truthy, the section restarts after the current
 *     character; otherwise it restarts at the current character.
 */
Tokenizer.prototype.decodeNumericEntity = function (offset, base, strict) {
    var digitsStart = this.sectionStart + offset;
    if (digitsStart !== this._index) {
        // Parse entity
        var digits = this.buffer.substring(digitsStart, this._index);
        var codePoint = parseInt(digits, base);
        this.emitPartial(decode_codepoint_1.default(codePoint));
        if (strict) {
            this.sectionStart = this._index + 1;
        }
        else {
            this.sectionStart = this._index;
        }
    }
    this._state = this.baseState;
};
/**
 * Inside a decimal numeric entity. ";" ends it strictly and digits extend it.
 * Any other character ends it without a semicolon: outside XML mode the
 * digits are still decoded, in XML mode the tokenizer just returns to the
 * base state. The character is reconsumed either way.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInNumericEntity = function (c) {
    if (c === ";") {
        this.decodeNumericEntity(2, 10, true);
        return;
    }
    var isDigit = c >= "0" && c <= "9";
    if (isDigit) {
        return;
    }
    if (this.xmlMode) {
        this._state = this.baseState;
    }
    else {
        this.decodeNumericEntity(2, 10, false);
    }
    this._index--;
};
/**
 * Inside a hexadecimal numeric entity. ";" ends it strictly and hex digits
 * extend it. Any other character ends it without a semicolon: outside XML
 * mode the digits are still decoded, in XML mode the tokenizer just returns
 * to the base state. The character is reconsumed either way.
 *
 * @param {string} c Current character.
 */
Tokenizer.prototype.stateInHexEntity = function (c) {
    if (c === ";") {
        this.decodeNumericEntity(3, 16, true);
        return;
    }
    var isHexDigit = (c >= "a" && c <= "f") ||
        (c >= "A" && c <= "F") ||
        (c >= "0" && c <= "9");
    if (isHexDigit) {
        return;
    }
    if (this.xmlMode) {
        this._state = this.baseState;
    }
    else {
        this.decodeNumericEntity(3, 16, false);
    }
    this._index--;
};
/**
 * Drops the part of the buffer that is no longer needed and updates
 * `bufferOffset` so absolute positions stay correct.
 *
 * - With a negative `sectionStart` the whole buffer is dropped.
 * - While paused with an open section, nothing is touched.
 * - In text state the pending text is emitted and the buffer is dropped.
 * - In any other state only the current section is kept.
 */
Tokenizer.prototype.cleanup = function () {
    var noOpenSection = this.sectionStart < 0;
    if (!noOpenSection && !this.running) {
        return;
    }
    var inText = !noOpenSection && this._state === 1 /* Text */;
    var keepSection = !noOpenSection && !inText && this.sectionStart !== this._index;
    if (keepSection) {
        // Remove everything unnecessary
        this.buffer = this.buffer.slice(this.sectionStart);
        this._index -= this.sectionStart;
        this.bufferOffset += this.sectionStart;
    }
    else {
        if (inText && this.sectionStart !== this._index) {
            this.cbs.ontext(this.buffer.slice(this.sectionStart));
        }
        this.buffer = "";
        this.bufferOffset += this._index;
        this._index = 0;
    }
    // A negative section start is deliberately left as it is.
    if (!noOpenSection) {
        this.sectionStart = 0;
    }
};
/**
 * Iterates through the buffer, calling the function corresponding to the current state.
 *
 * The loop stops at the end of the buffer or as soon as the tokenizer is
 * paused; `cleanup()` runs afterwards in both cases. Handlers that want the
 * same character looked at again decrement `_index` themselves, which cancels
 * the increment at the end of each iteration.
 *
 * The cases are listed in the same order as the checks they replace, with the
 * states that are more likely to be hit first.
 */
Tokenizer.prototype.parse = function () {
    while (this._index < this.buffer.length && this.running) {
        var c = this.buffer.charAt(this._index);
        switch (this._state) {
            case 1 /* Text */:
                this.stateText(c);
                break;
            case 12 /* InAttributeValueDq */:
                this.stateInAttributeValueDoubleQuotes(c);
                break;
            case 9 /* InAttributeName */:
                this.stateInAttributeName(c);
                break;
            case 19 /* InComment */:
                this.stateInComment(c);
                break;
            case 20 /* InSpecialComment */:
                this.stateInSpecialComment(c);
                break;
            case 8 /* BeforeAttributeName */:
                this.stateBeforeAttributeName(c);
                break;
            case 3 /* InTagName */:
                this.stateInTagName(c);
                break;
            case 6 /* InClosingTagName */:
                this.stateInClosingTagName(c);
                break;
            case 2 /* BeforeTagName */:
                this.stateBeforeTagName(c);
                break;
            case 10 /* AfterAttributeName */:
                this.stateAfterAttributeName(c);
                break;
            case 13 /* InAttributeValueSq */:
                this.stateInAttributeValueSingleQuotes(c);
                break;
            case 11 /* BeforeAttributeValue */:
                this.stateBeforeAttributeValue(c);
                break;
            case 5 /* BeforeClosingTagName */:
                this.stateBeforeClosingTagName(c);
                break;
            case 7 /* AfterClosingTagName */:
                this.stateAfterClosingTagName(c);
                break;
            case 32 /* BeforeSpecialS */:
                this.stateBeforeSpecialS(c);
                break;
            case 21 /* AfterComment1 */:
                this.stateAfterComment1(c);
                break;
            case 14 /* InAttributeValueNq */:
                this.stateInAttributeValueNoQuotes(c);
                break;
            case 4 /* InSelfClosingTag */:
                this.stateInSelfClosingTag(c);
                break;
            case 16 /* InDeclaration */:
                this.stateInDeclaration(c);
                break;
            case 15 /* BeforeDeclaration */:
                this.stateBeforeDeclaration(c);
                break;
            case 22 /* AfterComment2 */:
                this.stateAfterComment2(c);
                break;
            case 18 /* BeforeComment */:
                this.stateBeforeComment(c);
                break;
            case 33 /* BeforeSpecialSEnd */:
                this.stateBeforeSpecialSEnd(c);
                break;
            case 53 /* BeforeSpecialTEnd */:
                stateAfterSpecialTEnd(this, c);
                break;
            case 39 /* AfterScript1 */:
                stateAfterScript1(this, c);
                break;
            case 40 /* AfterScript2 */:
                stateAfterScript2(this, c);
                break;
            case 41 /* AfterScript3 */:
                stateAfterScript3(this, c);
                break;
            case 34 /* BeforeScript1 */:
                stateBeforeScript1(this, c);
                break;
            case 35 /* BeforeScript2 */:
                stateBeforeScript2(this, c);
                break;
            case 36 /* BeforeScript3 */:
                stateBeforeScript3(this, c);
                break;
            case 37 /* BeforeScript4 */:
                stateBeforeScript4(this, c);
                break;
            case 38 /* BeforeScript5 */:
                this.stateBeforeSpecialLast(c, 2 /* Script */);
                break;
            case 42 /* AfterScript4 */:
                stateAfterScript4(this, c);
                break;
            case 43 /* AfterScript5 */:
                this.stateAfterSpecialLast(c, 6);
                break;
            case 44 /* BeforeStyle1 */:
                stateBeforeStyle1(this, c);
                break;
            case 29 /* InCdata */:
                this.stateInCdata(c);
                break;
            case 45 /* BeforeStyle2 */:
                stateBeforeStyle2(this, c);
                break;
            case 46 /* BeforeStyle3 */:
                stateBeforeStyle3(this, c);
                break;
            case 47 /* BeforeStyle4 */:
                this.stateBeforeSpecialLast(c, 3 /* Style */);
                break;
            case 48 /* AfterStyle1 */:
                stateAfterStyle1(this, c);
                break;
            case 49 /* AfterStyle2 */:
                stateAfterStyle2(this, c);
                break;
            case 50 /* AfterStyle3 */:
                stateAfterStyle3(this, c);
                break;
            case 51 /* AfterStyle4 */:
                this.stateAfterSpecialLast(c, 5);
                break;
            case 52 /* BeforeSpecialT */:
                stateBeforeSpecialT(this, c);
                break;
            case 54 /* BeforeTitle1 */:
                stateBeforeTitle1(this, c);
                break;
            case 55 /* BeforeTitle2 */:
                stateBeforeTitle2(this, c);
                break;
            case 56 /* BeforeTitle3 */:
                stateBeforeTitle3(this, c);
                break;
            case 57 /* BeforeTitle4 */:
                this.stateBeforeSpecialLast(c, 4 /* Title */);
                break;
            case 58 /* AfterTitle1 */:
                stateAfterTitle1(this, c);
                break;
            case 59 /* AfterTitle2 */:
                stateAfterTitle2(this, c);
                break;
            case 60 /* AfterTitle3 */:
                stateAfterTitle3(this, c);
                break;
            case 61 /* AfterTitle4 */:
                this.stateAfterSpecialLast(c, 5);
                break;
            case 17 /* InProcessingInstruction */:
                this.stateInProcessingInstruction(c);
                break;
            case 64 /* InNamedEntity */:
                this.stateInNamedEntity(c);
                break;
            case 23 /* BeforeCdata1 */:
                stateBeforeCdata1(this, c);
                break;
            case 62 /* BeforeEntity */:
                stateBeforeEntity(this, c);
                break;
            case 24 /* BeforeCdata2 */:
                stateBeforeCdata2(this, c);
                break;
            case 25 /* BeforeCdata3 */:
                stateBeforeCdata3(this, c);
                break;
            case 30 /* AfterCdata1 */:
                this.stateAfterCdata1(c);
                break;
            case 31 /* AfterCdata2 */:
                this.stateAfterCdata2(c);
                break;
            case 26 /* BeforeCdata4 */:
                stateBeforeCdata4(this, c);
                break;
            case 27 /* BeforeCdata5 */:
                stateBeforeCdata5(this, c);
                break;
            case 28 /* BeforeCdata6 */:
                this.stateBeforeCdata6(c);
                break;
            case 66 /* InHexEntity */:
                this.stateInHexEntity(c);
                break;
            case 65 /* InNumericEntity */:
                this.stateInNumericEntity(c);
                break;
            case 63 /* BeforeNumericEntity */:
                stateBeforeNumericEntity(this, c);
                break;
            default:
                this.cbs.onerror(Error("unknown _state"), this._state);
        }
        this._index++;
    }
    this.cleanup();
};
/**
 * Finishes tokenizing: if there is remaining data in the current section it
 * is emitted in a reasonable way, then `cbs.onend` is called.
 */
Tokenizer.prototype.finish = function () {
    var hasTrailingData = this.sectionStart < this._index;
    if (hasTrailingData) {
        this.handleTrailingData();
    }
    this.cbs.onend();
};
/**
 * Emits whatever is left in the buffer from `sectionStart` on, in the form
 * that matches the state the input ended in:
 *
 * - unfinished CDATA and comments are reported as CDATA / comment;
 * - outside XML mode an unfinished entity is decoded as far as possible, and
 *   anything that remains is handled again in the base state;
 * - data inside an unfinished tag is ignored;
 * - everything else is reported as text.
 */
Tokenizer.prototype.handleTrailingData = function () {
    var data = this.buffer.slice(this.sectionStart);
    var state = this._state;
    switch (state) {
        case 29 /* InCdata */:
        case 30 /* AfterCdata1 */:
        case 31 /* AfterCdata2 */:
            this.cbs.oncdata(data);
            return;
        case 19 /* InComment */:
        case 21 /* AfterComment1 */:
        case 22 /* AfterComment2 */:
            this.cbs.oncomment(data);
            return;
        case 64 /* InNamedEntity */:
        case 65 /* InNumericEntity */:
        case 66 /* InHexEntity */:
            if (this.xmlMode) {
                // XML mode does not decode unterminated entities; emit them as text below.
                break;
            }
            if (state === 64 /* InNamedEntity */) {
                this.parseLegacyEntity();
            }
            else if (state === 65 /* InNumericEntity */) {
                this.decodeNumericEntity(2, 10, false);
            }
            else {
                this.decodeNumericEntity(3, 16, false);
            }
            if (this.sectionStart < this._index) {
                this._state = this.baseState;
                this.handleTrailingData();
            }
            return;
        case 3 /* InTagName */:
        case 8 /* BeforeAttributeName */:
        case 11 /* BeforeAttributeValue */:
        case 10 /* AfterAttributeName */:
        case 9 /* InAttributeName */:
        case 13 /* InAttributeValueSq */:
        case 12 /* InAttributeValueDq */:
        case 14 /* InAttributeValueNq */:
        case 6 /* InClosingTagName */:
            /*
             * Ignore remaining data
             * TODO add a way to remove current tag
             */
            return;
    }
    this.cbs.ontext(data);
};
/**
 * Returns the part of the buffer between `sectionStart` and the current index.
 *
 * @returns {string} The current section.
 */
Tokenizer.prototype.getSection = function () {
    var from = this.sectionStart;
    var to = this._index;
    return this.buffer.substring(from, to);
};
/**
 * Passes the current section to the callback called `name` and marks the
 * section as consumed by setting `sectionStart` to -1.
 *
 * @param {string} name Name of the callback on `cbs` to invoke.
 */
Tokenizer.prototype.emitToken = function (name) {
    var section = this.getSection();
    this.cbs[name](section);
    this.sectionStart = -1;
};
/**
 * Emits a decoded entity value: as text when the base state is 1 (Text),
 * otherwise as attribute data.
 *
 * @param {string} value Decoded value to emit.
 */
Tokenizer.prototype.emitPartial = function (value) {
    var decodedInText = this.baseState === 1 /* Text */;
    if (decodedInText) {
        this.cbs.ontext(value);
    }
    else {
        this.cbs.onattribdata(value); // TODO implement the new event
    }
};
907 return Tokenizer;
908}());
909exports.default = Tokenizer;
Note: See TracBrowser for help on using the repository browser.