1 | "use strict";
|
---|
/**
 * Interop helper for default imports.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below.
 *
 * @param mod The value returned by `require`.
 * @returns `mod` itself when it is truthy and flagged with `__esModule`,
 *     otherwise a wrapper object exposing `mod` under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
5 | Object.defineProperty(exports, "__esModule", { value: true });
|
---|
6 | exports.Parser = void 0;
|
---|
7 | var Tokenizer_1 = __importDefault(require("./Tokenizer"));
|
---|
/** Form-control tag names; used below as the close-set for form-control openers. */
var formTags = new Set([
    "input",
    "option",
    "optgroup",
    "select",
    "button",
    "datalist",
    "textarea",
]);

/** Close-set containing only `p`; shared by every opener that ends an open paragraph. */
var pTag = new Set(["p"]);

/**
 * Maps an opening tag name to the set of tag names that it implicitly closes.
 *
 * The parser consults this table (outside XML mode) when a tag is opened:
 * while the element on top of the stack is contained in the set registered
 * for the new tag, that element is closed before the new one is pushed.
 */
var openImpliesClose = {
    // Table structure
    tr: new Set(["tr", "th", "td"]),
    th: new Set(["th"]),
    td: new Set(["thead", "th", "td"]),
    body: new Set(["head", "link", "script"]),
    li: new Set(["li"]),
    // Paragraphs and headings end an open <p>
    p: pTag,
    h1: pTag,
    h2: pTag,
    h3: pTag,
    h4: pTag,
    h5: pTag,
    h6: pTag,
    // Form controls end any open form control
    select: formTags,
    input: formTags,
    output: formTags,
    button: formTags,
    datalist: formTags,
    textarea: formTags,
    option: new Set(["option"]),
    optgroup: new Set(["optgroup", "option"]),
    dd: new Set(["dt", "dd"]),
    dt: new Set(["dt", "dd"]),
    // Block-level openers end an open <p>
    address: pTag,
    article: pTag,
    aside: pTag,
    blockquote: pTag,
    details: pTag,
    div: pTag,
    dl: pTag,
    fieldset: pTag,
    figcaption: pTag,
    figure: pTag,
    footer: pTag,
    form: pTag,
    header: pTag,
    hr: pTag,
    main: pTag,
    nav: pTag,
    ol: pTag,
    pre: pTag,
    section: pTag,
    table: pTag,
    ul: pTag,
    // Ruby annotations and table sections
    rt: new Set(["rt", "rp"]),
    rp: new Set(["rt", "rp"]),
    tbody: new Set(["thead", "tbody"]),
    tfoot: new Set(["thead", "tbody"]),
};
/**
 * Tag names treated as void elements.
 *
 * Outside XML mode the parser never pushes these onto its element stack and
 * reports their close event as soon as the opening tag ends.
 */
var voidElements = new Set([
    "area",
    "base",
    "basefont",
    "br",
    "col",
    "command",
    "embed",
    "frame",
    "hr",
    "img",
    "input",
    "isindex",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
]);
/** Tag names whose opening makes the parser push `true` onto its foreign-context stack. */
var foreignContextElements = new Set(["math", "svg"]);

/** Tag names whose opening makes the parser push `false` onto its foreign-context stack. */
var htmlIntegrationElements = new Set([
    "mi",
    "mo",
    "mn",
    "ms",
    "mtext",
    "annotation-xml",
    "foreignObject",
    "desc",
    "title",
]);

/** Matches the first whitespace character or slash; marks where an instruction name ends. */
var reNameEnd = /\s|\//;
/**
 * Event-driven parser: receives low-level events from a tokenizer, tracks the
 * stack of open elements and forwards higher-level events to a handler object.
 */
var Parser = /** @class */ (function () {
    /**
     * @param cbs Handler object whose optional `on*` methods receive the
     *     parser events. A nullish value is replaced by an empty object.
     * @param options Parser options. Read here: `xmlMode`, `lowerCaseTags`,
     *     `lowerCaseAttributeNames`, `Tokenizer`, `recognizeSelfClosing`,
     *     `recognizeCDATA`. The same object is handed to the tokenizer.
     */
    function Parser(cbs, options) {
        if (options === void 0) { options = {}; }
        /** The start index of the last event. */
        this.startIndex = 0;
        /** The end index of the last event (`null` until the first event). */
        this.endIndex = null;
        this.tagname = "";
        this.attribname = "";
        this.attribvalue = "";
        this.attribs = null;
        this.stack = [];
        this.foreignContext = [];
        this.options = options;
        this.cbs = cbs != null ? cbs : {};
        // Names are lower-cased by default unless XML mode is on.
        this.lowerCaseTagNames =
            options.lowerCaseTags != null ? options.lowerCaseTags : !options.xmlMode;
        this.lowerCaseAttributeNames =
            options.lowerCaseAttributeNames != null
                ? options.lowerCaseAttributeNames
                : !options.xmlMode;
        var TokenizerClass = options.Tokenizer != null ? options.Tokenizer : Tokenizer_1.default;
        this.tokenizer = new TokenizerClass(this.options, this);
        if (this.cbs.onparserinit != null) {
            this.cbs.onparserinit(this);
        }
    }
    /**
     * Recomputes `startIndex` and `endIndex` for the event being emitted.
     *
     * For the first event, `startIndex` is the tokenizer's `sectionStart`
     * minus `initialOffset`, clamped at 0. For later events it is the previous
     * `endIndex + 1`. `endIndex` is the tokenizer's absolute index.
     *
     * @param initialOffset Amount subtracted from the tokenizer's section start.
     */
    Parser.prototype.updatePosition = function (initialOffset) {
        if (this.endIndex === null) {
            var sectionStart = this.tokenizer.sectionStart;
            this.startIndex = sectionStart <= initialOffset ? 0 : sectionStart - initialOffset;
        }
        else {
            this.startIndex = this.endIndex + 1;
        }
        this.endIndex = this.tokenizer.getAbsoluteIndex();
    };
    // Tokenizer event handlers
    /**
     * Forwards a text section to `cbs.ontext`.
     *
     * @param data Text content.
     */
    Parser.prototype.ontext = function (data) {
        this.updatePosition(1);
        this.endIndex--;
        if (this.cbs.ontext != null) {
            this.cbs.ontext(data);
        }
    };
    /**
     * Handles the name of an opening tag: closes elements the new tag
     * implicitly ends, pushes the tag onto the stack (unless it is a void
     * element outside XML mode) and starts collecting attributes.
     *
     * @param name Tag name as written in the document.
     */
    Parser.prototype.onopentagname = function (name) {
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        this.tagname = name;
        if (!this.options.xmlMode &&
            Object.prototype.hasOwnProperty.call(openImpliesClose, name)) {
            var impliedClose = openImpliesClose[name];
            // Keep closing the top of the stack while this tag implies its end.
            while (this.stack.length > 0) {
                var top_1 = this.stack[this.stack.length - 1];
                if (!impliedClose.has(top_1)) {
                    break;
                }
                this.onclosetag(top_1);
            }
        }
        if (this.options.xmlMode || !voidElements.has(name)) {
            this.stack.push(name);
            if (foreignContextElements.has(name)) {
                this.foreignContext.push(true);
            }
            else if (htmlIntegrationElements.has(name)) {
                this.foreignContext.push(false);
            }
        }
        if (this.cbs.onopentagname != null) {
            this.cbs.onopentagname(name);
        }
        // Attributes are only collected when someone listens for `onopentag`.
        if (this.cbs.onopentag) {
            this.attribs = {};
        }
    };
    /**
     * Handles the end of an opening tag: emits `onopentag` with the collected
     * attributes and, outside XML mode, immediately emits `onclosetag` for
     * void elements.
     */
    Parser.prototype.onopentagend = function () {
        this.updatePosition(1);
        if (this.attribs) {
            if (this.cbs.onopentag != null) {
                this.cbs.onopentag(this.tagname, this.attribs);
            }
            this.attribs = null;
        }
        if (!this.options.xmlMode &&
            this.cbs.onclosetag &&
            voidElements.has(this.tagname)) {
            this.cbs.onclosetag(this.tagname);
        }
        this.tagname = "";
    };
    /**
     * Handles a closing tag. Every element from the top of the stack down to
     * the most recent element called `name` is closed. Outside XML mode, a
     * stray `</p>` (and a `</br>` that reaches the final branch) is turned
     * into an opened-and-immediately-closed element.
     *
     * @param name Tag name as written in the document.
     */
    Parser.prototype.onclosetag = function (name) {
        this.updatePosition(1);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        if (foreignContextElements.has(name) ||
            htmlIntegrationElements.has(name)) {
            this.foreignContext.pop();
        }
        if (this.stack.length &&
            (this.options.xmlMode || !voidElements.has(name))) {
            var pos = this.stack.lastIndexOf(name);
            if (pos !== -1) {
                if (this.cbs.onclosetag) {
                    var remaining = this.stack.length - pos;
                    while (remaining--) {
                        // We know the stack has sufficient elements.
                        this.cbs.onclosetag(this.stack.pop());
                    }
                }
                else {
                    // Nobody is listening: drop the closed elements in one step.
                    this.stack.length = pos;
                }
            }
            else if (name === "p" && !this.options.xmlMode) {
                this.onopentagname(name);
                this.closeCurrentTag();
            }
        }
        else if (!this.options.xmlMode && (name === "br" || name === "p")) {
            this.onopentagname(name);
            this.closeCurrentTag();
        }
    };
    /**
     * Handles a `/>` tag ending. The tag is closed right away in XML mode,
     * with `recognizeSelfClosing`, or when the top of the foreign-context
     * stack is truthy; otherwise it is treated as a regular opening tag.
     */
    Parser.prototype.onselfclosingtag = function () {
        var inForeignContext = this.foreignContext[this.foreignContext.length - 1];
        if (this.options.xmlMode ||
            this.options.recognizeSelfClosing ||
            inForeignContext) {
            this.closeCurrentTag();
        }
        else {
            this.onopentagend();
        }
    };
    /**
     * Finishes the current opening tag and closes it again if it ended up on
     * top of the stack.
     */
    Parser.prototype.closeCurrentTag = function () {
        var name = this.tagname;
        this.onopentagend();
        /*
         * Self-closing tags will be on the top of the stack
         * (cheaper check than in onclosetag)
         */
        if (this.stack[this.stack.length - 1] === name) {
            if (this.cbs.onclosetag != null) {
                this.cbs.onclosetag(name);
            }
            this.stack.pop();
        }
    };
    /**
     * Records the name of the attribute currently being parsed.
     *
     * @param name Attribute name as written in the document.
     */
    Parser.prototype.onattribname = function (name) {
        if (this.lowerCaseAttributeNames) {
            name = name.toLowerCase();
        }
        this.attribname = name;
    };
    /**
     * Appends a piece of the current attribute's value.
     *
     * @param value Value fragment.
     */
    Parser.prototype.onattribdata = function (value) {
        this.attribvalue += value;
    };
    /**
     * Completes the current attribute: emits `onattribute`, stores the value
     * in the attribute map (the first occurrence of a name wins) and clears
     * the attribute buffers.
     *
     * @param quote Passed through unchanged as the third `onattribute` argument.
     */
    Parser.prototype.onattribend = function (quote) {
        if (this.cbs.onattribute != null) {
            this.cbs.onattribute(this.attribname, this.attribvalue, quote);
        }
        if (this.attribs &&
            !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
            this.attribs[this.attribname] = this.attribvalue;
        }
        this.attribname = "";
        this.attribvalue = "";
    };
    /**
     * Extracts the leading name of a declaration or processing instruction:
     * everything before the first whitespace character or slash.
     *
     * @param value Raw instruction content.
     * @returns The name, lower-cased when tag names are lower-cased.
     */
    Parser.prototype.getInstructionName = function (value) {
        var idx = value.search(reNameEnd);
        var name = idx < 0 ? value : value.slice(0, idx);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        return name;
    };
    /**
     * Forwards a declaration to `cbs.onprocessinginstruction`, with both the
     * name and the data prefixed by `!`.
     *
     * @param value Raw declaration content.
     */
    Parser.prototype.ondeclaration = function (value) {
        if (this.cbs.onprocessinginstruction) {
            var name_1 = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("!" + name_1, "!" + value);
        }
    };
    /**
     * Forwards a processing instruction to `cbs.onprocessinginstruction`,
     * with both the name and the data prefixed by `?`.
     *
     * @param value Raw instruction content.
     */
    Parser.prototype.onprocessinginstruction = function (value) {
        if (this.cbs.onprocessinginstruction) {
            var name_2 = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("?" + name_2, "?" + value);
        }
    };
    /**
     * Emits `oncomment` followed by `oncommentend`.
     *
     * @param value Comment content.
     */
    Parser.prototype.oncomment = function (value) {
        this.updatePosition(4);
        if (this.cbs.oncomment != null) {
            this.cbs.oncomment(value);
        }
        if (this.cbs.oncommentend != null) {
            this.cbs.oncommentend();
        }
    };
    /**
     * Handles a CDATA section. In XML mode or with `recognizeCDATA` it is
     * emitted as `oncdatastart` / `ontext` / `oncdataend`; otherwise it is
     * reported as a comment whose content is `[CDATA[...]]`.
     *
     * @param value CDATA content.
     */
    Parser.prototype.oncdata = function (value) {
        this.updatePosition(1);
        if (this.options.xmlMode || this.options.recognizeCDATA) {
            if (this.cbs.oncdatastart != null) {
                this.cbs.oncdatastart();
            }
            if (this.cbs.ontext != null) {
                this.cbs.ontext(value);
            }
            if (this.cbs.oncdataend != null) {
                this.cbs.oncdataend();
            }
        }
        else {
            this.oncomment("[CDATA[" + value + "]]");
        }
    };
    /**
     * Forwards an error to `cbs.onerror`.
     *
     * @param err The error to report.
     */
    Parser.prototype.onerror = function (err) {
        if (this.cbs.onerror != null) {
            this.cbs.onerror(err);
        }
    };
    /**
     * Emits `onclosetag` for every element still on the stack, innermost
     * first, then emits `onend`.
     */
    Parser.prototype.onend = function () {
        if (this.cbs.onclosetag) {
            for (var i = this.stack.length - 1; i >= 0; i--) {
                this.cbs.onclosetag(this.stack[i]);
            }
        }
        if (this.cbs.onend != null) {
            this.cbs.onend();
        }
    };
    /**
     * Resets the parser to a blank state, ready to parse a new HTML document
     */
    Parser.prototype.reset = function () {
        if (this.cbs.onreset != null) {
            this.cbs.onreset();
        }
        this.tokenizer.reset();
        // Restore every piece of per-document state to its constructor value so
        // positions, pending attribute data and foreign-context tracking from
        // the previous document cannot leak into the next one.
        this.startIndex = 0;
        this.endIndex = null;
        this.tagname = "";
        this.attribname = "";
        this.attribvalue = "";
        this.attribs = null;
        this.stack = [];
        this.foreignContext = [];
        if (this.cbs.onparserinit != null) {
            this.cbs.onparserinit(this);
        }
    };
    /**
     * Resets the parser, then parses a complete document and
     * pushes it to the handler.
     *
     * @param data Document to parse.
     */
    Parser.prototype.parseComplete = function (data) {
        this.reset();
        this.end(data);
    };
    /**
     * Hands a chunk of data to the tokenizer for parsing.
     *
     * @param chunk Chunk to parse.
     */
    Parser.prototype.write = function (chunk) {
        this.tokenizer.write(chunk);
    };
    /**
     * Signals the end of input to the tokenizer.
     *
     * @param chunk Optional final chunk to parse.
     */
    Parser.prototype.end = function (chunk) {
        this.tokenizer.end(chunk);
    };
    /**
     * Pauses parsing by pausing the tokenizer.
     */
    Parser.prototype.pause = function () {
        this.tokenizer.pause();
    };
    /**
     * Resumes parsing after `pause` was called.
     */
    Parser.prototype.resume = function () {
        this.tokenizer.resume();
    };
    /**
     * Alias of `write`, for backwards compatibility.
     *
     * @param chunk Chunk to parse.
     * @deprecated
     */
    Parser.prototype.parseChunk = function (chunk) {
        this.write(chunk);
    };
    /**
     * Alias of `end`, for backwards compatibility.
     *
     * @param chunk Optional final chunk to parse.
     * @deprecated
     */
    Parser.prototype.done = function (chunk) {
        this.end(chunk);
    };
    return Parser;
}());
381 | exports.Parser = Parser;
|
---|