source: trip-planner-front/node_modules/@trysound/sax/lib/sax.js@ 76712b2

Last change on this file since 76712b2 was 6a3a178, checked in by Ema <ema_spirova@…>, 3 years ago

initial commit

  • Property mode set to 100644
File size: 38.7 KB
Line 
1;(function (sax) { // wrapper for non-node envs
// Public factory: builds a parser without the caller having to use `new`.
2 sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
// The constructor itself is exported as well.
3 sax.SAXParser = SAXParser
4
5 // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
6 // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
7 // since that's the earliest that a buffer overrun could occur. This way, checks are
8 // as rare as required, but as often as necessary to ensure never crossing this bound.
9 // Furthermore, buffers are only tested at most once per write(), so passing a very
10 // large string into write() might have undesirable effects, but this is manageable by
11 // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
12 // edge case, result in creating at most one complete copy of the string passed in.
13 // Set to Infinity to have unlimited buffers.
14 sax.MAX_BUFFER_LENGTH = 64 * 1024
15
// Names of the parser properties used as string accumulators.
// clearBuffers() resets each of them to '' and checkBufferLength()
// measures each of them against MAX_BUFFER_LENGTH.
16 var buffers = [
17 'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype',
18 'procInstName', 'procInstBody', 'entity', 'attribName',
19 'attribValue', 'cdata', 'script'
20 ]
21
// Event names published on sax.EVENTS. Within this file the handlers are
// invoked through emit()/emitNode() as parser properties named
// 'on' + <event> (for example 'onready', 'ontext', 'onopentag').
22 sax.EVENTS = [
23 'text',
24 'processinginstruction',
25 'sgmldeclaration',
26 'doctype',
27 'comment',
28 'opentagstart',
29 'attribute',
30 'opentag',
31 'closetag',
32 'opencdata',
33 'cdata',
34 'closecdata',
35 'error',
36 'end',
37 'ready',
38 'script',
39 'opennamespace',
40 'closenamespace'
41 ]
42
/**
 * Parser constructor. Works with or without `new`.
 *
 * Resets every buffer and all bookkeeping fields on the instance, stores the
 * options object (filling in `opt.lowercase` from `opt.lowercasetags`), and
 * finally emits `onready`. `end()` re-invokes this function on an existing
 * instance to recycle it.
 *
 * @param {boolean} strict - truthy to enable strict mode (also disables script handling)
 * @param {Object} [opt] - options: lowercase, lowercasetags, noscript,
 *   strictEntities, xmlns, position
 */
function SAXParser (strict, opt) {
  if ((this instanceof SAXParser) === false) {
    return new SAXParser(strict, opt)
  }

  var self = this
  var options = opt || {}

  clearBuffers(self)
  self.c = ''
  self.q = ''
  self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

  self.opt = options
  options.lowercase = options.lowercase || options.lowercasetags
  self.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'

  self.tags = []
  self.sawRoot = false
  self.closedRoot = false
  self.closed = false
  self.error = null
  self.tag = null

  self.strict = !!strict
  self.noscript = !!(strict || options.noscript)
  self.state = S.BEGIN

  // Each parser gets its own entity table that inherits from the shared one,
  // so per-parser additions do not leak into sax.ENTITIES / sax.XML_ENTITIES.
  self.strictEntities = options.strictEntities
  var sharedEntities = self.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
  self.ENTITIES = Object.create(sharedEntities)
  self.attribList = []

  // Namespace bindings form a prototype chain: the parser-level object
  // inherits from rootNS and each tag that declares bindings inherits from
  // its parent's object.
  if (options.xmlns) {
    self.ns = Object.create(rootNS)
  }

  // Position tracking exists mostly for error reporting; on unless
  // `position: false` was passed.
  self.trackPosition = options.position !== false
  if (self.trackPosition) {
    self.column = 0
    self.line = 0
    self.position = 0
  }

  emit(self, 'onready')
}
79
// Minimal stand-in for engines without Object.create: returns a fresh object
// whose prototype is `o` (property descriptors are not supported).
if (!Object.create) {
  Object.create = function (o) {
    var Surrogate = function () {}
    Surrogate.prototype = o
    return new Surrogate()
  }
}
88
// Minimal stand-in for engines without Object.keys: collects the own
// enumerable property names of `o`.
if (!Object.keys) {
  Object.keys = function (o) {
    var names = []
    for (var name in o) {
      if (o.hasOwnProperty(name)) {
        names.push(name)
      }
    }
    return names
  }
}
96
/**
 * Enforces sax.MAX_BUFFER_LENGTH across all parser buffers and schedules the
 * next check.
 *
 * Oversized text, cdata and script buffers are flushed to their handlers
 * (they can legitimately grow large); any other oversized buffer is reported
 * through error(). Afterwards `parser.bufferCheckPosition` is set to the
 * earliest position at which a buffer could overflow again.
 *
 * @param {SAXParser} parser
 */
function checkBufferLength (parser) {
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
  var maxActual = 0
  for (var i = 0, l = buffers.length; i < l; i++) {
    var name = buffers[i]
    var value = parser[name]
    // `parser.doctype` is replaced by the boolean `true` once a doctype has
    // been emitted. Reading `.length` from it gave `undefined`, which turned
    // maxActual (and so bufferCheckPosition) into NaN and silently disabled
    // every later check. Non-string buffers therefore count as empty.
    var len = typeof value === 'string' ? value.length : 0
    if (len > maxAllowed) {
      // Text/cdata nodes can get big, and since they're buffered,
      // we can get here under normal conditions.
      // Avoid issues by emitting the text node now,
      // so at least it won't get any bigger.
      switch (name) {
        case 'textNode':
          closeText(parser)
          break

        case 'cdata':
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
          break

        case 'script':
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
          break

        default:
          error(parser, 'Max buffer length exceeded: ' + name)
      }
    }
    maxActual = Math.max(maxActual, len)
  }
  // schedule the next check for the earliest possible buffer overrun.
  var m = sax.MAX_BUFFER_LENGTH - maxActual
  parser.bufferCheckPosition = m + parser.position
}
132
/**
 * Resets every accumulator listed in `buffers` to the empty string.
 *
 * @param {SAXParser} parser
 */
function clearBuffers (parser) {
  buffers.forEach(function (name) {
    parser[name] = ''
  })
}
138
/**
 * Pushes out whatever is currently buffered: pending text first (via
 * closeText), then any non-empty cdata and script content through their
 * handlers. Flushed cdata/script buffers are reset to ''.
 *
 * @param {SAXParser} parser
 */
function flushBuffers (parser) {
  closeText(parser)

  var pending = [
    ['cdata', 'oncdata'],
    ['script', 'onscript']
  ]
  for (var k = 0; k < pending.length; k++) {
    var field = pending[k][0]
    if (parser[field] === '') continue
    emitNode(parser, pending[k][1], parser[field])
    parser[field] = ''
  }
}
150
// Public instance API. Each method delegates to the module-level function of
// the same purpose.
SAXParser.prototype = {
  // Finish parsing; returns nothing.
  end: function () {
    end(this)
  },

  // Feed a chunk to the parser.
  write: write,

  // Clear a stored error so that write() stops re-throwing it; chainable.
  resume: function () {
    var self = this
    self.error = null
    return self
  },

  // Closing is signalled to write() with a null chunk.
  close: function () {
    return this.write(null)
  },

  // Emit any buffered text/cdata/script immediately.
  flush: function () {
    flushBuffers(this)
  }
}
158
159 // this really needs to be replaced with character classes.
160 // XML allows all manner of ridiculous numbers and digits.
// Literal markers compared (upper-cased) against the text following "<!"
// in the SGML_DECL state of write().
161 var CDATA = '[CDATA['
162 var DOCTYPE = 'DOCTYPE'
// Namespace URIs that the reserved `xml` and `xmlns` prefixes must be bound
// to; attrib() reports a strict-mode failure for any other binding.
163 var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
164 var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
// Root of the namespace prototype chain used when opt.xmlns is set.
165 var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
166
167 // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
168 // This implementation works on strings, a single character at a time
169 // as such, it cannot ever support astral-plane characters (10000-EFFFF)
170 // without a significant breaking change to either this parser, or the
171 // JavaScript language. Implementation of an emoji-capable xml parser
172 // is left as an exercise for the reader.
// Single-character class accepted as the first character of a name.
173 var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
174
// Single-character class accepted after the first character of a name:
// everything in nameStart plus \u00B7, \u0300-\u036F, \u203F-\u2040, '.',
// digits and '-'.
175 var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
176
// Entity-name variants of the two classes above; they additionally accept
// '#' so that numeric references such as &#65; and &#x41; are collected.
177 var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
178 var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
179
/**
 * True when `c` is exactly a space, line feed, carriage return or tab.
 *
 * @param {string} c - a single character
 * @returns {boolean}
 */
function isWhitespace (c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true
    default:
      return false
  }
}
183
/**
 * True when `c` is a double or single quote character.
 *
 * @param {string} c - a single character
 * @returns {boolean}
 */
function isQuote (c) {
  if (c === '\'') return true
  return c === '"'
}
187
/**
 * True when `c` terminates an unquoted attribute value: '>' or whitespace.
 *
 * @param {string} c - a single character
 * @returns {boolean}
 */
function isAttribEnd (c) {
  if (c === '>') return true
  return isWhitespace(c)
}
191
/**
 * Tests `c` against `regex`.
 *
 * @param {RegExp} regex
 * @param {string} c
 * @returns {boolean} the result of regex.test(c)
 */
function isMatch (regex, c) {
  var matched = regex.test(c)
  return matched
}
195
/**
 * Negation of isMatch: true when `c` does not satisfy `regex`.
 *
 * @param {RegExp} regex
 * @param {string} c
 * @returns {boolean}
 */
function notMatch (regex, c) {
  return !regex.test(c)
}
199
// State table of the tokenizer. `S` serves as a running counter here, so
// every name below receives the next consecutive integer starting at 0.
// Further down the file the numeric -> name reverse entries are added and
// `S` is re-pointed at sax.STATE as a shorthand.
200 var S = 0
201 sax.STATE = {
202 BEGIN: S++, // leading byte order mark or whitespace
203 BEGIN_WHITESPACE: S++, // leading whitespace
204 TEXT: S++, // general stuff
205 TEXT_ENTITY: S++, // &amp and such.
206 OPEN_WAKA: S++, // <
207 SGML_DECL: S++, // <!BLARG
208 SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
209 DOCTYPE: S++, // <!DOCTYPE
210 DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
211 DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
212 DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
213 COMMENT_STARTING: S++, // <!-
214 COMMENT: S++, // <!--
215 COMMENT_ENDING: S++, // <!-- blah -
216 COMMENT_ENDED: S++, // <!-- blah --
217 CDATA: S++, // <![CDATA[ something
218 CDATA_ENDING: S++, // ]
219 CDATA_ENDING_2: S++, // ]]
220 PROC_INST: S++, // <?hi
221 PROC_INST_BODY: S++, // <?hi there
222 PROC_INST_ENDING: S++, // <?hi "there" ?
223 OPEN_TAG: S++, // <strong
224 OPEN_TAG_SLASH: S++, // <strong /
225 ATTRIB: S++, // <a
226 ATTRIB_NAME: S++, // <a foo
227 ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
228 ATTRIB_VALUE: S++, // <a foo=
229 ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
230 ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
231 ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
232 ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
233 ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot
234 CLOSE_TAG: S++, // </a
235 CLOSE_TAG_SAW_WHITE: S++, // </a >
236 SCRIPT: S++, // <script> ...
237 SCRIPT_ENDING: S++ // <script> ... <
238 }
239
// The five predefined XML entities. A parser created with
// opt.strictEntities inherits its entity table from this object.
240 sax.XML_ENTITIES = {
241 'amp': '&',
242 'gt': '>',
243 'lt': '<',
244 'quot': '"',
245 'apos': "'"
246 }
247
// Extended entity table used when opt.strictEntities is not set.
// Values are either the replacement string itself or a numeric character
// code; the numeric entries are converted to strings right after this
// literal (String.fromCharCode).
248 sax.ENTITIES = {
249 'amp': '&',
250 'gt': '>',
251 'lt': '<',
252 'quot': '"',
253 'apos': "'",
254 'AElig': 198,
255 'Aacute': 193,
256 'Acirc': 194,
257 'Agrave': 192,
258 'Aring': 197,
259 'Atilde': 195,
260 'Auml': 196,
261 'Ccedil': 199,
262 'ETH': 208,
263 'Eacute': 201,
264 'Ecirc': 202,
265 'Egrave': 200,
266 'Euml': 203,
267 'Iacute': 205,
268 'Icirc': 206,
269 'Igrave': 204,
270 'Iuml': 207,
271 'Ntilde': 209,
272 'Oacute': 211,
273 'Ocirc': 212,
274 'Ograve': 210,
275 'Oslash': 216,
276 'Otilde': 213,
277 'Ouml': 214,
278 'THORN': 222,
279 'Uacute': 218,
280 'Ucirc': 219,
281 'Ugrave': 217,
282 'Uuml': 220,
283 'Yacute': 221,
284 'aacute': 225,
285 'acirc': 226,
286 'aelig': 230,
287 'agrave': 224,
288 'aring': 229,
289 'atilde': 227,
290 'auml': 228,
291 'ccedil': 231,
292 'eacute': 233,
293 'ecirc': 234,
294 'egrave': 232,
295 'eth': 240,
296 'euml': 235,
297 'iacute': 237,
298 'icirc': 238,
299 'igrave': 236,
300 'iuml': 239,
301 'ntilde': 241,
302 'oacute': 243,
303 'ocirc': 244,
304 'ograve': 242,
305 'oslash': 248,
306 'otilde': 245,
307 'ouml': 246,
308 'szlig': 223,
309 'thorn': 254,
310 'uacute': 250,
311 'ucirc': 251,
312 'ugrave': 249,
313 'uuml': 252,
314 'yacute': 253,
315 'yuml': 255,
316 'copy': 169,
317 'reg': 174,
318 'nbsp': 160,
319 'iexcl': 161,
320 'cent': 162,
321 'pound': 163,
322 'curren': 164,
323 'yen': 165,
324 'brvbar': 166,
325 'sect': 167,
326 'uml': 168,
327 'ordf': 170,
328 'laquo': 171,
329 'not': 172,
330 'shy': 173,
331 'macr': 175,
332 'deg': 176,
333 'plusmn': 177,
334 'sup1': 185,
335 'sup2': 178,
336 'sup3': 179,
337 'acute': 180,
338 'micro': 181,
339 'para': 182,
340 'middot': 183,
341 'cedil': 184,
342 'ordm': 186,
343 'raquo': 187,
344 'frac14': 188,
345 'frac12': 189,
346 'frac34': 190,
347 'iquest': 191,
348 'times': 215,
349 'divide': 247,
350 'OElig': 338,
351 'oelig': 339,
352 'Scaron': 352,
353 'scaron': 353,
354 'Yuml': 376,
355 'fnof': 402,
356 'circ': 710,
357 'tilde': 732,
358 'Alpha': 913,
359 'Beta': 914,
360 'Gamma': 915,
361 'Delta': 916,
362 'Epsilon': 917,
363 'Zeta': 918,
364 'Eta': 919,
365 'Theta': 920,
366 'Iota': 921,
367 'Kappa': 922,
368 'Lambda': 923,
369 'Mu': 924,
370 'Nu': 925,
371 'Xi': 926,
372 'Omicron': 927,
373 'Pi': 928,
374 'Rho': 929,
375 'Sigma': 931,
376 'Tau': 932,
377 'Upsilon': 933,
378 'Phi': 934,
379 'Chi': 935,
380 'Psi': 936,
381 'Omega': 937,
382 'alpha': 945,
383 'beta': 946,
384 'gamma': 947,
385 'delta': 948,
386 'epsilon': 949,
387 'zeta': 950,
388 'eta': 951,
389 'theta': 952,
390 'iota': 953,
391 'kappa': 954,
392 'lambda': 955,
393 'mu': 956,
394 'nu': 957,
395 'xi': 958,
396 'omicron': 959,
397 'pi': 960,
398 'rho': 961,
399 'sigmaf': 962,
400 'sigma': 963,
401 'tau': 964,
402 'upsilon': 965,
403 'phi': 966,
404 'chi': 967,
405 'psi': 968,
406 'omega': 969,
407 'thetasym': 977,
408 'upsih': 978,
409 'piv': 982,
410 'ensp': 8194,
411 'emsp': 8195,
412 'thinsp': 8201,
413 'zwnj': 8204,
414 'zwj': 8205,
415 'lrm': 8206,
416 'rlm': 8207,
417 'ndash': 8211,
418 'mdash': 8212,
419 'lsquo': 8216,
420 'rsquo': 8217,
421 'sbquo': 8218,
422 'ldquo': 8220,
423 'rdquo': 8221,
424 'bdquo': 8222,
425 'dagger': 8224,
426 'Dagger': 8225,
427 'bull': 8226,
428 'hellip': 8230,
429 'permil': 8240,
430 'prime': 8242,
431 'Prime': 8243,
432 'lsaquo': 8249,
433 'rsaquo': 8250,
434 'oline': 8254,
435 'frasl': 8260,
436 'euro': 8364,
437 'image': 8465,
438 'weierp': 8472,
439 'real': 8476,
440 'trade': 8482,
441 'alefsym': 8501,
442 'larr': 8592,
443 'uarr': 8593,
444 'rarr': 8594,
445 'darr': 8595,
446 'harr': 8596,
447 'crarr': 8629,
448 'lArr': 8656,
449 'uArr': 8657,
450 'rArr': 8658,
451 'dArr': 8659,
452 'hArr': 8660,
453 'forall': 8704,
454 'part': 8706,
455 'exist': 8707,
456 'empty': 8709,
457 'nabla': 8711,
458 'isin': 8712,
459 'notin': 8713,
460 'ni': 8715,
461 'prod': 8719,
462 'sum': 8721,
463 'minus': 8722,
464 'lowast': 8727,
465 'radic': 8730,
466 'prop': 8733,
467 'infin': 8734,
468 'ang': 8736,
469 'and': 8743,
470 'or': 8744,
471 'cap': 8745,
472 'cup': 8746,
473 'int': 8747,
474 'there4': 8756,
475 'sim': 8764,
476 'cong': 8773,
477 'asymp': 8776,
478 'ne': 8800,
479 'equiv': 8801,
480 'le': 8804,
481 'ge': 8805,
482 'sub': 8834,
483 'sup': 8835,
484 'nsub': 8836,
485 'sube': 8838,
486 'supe': 8839,
487 'oplus': 8853,
488 'otimes': 8855,
489 'perp': 8869,
490 'sdot': 8901,
491 'lceil': 8968,
492 'rceil': 8969,
493 'lfloor': 8970,
494 'rfloor': 8971,
495 'lang': 9001,
496 'rang': 9002,
497 'loz': 9674,
498 'spades': 9824,
499 'clubs': 9827,
500 'hearts': 9829,
501 'diams': 9830
502 }
503
// Entries of sax.ENTITIES may be given as numeric character codes; replace
// each of those with the one-character string it stands for.
Object.keys(sax.ENTITIES).forEach(function (key) {
  var e = sax.ENTITIES[key]
  sax.ENTITIES[key] = typeof e === 'number' ? String.fromCharCode(e) : e
})

// Add the reverse (number -> name) entries to the state table.
// The names are snapshotted first: the loop adds properties to sax.STATE, and
// a for...in over the object being extended is not guaranteed to skip the
// newly added keys (visiting one would overwrite a state number with a string).
var stateNames = Object.keys(sax.STATE)
for (var stateIndex = 0; stateIndex < stateNames.length; stateIndex++) {
  var s = stateNames[stateIndex]
  sax.STATE[sax.STATE[s]] = s
}

// shorthand
S = sax.STATE
516
/**
 * Invokes the handler stored on the parser under `event`, if there is one,
 * passing `data` and using the parser as `this`.
 *
 * @param {SAXParser} parser
 * @param {string} event - handler property name, e.g. 'ontext'
 * @param {*} [data]
 */
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data)
  }
}
520
/**
 * Emits a node event, first flushing any pending text so that handlers see
 * events in document order.
 *
 * @param {SAXParser} parser
 * @param {string} nodeType - handler property name, e.g. 'onopentag'
 * @param {*} [data]
 */
function emitNode (parser, nodeType, data) {
  var hasPendingText = Boolean(parser.textNode)
  if (hasPendingText) {
    closeText(parser)
  }
  emit(parser, nodeType, data)
}
525
/**
 * Applies the trim/normalize options to the buffered text, emits it as
 * `ontext` when anything is left, and empties the text buffer.
 *
 * @param {SAXParser} parser
 */
function closeText (parser) {
  var text = textopts(parser.opt, parser.textNode)
  // The processed text is stored back before emitting, so a handler that
  // inspects parser.textNode sees the same value it is handed.
  parser.textNode = text
  if (text) {
    emit(parser, 'ontext', text)
  }
  parser.textNode = ''
}
531
/**
 * Post-processes text according to the parser options.
 *
 * @param {Object} opt - `trim` strips leading/trailing whitespace;
 *   `normalize` collapses every whitespace run into a single space
 * @param {string} text
 * @returns {string} the processed text (trim is applied before normalize)
 */
function textopts (opt, text) {
  var result = text
  if (opt.trim) {
    result = result.trim()
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, ' ')
  }
  return result
}
537
/**
 * Records and reports a parse error.
 *
 * Flushes pending text, builds an Error whose message carries the reason plus
 * the current line, column and character, stores it on `parser.error` and
 * hands it to the `onerror` handler.
 *
 * @param {SAXParser} parser
 * @param {string} reason - short description; also exposed as `error.reason`
 * @returns {SAXParser} the parser, for chaining
 */
function error (parser, reason) {
  closeText(parser)

  var where = '\nLine: ' + parser.line +
    '\nColumn: ' + parser.column +
    '\nChar: ' + parser.c
  var failure = new Error(reason + where)
  failure.reason = reason
  failure.line = parser.line
  failure.column = parser.column

  parser.error = failure
  emit(parser, 'onerror', failure)
  return parser
}
552
/**
 * Finishes the current document.
 *
 * Reports an unclosed root (strict mode only) and an unexpected end when the
 * tokenizer stopped in the middle of a construct, flushes pending text, marks
 * the parser closed, emits `onend`, then re-runs the constructor on the same
 * instance so it is reset with the same strictness and options.
 *
 * @param {SAXParser} parser
 * @returns {SAXParser} the parser
 */
function end (parser) {
  var rootLeftOpen = parser.sawRoot && !parser.closedRoot
  if (rootLeftOpen) {
    strictFail(parser, 'Unclosed root tag')
  }

  // Only these states are valid places for the input to stop.
  var restingStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT]
  if (restingStates.indexOf(parser.state) === -1) {
    error(parser, 'Unexpected end')
  }

  closeText(parser)
  parser.c = ''
  parser.closed = true
  emit(parser, 'onend')

  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}
567
/**
 * Reports `message` as an error, but only when the parser is in strict mode.
 *
 * @param {SAXParser} parser
 * @param {string} message
 * @throws {Error} when `parser` is not a SAXParser instance
 */
function strictFail (parser, message) {
  var isParser = typeof parser === 'object' && parser instanceof SAXParser
  if (!isParser) {
    throw new Error('bad call to strictFail')
  }
  if (!parser.strict) return
  error(parser, message)
}
576
/**
 * Starts a new tag record once the tag name has been read.
 *
 * In non-strict mode the name is case-folded with the configured method.
 * The record becomes `parser.tag`, the deferred attribute list is emptied and
 * `onopentagstart` is emitted.
 *
 * @param {SAXParser} parser
 */
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]()
  }

  var enclosing = parser.tags[parser.tags.length - 1] || parser
  var record = { name: parser.tagName, attributes: {} }
  parser.tag = record

  // Start out sharing the enclosing scope's bindings; this is replaced if the
  // tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
  if (parser.opt.xmlns) {
    record.ns = enclosing.ns
  }

  parser.attribList.length = 0
  emitNode(parser, 'onopentagstart', record)
}
589
/**
 * Splits a qualified name into prefix and local part.
 *
 * A name without ':' has an empty prefix. For a name with colons the first
 * two ':'-separated pieces are used. An attribute called exactly `xmlns`
 * is reported as prefix 'xmlns' with an empty local part.
 *
 * @param {string} name
 * @param {boolean} [attribute] - truthy when `name` is an attribute name
 * @returns {{prefix: string, local: string}}
 */
function qname (name, attribute) {
  // <x "xmlns"="http://foo">
  if (attribute && name === 'xmlns') {
    return { prefix: 'xmlns', local: '' }
  }

  var pieces = name.indexOf(':') < 0 ? ['', name] : name.split(':')
  return { prefix: pieces[0], local: pieces[1] }
}
604
/**
 * Commits the attribute currently held in parser.attribName/attribValue.
 *
 * - In non-strict mode the name is case-folded first.
 * - A name that was already seen on this tag is dropped (first one wins).
 * - With opt.xmlns, `xmlns`/`xmlns:*` attributes update the tag's namespace
 *   bindings and the attribute is queued on parser.attribList; openTag()
 *   emits the deferred `onattribute` events once all bindings are known.
 * - Without opt.xmlns the attribute is stored on the tag and `onattribute`
 *   is emitted immediately.
 *
 * The name/value buffers are cleared before returning.
 *
 * @param {SAXParser} parser
 */
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]()
  }

  // attribList holds [name, value] pairs, so a plain indexOf(name) can never
  // match; compare against the stored names instead. hasOwnProperty is called
  // through Object.prototype because a document may legally contain an
  // attribute named "hasOwnProperty", which would shadow the method.
  var name = parser.attribName
  var alreadyDeferred = parser.attribList.some(function (pair) {
    return pair[0] === name
  })
  if (alreadyDeferred ||
    Object.prototype.hasOwnProperty.call(parser.tag.attributes, name)) {
    parser.attribName = parser.attribValue = ''
    return
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true)
    var prefix = qn.prefix
    var local = qn.local

    if (prefix === 'xmlns') {
      // namespace binding attribute. push the binding into scope
      if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else {
        var tag = parser.tag
        var parent = parser.tags[parser.tags.length - 1] || parser
        // First binding on this tag: give it its own scope object that
        // inherits from the parent's, so the parent scope is not modified.
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect. preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode(parser, 'onattribute', {
      name: parser.attribName,
      value: parser.attribValue
    })
  }

  parser.attribName = parser.attribValue = ''
}
656
/**
 * Completes the tag held in parser.tag and emits `onopentag`.
 *
 * With opt.xmlns the tag is first annotated with prefix/local/uri, an
 * `onopennamespace` event is emitted for each binding the tag introduced, and
 * the attributes queued by attrib() are resolved and emitted.
 *
 * Unless the tag is self-closing the parser moves on to TEXT (or SCRIPT for a
 * <script> element when script handling is enabled) and the current-tag
 * fields are cleared.
 *
 * @param {SAXParser} parser
 * @param {boolean} [selfClosing] - truthy for <tag/>
 */
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    var tag = parser.tag

    // add namespace info to tag
    var tagQName = qname(parser.tagName)
    tag.prefix = tagQName.prefix
    tag.local = tagQName.local
    tag.uri = tag.ns[tagQName.prefix] || ''

    if (tag.prefix && !tag.uri) {
      strictFail(parser,
        'Unbound namespace prefix: ' + JSON.stringify(parser.tagName))
      tag.uri = tagQName.prefix
    }

    // emit namespace binding events for bindings introduced by this tag
    var enclosing = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && enclosing.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (boundPrefix) {
        emitNode(parser, 'onopennamespace', {
          prefix: boundPrefix,
          uri: tag.ns[boundPrefix]
        })
      })
    }

    // handle deferred onattribute events
    // Note: do not apply default ns to attributes:
    // http://www.w3.org/TR/REC-xml-names/#defaulting
    for (var idx = 0, count = parser.attribList.length; idx < count; idx++) {
      var pair = parser.attribList[idx]
      var attrQName = qname(pair[0], true)
      var attrPrefix = attrQName.prefix
      var attrUri = attrPrefix === '' ? '' : (tag.ns[attrPrefix] || '')
      var attribute = {
        name: pair[0],
        value: pair[1],
        prefix: attrPrefix,
        local: attrQName.local,
        uri: attrUri
      }

      // an attribute whose prefix has no binding fails now (strict mode);
      // the prefix itself then stands in for the uri.
      if (attrPrefix && attrPrefix !== 'xmlns' && !attrUri) {
        strictFail(parser,
          'Unbound namespace prefix: ' + JSON.stringify(attrPrefix))
        attribute.uri = attrPrefix
      }
      parser.tag.attributes[pair[0]] = attribute
      emitNode(parser, 'onattribute', attribute)
    }
    parser.attribList.length = 0
  }

  parser.tag.isSelfClosing = !!selfClosing

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, 'onopentag', parser.tag)

  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    var entersScript = !parser.noscript &&
      parser.tagName.toLowerCase() === 'script'
    parser.state = entersScript ? S.SCRIPT : S.TEXT
    parser.tag = null
    parser.tagName = ''
  }

  parser.attribName = parser.attribValue = ''
  parser.attribList.length = 0
}
735
/**
 * Handles a completed closing tag whose name is in parser.tagName.
 *
 * - An empty name (`</>`) is reported in strict mode and kept as text.
 * - While script content is buffered, any closing tag other than the script
 *   end tag is appended to the script text; the script end tag emits
 *   `onscript`.
 * - Otherwise the open-tag stack is searched from the top for the name. If it
 *   is missing the tag is kept as text; if found, every tag above and
 *   including the match is popped with an `onclosetag` event (plus
 *   `onclosenamespace` events for bindings that tag introduced).
 *
 * The parser ends up in the TEXT state (or SCRIPT when still inside a script).
 *
 * @param {SAXParser} parser
 */
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, 'Weird empty close tag.')
    parser.textNode += '</>'
    parser.state = S.TEXT
    return
  }

  if (parser.script) {
    // openTag() recognises the script element case-insensitively, so the end
    // tag has to be matched the same way; otherwise </SCRIPT> would be
    // swallowed into the script text and the element would never close.
    if (parser.tagName.toLowerCase() !== 'script') {
      parser.script += '</' + parser.tagName + '>'
      parser.tagName = ''
      parser.state = S.SCRIPT
      return
    }
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }

  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) {
    tagName = tagName[parser.looseCase]()
  }
  var closeTo = tagName
  while (t--) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, 'Unexpected close tag')
    } else {
      break
    }
  }

  // didn't find it. we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
    parser.textNode += '</' + parser.tagName + '>'
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName

  // Pop everything down to and including the matching tag (index t).
  var s = parser.tags.length
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, 'onclosetag', parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
      })
    }
  }
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ''
  parser.attribList.length = 0
  parser.state = S.TEXT
}
806
/**
 * Resolves the entity name collected in parser.entity to its replacement.
 *
 * Named entities are looked up in parser.ENTITIES, first as written and then
 * lower-cased. `#NNN` and `#xHHH` are treated as decimal/hexadecimal
 * character references. Anything that cannot be resolved is reported through
 * strictFail and returned verbatim as `&name;`.
 *
 * @param {SAXParser} parser
 * @returns {string} the replacement text, or the literal `&...;` when invalid
 */
function parseEntity (parser) {
  var entity = parser.entity
  var entityLC = entity.toLowerCase()
  var num
  var numStr = ''

  var named = lookupNamedEntity(parser.ENTITIES, entity) ||
    lookupNamedEntity(parser.ENTITIES, entityLC)
  if (named) {
    return named
  }

  entity = entityLC
  if (entity.charAt(0) === '#') {
    if (entity.charAt(1) === 'x') {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  // Leading zeros are allowed; after dropping them the digits must
  // round-trip exactly, which rejects things like "#12abc".
  entity = entity.replace(/^0+/, '')
  // String.fromCodePoint throws a RangeError outside 0..0x10FFFF, so values
  // such as &#x110000; or &#-5; are treated as invalid references instead of
  // letting the exception escape from write().
  if (isNaN(num) || numStr.toLowerCase() !== entity ||
    num < 0 || num > 0x10FFFF) {
    strictFail(parser, 'Invalid character entity')
    return '&' + parser.entity + ';'
  }

  return String.fromCodePoint(num)
}

// Looks `name` up in an entity table, accepting only truthy string or number
// values. The table inherits from Object.prototype, so without this filter
// names such as "constructor" or "toString" would resolve to functions.
function lookupNamedEntity (table, name) {
  var value = table[name]
  var kind = typeof value
  if (value && (kind === 'string' || kind === 'number')) {
    return value
  }
  return undefined
}
839
/**
 * Handles one character seen before the first tag.
 *
 * '<' starts a tag (remembering where it began); whitespace is skipped; any
 * other character is reported in strict mode and becomes the start of a text
 * node.
 *
 * @param {SAXParser} parser
 * @param {string} c - the current character
 */
function beginWhiteSpace (parser, c) {
  if (c !== '<') {
    if (isWhitespace(c)) return
    // have to process this as a text node.
    // weird, but happens.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
    return
  }

  parser.state = S.OPEN_WAKA
  parser.startTagPosition = parser.position
}
852
/**
 * Returns the character of `chunk` at index `i`, or '' once `i` has reached
 * the end of the chunk.
 *
 * @param {string} chunk
 * @param {number} i
 * @returns {string}
 */
function charAt (chunk, i) {
  return i < chunk.length ? chunk.charAt(i) : ''
}
860
861 function write (chunk) {
862 var parser = this
863 if (this.error) {
864 throw this.error
865 }
866 if (parser.closed) {
867 return error(parser,
868 'Cannot write after close. Assign an onready handler.')
869 }
870 if (chunk === null) {
871 return end(parser)
872 }
873 if (typeof chunk === 'object') {
874 chunk = chunk.toString()
875 }
876 var i = 0
877 var c = ''
878 while (true) {
879 c = charAt(chunk, i++)
880 parser.c = c
881
882 if (!c) {
883 break
884 }
885
886 if (parser.trackPosition) {
887 parser.position++
888 if (c === '\n') {
889 parser.line++
890 parser.column = 0
891 } else {
892 parser.column++
893 }
894 }
895
896 switch (parser.state) {
897 case S.BEGIN:
898 parser.state = S.BEGIN_WHITESPACE
899 if (c === '\uFEFF') {
900 continue
901 }
902 beginWhiteSpace(parser, c)
903 continue
904
905 case S.BEGIN_WHITESPACE:
906 beginWhiteSpace(parser, c)
907 continue
908
909 case S.TEXT:
910 if (parser.sawRoot && !parser.closedRoot) {
911 var starti = i - 1
912 while (c && c !== '<' && c !== '&') {
913 c = charAt(chunk, i++)
914 if (c && parser.trackPosition) {
915 parser.position++
916 if (c === '\n') {
917 parser.line++
918 parser.column = 0
919 } else {
920 parser.column++
921 }
922 }
923 }
924 parser.textNode += chunk.substring(starti, i - 1)
925 }
926 if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
927 parser.state = S.OPEN_WAKA
928 parser.startTagPosition = parser.position
929 } else {
930 if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
931 strictFail(parser, 'Text data outside of root node.')
932 }
933 if (c === '&') {
934 parser.state = S.TEXT_ENTITY
935 } else {
936 parser.textNode += c
937 }
938 }
939 continue
940
941 case S.SCRIPT:
942 // only non-strict
943 if (c === '<') {
944 parser.state = S.SCRIPT_ENDING
945 } else {
946 parser.script += c
947 }
948 continue
949
950 case S.SCRIPT_ENDING:
951 if (c === '/') {
952 parser.state = S.CLOSE_TAG
953 } else {
954 parser.script += '<' + c
955 parser.state = S.SCRIPT
956 }
957 continue
958
959 case S.OPEN_WAKA:
960 // either a /, ?, !, or text is coming next.
961 if (c === '!') {
962 parser.state = S.SGML_DECL
963 parser.sgmlDecl = ''
964 } else if (isWhitespace(c)) {
965 // wait for it...
966 } else if (isMatch(nameStart, c)) {
967 parser.state = S.OPEN_TAG
968 parser.tagName = c
969 } else if (c === '/') {
970 parser.state = S.CLOSE_TAG
971 parser.tagName = ''
972 } else if (c === '?') {
973 parser.state = S.PROC_INST
974 parser.procInstName = parser.procInstBody = ''
975 } else {
976 strictFail(parser, 'Unencoded <')
977 // if there was some whitespace, then add that in.
978 if (parser.startTagPosition + 1 < parser.position) {
979 var pad = parser.position - parser.startTagPosition
980 c = new Array(pad).join(' ') + c
981 }
982 parser.textNode += '<' + c
983 parser.state = S.TEXT
984 }
985 continue
986
987 case S.SGML_DECL:
988 if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
989 emitNode(parser, 'onopencdata')
990 parser.state = S.CDATA
991 parser.sgmlDecl = ''
992 parser.cdata = ''
993 } else if (parser.sgmlDecl + c === '--') {
994 parser.state = S.COMMENT
995 parser.comment = ''
996 parser.sgmlDecl = ''
997 } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
998 parser.state = S.DOCTYPE
999 if (parser.doctype || parser.sawRoot) {
1000 strictFail(parser,
1001 'Inappropriately located doctype declaration')
1002 }
1003 parser.doctype = ''
1004 parser.sgmlDecl = ''
1005 } else if (c === '>') {
1006 emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
1007 parser.sgmlDecl = ''
1008 parser.state = S.TEXT
1009 } else if (isQuote(c)) {
1010 parser.state = S.SGML_DECL_QUOTED
1011 parser.sgmlDecl += c
1012 } else {
1013 parser.sgmlDecl += c
1014 }
1015 continue
1016
1017 case S.SGML_DECL_QUOTED:
1018 if (c === parser.q) {
1019 parser.state = S.SGML_DECL
1020 parser.q = ''
1021 }
1022 parser.sgmlDecl += c
1023 continue
1024
1025 case S.DOCTYPE:
1026 if (c === '>') {
1027 parser.state = S.TEXT
1028 emitNode(parser, 'ondoctype', parser.doctype)
1029 parser.doctype = true // just remember that we saw it.
1030 } else {
1031 parser.doctype += c
1032 if (c === '[') {
1033 parser.state = S.DOCTYPE_DTD
1034 } else if (isQuote(c)) {
1035 parser.state = S.DOCTYPE_QUOTED
1036 parser.q = c
1037 }
1038 }
1039 continue
1040
1041 case S.DOCTYPE_QUOTED:
1042 parser.doctype += c
1043 if (c === parser.q) {
1044 parser.q = ''
1045 parser.state = S.DOCTYPE
1046 }
1047 continue
1048
1049 case S.DOCTYPE_DTD:
1050 parser.doctype += c
1051 if (c === ']') {
1052 parser.state = S.DOCTYPE
1053 } else if (isQuote(c)) {
1054 parser.state = S.DOCTYPE_DTD_QUOTED
1055 parser.q = c
1056 }
1057 continue
1058
1059 case S.DOCTYPE_DTD_QUOTED:
1060 parser.doctype += c
1061 if (c === parser.q) {
1062 parser.state = S.DOCTYPE_DTD
1063 parser.q = ''
1064 }
1065 continue
1066
1067 case S.COMMENT:
1068 if (c === '-') {
1069 parser.state = S.COMMENT_ENDING
1070 } else {
1071 parser.comment += c
1072 }
1073 continue
1074
1075 case S.COMMENT_ENDING:
1076 if (c === '-') {
1077 parser.state = S.COMMENT_ENDED
1078 parser.comment = textopts(parser.opt, parser.comment)
1079 if (parser.comment) {
1080 emitNode(parser, 'oncomment', parser.comment)
1081 }
1082 parser.comment = ''
1083 } else {
1084 parser.comment += '-' + c
1085 parser.state = S.COMMENT
1086 }
1087 continue
1088
1089 case S.COMMENT_ENDED:
1090 if (c !== '>') {
1091 strictFail(parser, 'Malformed comment')
1092 // allow <!-- blah -- bloo --> in non-strict mode,
1093 // which is a comment of " blah -- bloo "
1094 parser.comment += '--' + c
1095 parser.state = S.COMMENT
1096 } else {
1097 parser.state = S.TEXT
1098 }
1099 continue
1100
1101 case S.CDATA:
1102 if (c === ']') {
1103 parser.state = S.CDATA_ENDING
1104 } else {
1105 parser.cdata += c
1106 }
1107 continue
1108
1109 case S.CDATA_ENDING:
1110 if (c === ']') {
1111 parser.state = S.CDATA_ENDING_2
1112 } else {
1113 parser.cdata += ']' + c
1114 parser.state = S.CDATA
1115 }
1116 continue
1117
1118 case S.CDATA_ENDING_2:
1119 if (c === '>') {
1120 if (parser.cdata) {
1121 emitNode(parser, 'oncdata', parser.cdata)
1122 }
1123 emitNode(parser, 'onclosecdata')
1124 parser.cdata = ''
1125 parser.state = S.TEXT
1126 } else if (c === ']') {
1127 parser.cdata += ']'
1128 } else {
1129 parser.cdata += ']]' + c
1130 parser.state = S.CDATA
1131 }
1132 continue
1133
1134 case S.PROC_INST:
1135 if (c === '?') {
1136 parser.state = S.PROC_INST_ENDING
1137 } else if (isWhitespace(c)) {
1138 parser.state = S.PROC_INST_BODY
1139 } else {
1140 parser.procInstName += c
1141 }
1142 continue
1143
1144 case S.PROC_INST_BODY:
1145 if (!parser.procInstBody && isWhitespace(c)) {
1146 continue
1147 } else if (c === '?') {
1148 parser.state = S.PROC_INST_ENDING
1149 } else {
1150 parser.procInstBody += c
1151 }
1152 continue
1153
1154 case S.PROC_INST_ENDING:
1155 if (c === '>') {
1156 emitNode(parser, 'onprocessinginstruction', {
1157 name: parser.procInstName,
1158 body: parser.procInstBody
1159 })
1160 parser.procInstName = parser.procInstBody = ''
1161 parser.state = S.TEXT
1162 } else {
1163 parser.procInstBody += '?' + c
1164 parser.state = S.PROC_INST_BODY
1165 }
1166 continue
1167
1168 case S.OPEN_TAG:
1169 if (isMatch(nameBody, c)) {
1170 parser.tagName += c
1171 } else {
1172 newTag(parser)
1173 if (c === '>') {
1174 openTag(parser)
1175 } else if (c === '/') {
1176 parser.state = S.OPEN_TAG_SLASH
1177 } else {
1178 if (!isWhitespace(c)) {
1179 strictFail(parser, 'Invalid character in tag name')
1180 }
1181 parser.state = S.ATTRIB
1182 }
1183 }
1184 continue
1185
1186 case S.OPEN_TAG_SLASH:
1187 if (c === '>') {
1188 openTag(parser, true)
1189 closeTag(parser)
1190 } else {
1191 strictFail(parser, 'Forward-slash in opening tag not followed by >')
1192 parser.state = S.ATTRIB
1193 }
1194 continue
1195
1196 case S.ATTRIB:
1197 // haven't read the attribute name yet.
1198 if (isWhitespace(c)) {
1199 continue
1200 } else if (c === '>') {
1201 openTag(parser)
1202 } else if (c === '/') {
1203 parser.state = S.OPEN_TAG_SLASH
1204 } else if (isMatch(nameStart, c)) {
1205 parser.attribName = c
1206 parser.attribValue = ''
1207 parser.state = S.ATTRIB_NAME
1208 } else {
1209 strictFail(parser, 'Invalid attribute name')
1210 }
1211 continue
1212
1213 case S.ATTRIB_NAME:
1214 if (c === '=') {
1215 parser.state = S.ATTRIB_VALUE
1216 } else if (c === '>') {
1217 strictFail(parser, 'Attribute without value')
1218 parser.attribValue = parser.attribName
1219 attrib(parser)
1220 openTag(parser)
1221 } else if (isWhitespace(c)) {
1222 parser.state = S.ATTRIB_NAME_SAW_WHITE
1223 } else if (isMatch(nameBody, c)) {
1224 parser.attribName += c
1225 } else {
1226 strictFail(parser, 'Invalid attribute name')
1227 }
1228 continue
1229
1230 case S.ATTRIB_NAME_SAW_WHITE:
1231 if (c === '=') {
1232 parser.state = S.ATTRIB_VALUE
1233 } else if (isWhitespace(c)) {
1234 continue
1235 } else {
1236 strictFail(parser, 'Attribute without value')
1237 parser.tag.attributes[parser.attribName] = ''
1238 parser.attribValue = ''
1239 emitNode(parser, 'onattribute', {
1240 name: parser.attribName,
1241 value: ''
1242 })
1243 parser.attribName = ''
1244 if (c === '>') {
1245 openTag(parser)
1246 } else if (isMatch(nameStart, c)) {
1247 parser.attribName = c
1248 parser.state = S.ATTRIB_NAME
1249 } else {
1250 strictFail(parser, 'Invalid attribute name')
1251 parser.state = S.ATTRIB
1252 }
1253 }
1254 continue
1255
1256 case S.ATTRIB_VALUE:
1257 if (isWhitespace(c)) {
1258 continue
1259 } else if (isQuote(c)) {
1260 parser.q = c
1261 parser.state = S.ATTRIB_VALUE_QUOTED
1262 } else {
1263 strictFail(parser, 'Unquoted attribute value')
1264 parser.state = S.ATTRIB_VALUE_UNQUOTED
1265 parser.attribValue = c
1266 }
1267 continue
1268
1269 case S.ATTRIB_VALUE_QUOTED:
1270 if (c !== parser.q) {
1271 if (c === '&') {
1272 parser.state = S.ATTRIB_VALUE_ENTITY_Q
1273 } else {
1274 parser.attribValue += c
1275 }
1276 continue
1277 }
1278 attrib(parser)
1279 parser.q = ''
1280 parser.state = S.ATTRIB_VALUE_CLOSED
1281 continue
1282
1283 case S.ATTRIB_VALUE_CLOSED:
1284 if (isWhitespace(c)) {
1285 parser.state = S.ATTRIB
1286 } else if (c === '>') {
1287 openTag(parser)
1288 } else if (c === '/') {
1289 parser.state = S.OPEN_TAG_SLASH
1290 } else if (isMatch(nameStart, c)) {
1291 strictFail(parser, 'No whitespace between attributes')
1292 parser.attribName = c
1293 parser.attribValue = ''
1294 parser.state = S.ATTRIB_NAME
1295 } else {
1296 strictFail(parser, 'Invalid attribute name')
1297 }
1298 continue
1299
1300 case S.ATTRIB_VALUE_UNQUOTED:
1301 if (!isAttribEnd(c)) {
1302 if (c === '&') {
1303 parser.state = S.ATTRIB_VALUE_ENTITY_U
1304 } else {
1305 parser.attribValue += c
1306 }
1307 continue
1308 }
1309 attrib(parser)
1310 if (c === '>') {
1311 openTag(parser)
1312 } else {
1313 parser.state = S.ATTRIB
1314 }
1315 continue
1316
1317 case S.CLOSE_TAG:
1318 if (!parser.tagName) {
1319 if (isWhitespace(c)) {
1320 continue
1321 } else if (notMatch(nameStart, c)) {
1322 if (parser.script) {
1323 parser.script += '</' + c
1324 parser.state = S.SCRIPT
1325 } else {
1326 strictFail(parser, 'Invalid tagname in closing tag.')
1327 }
1328 } else {
1329 parser.tagName = c
1330 }
1331 } else if (c === '>') {
1332 closeTag(parser)
1333 } else if (isMatch(nameBody, c)) {
1334 parser.tagName += c
1335 } else if (parser.script) {
1336 parser.script += '</' + parser.tagName
1337 parser.tagName = ''
1338 parser.state = S.SCRIPT
1339 } else {
1340 if (!isWhitespace(c)) {
1341 strictFail(parser, 'Invalid tagname in closing tag')
1342 }
1343 parser.state = S.CLOSE_TAG_SAW_WHITE
1344 }
1345 continue
1346
1347 case S.CLOSE_TAG_SAW_WHITE:
1348 if (isWhitespace(c)) {
1349 continue
1350 }
1351 if (c === '>') {
1352 closeTag(parser)
1353 } else {
1354 strictFail(parser, 'Invalid characters in closing tag')
1355 }
1356 continue
1357
1358 case S.TEXT_ENTITY:
1359 case S.ATTRIB_VALUE_ENTITY_Q:
1360 case S.ATTRIB_VALUE_ENTITY_U:
1361 var returnState
1362 var buffer
1363 switch (parser.state) {
1364 case S.TEXT_ENTITY:
1365 returnState = S.TEXT
1366 buffer = 'textNode'
1367 break
1368
1369 case S.ATTRIB_VALUE_ENTITY_Q:
1370 returnState = S.ATTRIB_VALUE_QUOTED
1371 buffer = 'attribValue'
1372 break
1373
1374 case S.ATTRIB_VALUE_ENTITY_U:
1375 returnState = S.ATTRIB_VALUE_UNQUOTED
1376 buffer = 'attribValue'
1377 break
1378 }
1379
1380 if (c === ';') {
1381 var parsedEntity = parseEntity(parser)
1382
1383 // Custom entities can contain tags, so we potentially need to parse the result
1384 if (parser.state === S.TEXT_ENTITY && !sax.ENTITIES[parser.entity] && parsedEntity !== '&' + parser.entity + ';') {
1385 chunk = chunk.slice(0, i) + parsedEntity + chunk.slice(i)
1386 } else {
1387 parser[buffer] += parsedEntity
1388 }
1389
1390 parser.entity = ''
1391 parser.state = returnState
1392 } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
1393 parser.entity += c
1394 } else {
1395 strictFail(parser, 'Invalid character in entity name')
1396 parser[buffer] += '&' + parser.entity + c
1397 parser.entity = ''
1398 parser.state = returnState
1399 }
1400
1401 continue
1402
1403 default:
1404 throw new Error(parser, 'Unknown state: ' + parser.state)
1405 }
1406 } // while
1407
1408 if (parser.position >= parser.bufferCheckPosition) {
1409 checkBufferLength(parser)
1410 }
1411 return parser
1412 }
1413})(typeof exports === 'undefined' ? this.sax = {} : exports)
Note: See TracBrowser for help on using the repository browser.