source: trip-planner-front/node_modules/parse5/lib/tokenizer/preprocessor.js@ ceaed42

Last change on this file since ceaed42 was 6a3a178, checked in by Ema <ema_spirova@…>, 3 years ago

initial commit

  • Property mode set to 100644
File size: 4.4 KB
Line 
1'use strict';
2
3const unicode = require('../common/unicode');
4const ERR = require('../common/error-codes');
5
6//Aliases
7const $ = unicode.CODE_POINTS;
8
//Const
//NOTE: initial value of Preprocessor#bufferWaterline. Once the read position goes past it,
//dropParsedChunk() cuts the already consumed part off the input buffer.
const DEFAULT_BUFFER_WATERLINE = 0x10000;
11
12//Preprocessor
13//NOTE: HTML input preprocessing
14//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
class Preprocessor {
    constructor() {
        //NOTE: input buffer. Stays null until the first write().
        this.html = null;

        //NOTE: `pos` is the index of the code unit handed out by the latest advance(), `lastCharPos` is the
        //index of the last buffered code unit, `lastGapPos` is the position recorded by the latest _addGap()
        //call (-1 while there is none).
        this.pos = -1;
        this.lastGapPos = -1;
        this.lastCharPos = -1;

        //NOTE: gap positions that preceded `lastGapPos`; retreat() restores them one at a time.
        this.gapStack = [];

        //NOTE: raised right after a CR has been consumed, so that an immediately following LF can be dropped.
        this.skipNextNewLine = false;

        //NOTE: `lastChunkWritten` mirrors the `isLastChunk` argument of the latest write(). `endOfChunkHit`
        //is raised when the buffer ran dry (or ended on a possibly unfinished surrogate pair) while more
        //chunks are still expected.
        this.lastChunkWritten = false;
        this.endOfChunkHit = false;

        //NOTE: dropParsedChunk() trims the buffer only after `pos` goes past this threshold.
        this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
    }

    //Receives an error code for a problem found in the input.
    _err() {
        // NOTE: err reporting is noop by default. Enabled by mixin.
    }

    //Records the current position as a gap: a code unit that advance() consumed on top of the one it reported
    //(the second half of a surrogate pair or an ignored LF). retreat() steps over gaps in a single move.
    _addGap() {
        this.gapStack.push(this.lastGapPos);
        this.lastGapPos = this.pos;
    }

    //Resolves the surrogate code unit `cp` located at `pos`. Returns the combined code point when `cp` opens
    //a valid pair, $.EOF when the pair may continue in a chunk that has not been written yet, and `cp` itself
    //(after reporting an error) when the surrogate is isolated.
    _processSurrogate(cp) {
        //NOTE: only a leading (high) surrogate, U+D800..U+DBFF, can open a pair. The pair check below looks at
        //the following code unit only, so the leading half has to be validated here. Otherwise two trailing
        //surrogates in a row would be merged into a value that is not a Unicode code point, and a lone
        //trailing surrogate at the end of a chunk would needlessly wait for more input.
        const isLeadingSurrogate = cp >= 0xd800 && cp <= 0xdbff;

        if (isLeadingSurrogate) {
            //NOTE: try to peek a surrogate pair
            if (this.pos !== this.lastCharPos) {
                const nextCp = this.html.charCodeAt(this.pos + 1);

                if (unicode.isSurrogatePair(nextCp)) {
                    //NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
                    this.pos++;

                    //NOTE: add gap that should be avoided during retreat
                    this._addGap();

                    return unicode.getSurrogatePairCodePoint(cp, nextCp);
                }
            }

            //NOTE: we are at the end of a chunk, therefore we can't infer surrogate pair yet.
            else if (!this.lastChunkWritten) {
                this.endOfChunkHit = true;
                return $.EOF;
            }
        }

        //NOTE: isolated surrogate
        this._err(ERR.surrogateInInputStream);

        return cp;
    }

    //Once `pos` is past `bufferWaterline`, cuts off everything located before `pos` and rebases the positions
    //so that `pos` becomes 0. Gap bookkeeping is reset, since it referred to the old offsets.
    dropParsedChunk() {
        if (this.pos > this.bufferWaterline) {
            this.lastCharPos -= this.pos;
            this.html = this.html.substring(this.pos);
            this.pos = 0;
            this.lastGapPos = -1;
            this.gapStack = [];
        }
    }

    //Appends `chunk` to the buffer (or starts the buffer with it). `isLastChunk` tells whether more input is
    //going to follow; it decides how advance() treats the end of the buffer.
    write(chunk, isLastChunk) {
        if (this.html) {
            this.html += chunk;
        } else {
            this.html = chunk;
        }

        this.lastCharPos = this.html.length - 1;
        this.endOfChunkHit = false;
        this.lastChunkWritten = isLastChunk;
    }

    //Inserts `chunk` right after the current position, so that the next advance() reads its first code unit.
    insertHtmlAtCurrentPos(chunk) {
        this.html = this.html.substring(0, this.pos + 1) + chunk + this.html.substring(this.pos + 1, this.html.length);

        this.lastCharPos = this.html.length - 1;
        this.endOfChunkHit = false;
    }

    //Moves to the next position and returns the preprocessed code point found there: CR and CR LF come back
    //as a single LF, a surrogate pair comes back as one code point, and $.EOF is returned when there is
    //nothing left to read in the buffer.
    advance() {
        this.pos++;

        if (this.pos > this.lastCharPos) {
            //NOTE: running dry counts as hitting the end of a chunk only while more chunks are expected.
            this.endOfChunkHit = !this.lastChunkWritten;
            return $.EOF;
        }

        let cp = this.html.charCodeAt(this.pos);

        //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
        //must be ignored.
        if (this.skipNextNewLine && cp === $.LINE_FEED) {
            this.skipNextNewLine = false;
            this._addGap();
            return this.advance();
        }

        //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
        if (cp === $.CARRIAGE_RETURN) {
            this.skipNextNewLine = true;
            return $.LINE_FEED;
        }

        this.skipNextNewLine = false;

        if (unicode.isSurrogate(cp)) {
            cp = this._processSurrogate(cp);
        }

        //OPTIMIZATION: first check if code point is in the common allowed
        //range (ASCII alphanumeric, whitespaces, big chunk of BMP)
        //before going into detailed performance cost validation.
        const isCommonValidRange =
            (cp > 0x1f && cp < 0x7f) || cp === $.LINE_FEED || cp === $.CARRIAGE_RETURN || (cp > 0x9f && cp < 0xfdd0);

        if (!isCommonValidRange) {
            this._checkForProblematicCharacters(cp);
        }

        return cp;
    }

    //Reports `cp` through _err() when it is a control code point or a noncharacter.
    _checkForProblematicCharacters(cp) {
        if (unicode.isControlCodePoint(cp)) {
            this._err(ERR.controlCharacterInInputStream);
        } else if (unicode.isUndefinedCodePoint(cp)) {
            this._err(ERR.noncharacterInInputStream);
        }
    }

    //Steps back by one advance(). When the current position is a recorded gap, the gap is stepped over as
    //well (one extra position) and the previously recorded gap becomes the current one again.
    retreat() {
        if (this.pos === this.lastGapPos) {
            this.lastGapPos = this.gapStack.pop();
            this.pos--;
        }

        this.pos--;
    }
}
158
159module.exports = Preprocessor;
Note: See TracBrowser for help on using the repository browser.