source: imaps-frontend/node_modules/css-tree/lib/tokenizer/utils.js@ 79a0317

main
Last change on this file since 79a0317 was d565449, checked in by stefan toskovski <stefantoska84@…>, 3 months ago

Update repo after prototype presentation

  • Property mode set to 100644
File size: 7.2 KB
RevLine 
[d565449]1var charCodeDef = require('./char-code-definitions');
2var isDigit = charCodeDef.isDigit;
3var isHexDigit = charCodeDef.isHexDigit;
4var isUppercaseLetter = charCodeDef.isUppercaseLetter;
5var isName = charCodeDef.isName;
6var isWhiteSpace = charCodeDef.isWhiteSpace;
7var isValidEscape = charCodeDef.isValidEscape;
8
// Returns the UTF-16 code unit of `source` at `offset`, or 0 when `offset`
// points outside of the string.
//
// 0 is the value the rest of this file receives for "nothing left to read".
// `String.prototype.charCodeAt` returns NaN for any out-of-range index, so
// both ends are guarded here: a negative offset now yields 0 as well instead
// of leaking NaN to the caller.
//
// source - string to read from
// offset - index of the code unit to read
function getCharCode(source, offset) {
    var isInside = offset >= 0 && offset < source.length;

    return isInside ? source.charCodeAt(offset) : 0;
}
12
// Returns how many code units the line break starting at `offset` occupies:
// 2 for a "\r\n" pair, 1 otherwise.
//
// `code` is the code unit at `offset`, passed in by the caller. This function
// does not verify that `code` is a newline at all; anything that is not a
// carriage return followed by a line feed is reported as length 1.
function getNewlineLength(source, offset, code) {
    var isCrLf =
        code === 13 /* \r */ &&
        getCharCode(source, offset + 1) === 10 /* \n */;

    return isCrLf ? 2 : 1;
}
20
// Case-insensitive comparison of the single code unit at `testStr[offset]`
// against `referenceCode`.
//
// Only the tested character is folded to lower case (A..Z -> a..z), so
// `referenceCode` has to be given in lower case for a letter to match.
function cmpChar(testStr, offset, referenceCode) {
    var actual = testStr.charCodeAt(offset);

    // setting bit 5 turns an ASCII uppercase letter into its lowercase form
    var folded = isUppercaseLetter(actual) ? actual | 32 : actual;

    return folded === referenceCode;
}
31
// Case-insensitive comparison of the slice `testStr[start, end)` against
// `referenceStr`.
//
// Returns false when the slice length differs from `referenceStr.length` or
// when the range does not fit inside `testStr`. Only characters of `testStr`
// are folded to lower case (A..Z -> a..z), so `referenceStr` has to be given
// in lower case for letters to match.
function cmpStr(testStr, start, end, referenceStr) {
    var isComparable = !(
        end - start !== referenceStr.length ||
        start < 0 ||
        end > testStr.length
    );

    if (!isComparable) {
        return false;
    }

    var index = start;

    while (index < end) {
        var actual = testStr.charCodeAt(index);
        var expected = referenceStr.charCodeAt(index - start);

        // setting bit 5 turns an ASCII uppercase letter into its lowercase form
        if (isUppercaseLetter(actual)) {
            actual |= 32;
        }

        if (actual !== expected) {
            return false;
        }

        index++;
    }

    return true;
}
57
// Scans backwards from `offset` (inclusive) while the code units are
// whitespace according to isWhiteSpace(), and returns the index of the first
// code unit of that whitespace run.
//
// When the code unit at `offset` is not whitespace the result is `offset + 1`;
// when the run reaches the beginning of the string the result is 0.
function findWhiteSpaceStart(source, offset) {
    while (offset >= 0 && isWhiteSpace(source.charCodeAt(offset))) {
        offset--;
    }

    // step forward from the first non-whitespace position (or from -1)
    return offset + 1;
}
67
// Scans forwards from `offset` while the code units are whitespace according
// to isWhiteSpace(), and returns the index of the first code unit that is not
// whitespace (or the current position once it reaches `source.length`).
function findWhiteSpaceEnd(source, offset) {
    while (offset < source.length && isWhiteSpace(source.charCodeAt(offset))) {
        offset++;
    }

    return offset;
}
77
// Scans forwards from `offset` while the code units are digits according to
// isDigit(), and returns the index of the first code unit that is not a digit
// (or the current position once it reaches `source.length`).
function findDecimalNumberEnd(source, offset) {
    while (offset < source.length && isDigit(source.charCodeAt(offset))) {
        offset++;
    }

    return offset;
}
87
// § 4.3.7. Consume an escaped code point
//
// `offset` points at the U+005C REVERSE SOLIDUS (\) that starts the escape.
// The caller is expected to have verified that it begins a valid escape.
// Returns the offset right after the whole escape sequence.
function consumeEscaped(source, offset) {
    // Step over the backslash and the code point that follows it.
    offset += 2;

    // Anything but a hex digit is a one-character escape: nothing more to do.
    if (!isHexDigit(getCharCode(source, offset - 1))) {
        return offset;
    }

    // One hex digit is already consumed; take up to 5 more, so that
    // 1-6 hex digits are consumed in total.
    var limit = Math.min(source.length, offset + 5);

    while (offset < limit && isHexDigit(getCharCode(source, offset))) {
        offset++;
    }

    // A single whitespace right after the hex digits belongs to the escape;
    // "\r\n" counts as one whitespace.
    var next = getCharCode(source, offset);

    if (isWhiteSpace(next)) {
        offset += getNewlineLength(source, offset, next);
    }

    return offset;
}
113
// §4.3.11. Consume a name
//
// Returns the offset of the first code point that is not part of the name
// starting at `offset`. No string is built; only the end position is computed.
//
// Note: this does not verify that the first few code points would constitute
// an <ident-token>. If that is the intended use, make sure the stream starts
// with an identifier before calling this function.
function consumeName(source, offset) {
    while (offset < source.length) {
        var code = source.charCodeAt(offset);

        if (isName(code)) {
            // name code point: part of the name, move on
            offset++;
        } else if (isValidEscape(code, getCharCode(source, offset + 1))) {
            // valid escape: part of the name, jump right behind it
            offset = consumeEscaped(source, offset);
        } else {
            // anything else: the name ends before this code point
            break;
        }
    }

    return offset;
}
144
// §4.3.12. Consume a number
//
// Returns the offset right after the numeric literal starting at `offset`.
// Only the end position is computed: the "repr" string and the
// "integer"/"number" type flag described by the spec steps (4.3 and 5.3,
// previously marked TODO) are not produced here.
function consumeNumber(source, offset) {
    var code = source.charCodeAt(offset);

    // 2. Optional sign: U+002B PLUS SIGN (+) or U+002D HYPHEN-MINUS (-).
    if (code === 0x002B || code === 0x002D) {
        offset += 1;
        code = source.charCodeAt(offset);
    }

    // 3. Integer part: a run of digits.
    if (isDigit(code)) {
        offset = findDecimalNumberEnd(source, offset + 1);
        code = source.charCodeAt(offset);
    }

    // 4. Fraction: U+002E FULL STOP (.) followed by at least one digit.
    //    The dot and the first digit are already verified, so scanning for
    //    the remaining digits starts two positions further.
    if (code === 0x002E && isDigit(source.charCodeAt(offset + 1))) {
        offset = findDecimalNumberEnd(source, offset + 2);
    }

    // 5. Exponent: E or e, optionally followed by a sign, followed by a digit.
    //    The offset only moves when the whole pattern is present.
    if (cmpChar(source, offset, 101 /* e */)) {
        var firstDigitAt = offset + 1;

        code = source.charCodeAt(firstDigitAt);

        // ... optional U+002D HYPHEN-MINUS (-) or U+002B PLUS SIGN (+) ...
        if (code === 0x002D || code === 0x002B) {
            firstDigitAt += 1;
            code = source.charCodeAt(firstDigitAt);
        }

        // ... followed by a digit; consume it and all digits after it
        if (isDigit(code)) {
            offset = findDecimalNumberEnd(source, firstDigitAt + 1);
        }
    }

    return offset;
}
202
// § 4.3.14. Consume the remnants of a bad url
//
// Its sole use is to consume enough of the input stream to reach a recovery
// point where normal tokenizing can resume.
//
// Scans from `offset` and returns the offset right after the first unescaped
// U+0029 RIGHT PARENTHESIS ()), or the end of the scanned input when there is
// none.
function consumeBadUrlRemnants(source, offset) {
    // Repeatedly consume the next input code point from the stream:
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        // U+0029 RIGHT PARENTHESIS ())
        // EOF
        if (code === 0x0029) {
            // Return.
            offset++;
            break;
        }

        if (isValidEscape(code, getCharCode(source, offset + 1))) {
            // Consume an escaped code point.
            // Note: This allows an escaped right parenthesis ("\)") to be encountered
            // without ending the <bad-url-token>. This is otherwise identical to
            // the "anything else" clause.
            //
            // consumeEscaped() returns the offset right after the escape and the
            // loop header increments once more, so step back by one (as
            // consumeName() does). Without it the code point following the
            // escape would be skipped: an unescaped ")" there would be missed,
            // and an escape at the very end would yield an offset past the end
            // of the input.
            offset = consumeEscaped(source, offset) - 1;
        }
    }

    return offset;
}
230
231module.exports = {
232 consumeEscaped: consumeEscaped,
233 consumeName: consumeName,
234 consumeNumber: consumeNumber,
235 consumeBadUrlRemnants: consumeBadUrlRemnants,
236
237 cmpChar: cmpChar,
238 cmpStr: cmpStr,
239
240 getNewlineLength: getNewlineLength,
241 findWhiteSpaceStart: findWhiteSpaceStart,
242 findWhiteSpaceEnd: findWhiteSpaceEnd
243};
Note: See TracBrowser for help on using the repository browser.