// Code value treated as "end of input" by the checks below
// (isValidEscape() compares its second code point against it).
var EOF = 0x0000;
2 |
|
---|
3 | // https://drafts.csswg.org/css-syntax-3/
|
---|
4 | // § 4.2. Definitions
|
---|
5 |
|
---|
// digit
// A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9).
//
// Returns true when `code` lies in the inclusive range 0x30..0x39.
function isDigit(code) {
    var DIGIT_ZERO = 0x0030;
    var DIGIT_NINE = 0x0039;

    return DIGIT_ZERO <= code && code <= DIGIT_NINE;
}
11 |
|
---|
// hex digit
// A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and U+0046 LATIN CAPITAL LETTER F (F),
// or a code point between U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f).
//
// Returns true for 0..9, A..F and a..f; false for everything else.
function isHexDigit(code) {
    // 0 .. 9
    if (isDigit(code)) {
        return true;
    }

    // A .. F
    if (0x0041 <= code && code <= 0x0046) {
        return true;
    }

    // a .. f
    return 0x0061 <= code && code <= 0x0066;
}
22 |
|
---|
// uppercase letter
// A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z).
//
// Returns true when `code` lies in the inclusive range 0x41..0x5A.
function isUppercaseLetter(code) {
    var CAPITAL_A = 0x0041;
    var CAPITAL_Z = 0x005A;

    return CAPITAL_A <= code && code <= CAPITAL_Z;
}
28 |
|
---|
// lowercase letter
// A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z).
//
// Returns true when `code` lies in the inclusive range 0x61..0x7A.
function isLowercaseLetter(code) {
    var SMALL_A = 0x0061;
    var SMALL_Z = 0x007A;

    return SMALL_A <= code && code <= SMALL_Z;
}
34 |
|
---|
// letter
// An uppercase letter or a lowercase letter.
//
// Returns true for A..Z and a..z only.
function isLetter(code) {
    if (isUppercaseLetter(code)) {
        return true;
    }

    return isLowercaseLetter(code);
}
40 |
|
---|
// non-ASCII code point
// A code point with a value equal to or greater than U+0080 <control>.
//
// Returns true for every code at or above 0x80.
function isNonAscii(code) {
    var FIRST_NON_ASCII = 0x0080;

    return FIRST_NON_ASCII <= code;
}
46 |
|
---|
// name-start code point
// A letter, a non-ASCII code point, or U+005F LOW LINE (_).
//
// Returns true when `code` may begin a name.
function isNameStart(code) {
    if (isLetter(code)) {
        return true;
    }

    if (isNonAscii(code)) {
        return true;
    }

    // U+005F LOW LINE (_)
    return code === 0x005F;
}
52 |
|
---|
// name code point
// A name-start code point, a digit, or U+002D HYPHEN-MINUS (-).
//
// Returns true when `code` may appear inside a name.
function isName(code) {
    if (isNameStart(code)) {
        return true;
    }

    if (isDigit(code)) {
        return true;
    }

    // U+002D HYPHEN-MINUS (-)
    return code === 0x002D;
}
58 |
|
---|
// non-printable code point
// A code point between U+0000 NULL and U+0008 BACKSPACE, or U+000B LINE TABULATION,
// or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE, or U+007F DELETE.
//
// Returns true for the two ranges and the two single code points listed above.
function isNonPrintable(code) {
    // U+0000 NULL .. U+0008 BACKSPACE
    if (0x0000 <= code && code <= 0x0008) {
        return true;
    }

    // U+000B LINE TABULATION
    if (code === 0x000B) {
        return true;
    }

    // U+000E SHIFT OUT .. U+001F INFORMATION SEPARATOR ONE
    if (0x000E <= code && code <= 0x001F) {
        return true;
    }

    // U+007F DELETE
    return code === 0x007F;
}
70 |
|
---|
// newline
// U+000A LINE FEED. Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not included in this definition,
// as they are converted to U+000A LINE FEED during preprocessing.
// TODO: no preprocessing is done here, so U+000D CARRIAGE RETURN and U+000C FORM FEED
// are accepted as newlines too.
function isNewline(code) {
    switch (code) {
        case 0x000A: // LINE FEED
        case 0x000C: // FORM FEED
        case 0x000D: // CARRIAGE RETURN
            return true;

        default:
            return false;
    }
}
78 |
|
---|
// whitespace
// A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE.
//
// Returns true for tab, space and anything isNewline() accepts.
function isWhiteSpace(code) {
    // U+0009 CHARACTER TABULATION, U+0020 SPACE
    if (code === 0x0009 || code === 0x0020) {
        return true;
    }

    return isNewline(code);
}
84 |
|
---|
// § 4.3.8. Check if two code points are a valid escape
//
// The pair is a valid escape when the first code point is U+005C REVERSE SOLIDUS (\)
// and the second one is neither a newline nor EOF.
function isValidEscape(first, second) {
    return (
        first === 0x005C &&     // must start with a reverse solidus
        !isNewline(second) &&   // a newline cannot be escaped
        second !== EOF          // nothing left to escape at end of input
    );
}
100 |
|
---|
// § 4.3.9. Check if three code points would start an identifier
//
// Returns true when `first`, `second`, `third` (in that order) can begin an identifier.
function isIdentifierStart(first, second, third) {
    switch (first) {
        // U+002D HYPHEN-MINUS
        // True if the second code point is a name-start code point or another
        // U+002D HYPHEN-MINUS, or if the second and third code points are a valid escape.
        case 0x002D:
            if (isNameStart(second) || second === 0x002D) {
                return true;
            }

            return isValidEscape(second, third);

        // U+005C REVERSE SOLIDUS (\)
        // True only if the first and second code points are a valid escape.
        case 0x005C:
            return isValidEscape(first, second);

        // A name-start code point starts an identifier; anything else does not.
        default:
            return isNameStart(first);
    }
}
132 |
|
---|
// § 4.3.10. Check if three code points would start a number
//
// Returns 0 when the code points do not start a number. Otherwise returns
// 1, 2 or 3: the 1-based position of the first digit among the three code
// points (so any non-zero result is truthy).
function isNumberStart(first, second, third) {
    switch (first) {
        // U+002B PLUS SIGN (+)
        // U+002D HYPHEN-MINUS (-)
        case 0x002B:
        case 0x002D:
            // sign immediately followed by a digit
            if (isDigit(second)) {
                return 2;
            }

            // sign, U+002E FULL STOP (.), then a digit
            if (second === 0x002E && isDigit(third)) {
                return 3;
            }

            return 0;

        // U+002E FULL STOP (.) must be followed by a digit
        case 0x002E:
            return isDigit(second) ? 2 : 0;

        // a digit starts a number by itself; anything else does not
        default:
            return isDigit(first) ? 1 : 0;
    }
}
167 |
|
---|
168 | //
|
---|
169 | // Misc
|
---|
170 | //
|
---|
171 |
|
---|
// detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark)
//
// Returns 1 when `code` is 0xFEFF (UTF-16BE mark) or 0xFFFE (UTF-16LE mark),
// otherwise 0.
function isBOM(code) {
    var isBigEndianMark = code === 0xFEFF;
    var isLittleEndianMark = code === 0xFFFE;

    return isBigEndianMark || isLittleEndianMark ? 1 : 0;
}
186 |
|
---|
// Fast code category
//
// https://drafts.csswg.org/css-syntax/#tokenizer-definitions
// > non-ASCII code point
// >   A code point with a value equal to or greater than U+0080 <control>
// > name-start code point
// >   A letter, a non-ASCII code point, or U+005F LOW LINE (_).
// > name code point
// >   A name-start code point, a digit, or U+002D HYPHEN-MINUS (-)
// That means only ASCII code points have a special meaning, so the lookup
// table is defined for codes 0..127 only.
var CATEGORY = new Array(0x80);

// Category ids start at 0x80, so they never collide with an ASCII code
// stored in the table as its own category.
// (charCodeCategory is a hoisted function declaration, which is why
// properties can be attached to it before its definition below.)
charCodeCategory.Eof = 0x80;
charCodeCategory.WhiteSpace = 0x82;
charCodeCategory.Digit = 0x83;
charCodeCategory.NameStart = 0x84;
charCodeCategory.NonPrintable = 0x85;

// Fill the table; the first matching class wins.
for (var i = 0; i < CATEGORY.length; i++) {
    if (isWhiteSpace(i)) {
        CATEGORY[i] = charCodeCategory.WhiteSpace;
    } else if (isDigit(i)) {
        CATEGORY[i] = charCodeCategory.Digit;
    } else if (isNameStart(i)) {
        CATEGORY[i] = charCodeCategory.NameStart;
    } else if (isNonPrintable(i)) {
        CATEGORY[i] = charCodeCategory.NonPrintable;
    } else {
        // Any other code is its own category; code 0 would map to Eof.
        // NOTE(review): U+0000 is already matched by isNonPrintable() above,
        // so the Eof fallback is never taken here - confirm this is intended.
        CATEGORY[i] = i || charCodeCategory.Eof;
    }
}

// Returns the category of `code`: the table entry for codes below 0x80,
// NameStart for everything else (non-ASCII code points are name-start).
function charCodeCategory(code) {
    if (code < 0x80) {
        return CATEGORY[code];
    }

    return charCodeCategory.NameStart;
}
230 |
|
---|
// Public API of the module.
var publicApi = {
    // § 4.2 code point classes
    isDigit: isDigit,
    isHexDigit: isHexDigit,
    isUppercaseLetter: isUppercaseLetter,
    isLowercaseLetter: isLowercaseLetter,
    isLetter: isLetter,
    isNonAscii: isNonAscii,
    isNameStart: isNameStart,
    isName: isName,
    isNonPrintable: isNonPrintable,
    isNewline: isNewline,
    isWhiteSpace: isWhiteSpace,

    // § 4.3.8 - § 4.3.10 look-ahead checks
    isValidEscape: isValidEscape,
    isIdentifierStart: isIdentifierStart,
    isNumberStart: isNumberStart,

    // misc
    isBOM: isBOM,
    charCodeCategory: charCodeCategory
};

module.exports = publicApi;