// Code value that isValidEscape() compares against to detect the end of input.
var EOF = 0;

// https://drafts.csswg.org/css-syntax-3/
// § 4.2. Definitions

// digit
// A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9).
//
// code: numeric code point to test.
// Returns true when `code` lies in the inclusive range 0x30..0x39, false otherwise.
function isDigit(code) {
    return code >= 0x0030 && code <= 0x0039;
}

// hex digit
// A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and U+0046 LATIN CAPITAL LETTER F (F),
// or a code point between U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f).
//
// code: numeric code point to test.
// Returns true for 0-9, A-F and a-f, false otherwise.
function isHexDigit(code) {
    return (
        isDigit(code) || // 0 .. 9
        (code >= 0x0041 && code <= 0x0046) || // A .. F
        (code >= 0x0061 && code <= 0x0066)    // a .. f
    );
}

// uppercase letter
// A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z).
//
// code: numeric code point to test.
// Returns true when `code` lies in the inclusive range 0x41..0x5A, false otherwise.
function isUppercaseLetter(code) {
    return code >= 0x0041 && code <= 0x005A;
}

// lowercase letter
// A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z).
//
// code: numeric code point to test.
// Returns true when `code` lies in the inclusive range 0x61..0x7A, false otherwise.
function isLowercaseLetter(code) {
    return code >= 0x0061 && code <= 0x007A;
}

// letter
// An uppercase letter or a lowercase letter.
//
// code: numeric code point to test.
// Returns true when either isUppercaseLetter() or isLowercaseLetter() accepts `code`.
function isLetter(code) {
    return isUppercaseLetter(code) || isLowercaseLetter(code);
}

// non-ASCII code point
// A code point with a value equal to or greater than U+0080 <control>.
//
// code: numeric code point to test.
// Returns true when `code` is 0x80 or greater, false otherwise.
function isNonAscii(code) {
    return code >= 0x0080;
}

// name-start code point
// A letter, a non-ASCII code point, or U+005F LOW LINE (_).
//
// code: numeric code point to test.
// Returns true when `code` is accepted by isLetter() or isNonAscii(), or equals 0x5F.
function isNameStart(code) {
    return isLetter(code) || isNonAscii(code) || code === 0x005F;
}

// name code point
// A name-start code point, a digit, or U+002D HYPHEN-MINUS (-).
//
// code: numeric code point to test.
// Returns true when `code` is accepted by isNameStart() or isDigit(), or equals 0x2D.
function isName(code) {
    return isNameStart(code) || isDigit(code) || code === 0x002D;
}

// non-printable code point
// A code point between U+0000 NULL and U+0008 BACKSPACE, or U+000B LINE TABULATION,
// or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE, or U+007F DELETE.
//
// code: numeric code point to test.
// Returns true for 0x00..0x08, 0x0B, 0x0E..0x1F and 0x7F, false otherwise.
function isNonPrintable(code) {
    return (
        (code >= 0x0000 && code <= 0x0008) ||
        (code === 0x000B) ||
        (code >= 0x000E && code <= 0x001F) ||
        (code === 0x007F)
    );
}

// newline
// U+000A LINE FEED. Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not included in this definition,
// as they are converted to U+000A LINE FEED during preprocessing.
// TODO: we don't do preprocessing, so also check the code point for U+000D CARRIAGE RETURN and U+000C FORM FEED
//
// code: numeric code point to test.
// Returns true when `code` is 0x0A, 0x0D or 0x0C, false otherwise.
function isNewline(code) {
    return code === 0x000A || code === 0x000D || code === 0x000C;
}

// whitespace
// A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE.
//
// code: numeric code point to test.
// Returns true when `code` is accepted by isNewline(), or equals 0x20 or 0x09.
function isWhiteSpace(code) {
    return isNewline(code) || code === 0x0020 || code === 0x0009;
}

// § 4.3.8. Check if two code points are a valid escape
//
// first, second: the two numeric code points to inspect.
// Returns true only when `first` is a backslash and `second` is neither a newline nor EOF.
function isValidEscape(first, second) {
    // If the first code point is not U+005C REVERSE SOLIDUS (\), return false.
    if (first !== 0x005C) {
        return false;
    }

    // Otherwise, if the second code point is a newline or EOF, return false.
    if (isNewline(second) || second === EOF) {
        return false;
    }

    // Otherwise, return true.
    return true;
}

// § 4.3.9. Check if three code points would start an identifier
//
// first, second, third: the three numeric code points to inspect, in input order.
// Returns true when they would start an identifier, false otherwise.
function isIdentifierStart(first, second, third) {
    // Look at the first code point:

    // U+002D HYPHEN-MINUS
    if (first === 0x002D) {
        // If the second code point is a name-start code point or a U+002D HYPHEN-MINUS,
        // or the second and third code points are a valid escape, return true. Otherwise, return false.
        return (
            isNameStart(second) ||
            second === 0x002D ||
            isValidEscape(second, third)
        );
    }

    // name-start code point
    if (isNameStart(first)) {
        // Return true.
        return true;
    }

    // U+005C REVERSE SOLIDUS (\)
    if (first === 0x005C) {
        // If the first and second code points are a valid escape, return true. Otherwise, return false.
        return isValidEscape(first, second);
    }

    // anything else
    // Return false.
    return false;
}

// § 4.3.10. Check if three code points would start a number
//
// first, second, third: the three numeric code points to inspect, in input order.
// Returns a number rather than a boolean: 0 when the code points do not start a number,
// otherwise the 1-based position (1, 2 or 3) of the first digit among the three code points.
// Any non-zero result is truthy, so it corresponds to the spec's "return true".
function isNumberStart(first, second, third) {
    // Look at the first code point:

    // U+002B PLUS SIGN (+)
    // U+002D HYPHEN-MINUS (-)
    if (first === 0x002B || first === 0x002D) {
        // If the second code point is a digit, return true (2: sign, digit).
        if (isDigit(second)) {
            return 2;
        }

        // Otherwise, if the second code point is a U+002E FULL STOP (.)
        // and the third code point is a digit, return true (3: sign, dot, digit).
        // Otherwise, return false (0).
        return second === 0x002E && isDigit(third) ? 3 : 0;
    }

    // U+002E FULL STOP (.)
    if (first === 0x002E) {
        // If the second code point is a digit, return true (2: dot, digit). Otherwise, return false (0).
        return isDigit(second) ? 2 : 0;
    }

    // digit
    if (isDigit(first)) {
        // Return true (1: the first code point is itself a digit).
        return 1;
    }

    // anything else
    // Return false (0).
    return 0;
}

//
// Misc
//

// detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark)
//
// code: numeric code to test.
// Returns 1 when `code` is 0xFEFF or 0xFFFE, 0 otherwise (a number, not a boolean).
function isBOM(code) {
    // UTF-16BE
    if (code === 0xFEFF) {
        return 1;
    }

    // UTF-16LE
    if (code === 0xFFFE) {
        return 1;
    }

    return 0;
}

// Fast code category
//
// https://drafts.csswg.org/css-syntax/#tokenizer-definitions
// > non-ASCII code point
// >   A code point with a value equal to or greater than U+0080 <control>
// > name-start code point
// >   A letter, a non-ASCII code point, or U+005F LOW LINE (_).
// > name code point
// >   A name-start code point, a digit, or U+002D HYPHEN-MINUS (-)
// That means only ASCII code points have a special meaning, so we define a map for codes 0..127 only.
var CATEGORY = new Array(0x80);

// Category identifiers, attached to the (hoisted) charCodeCategory function.
// All of them are >= 0x80, so they never coincide with an ASCII code stored by the fallback below.
charCodeCategory.Eof = 0x80;
charCodeCategory.WhiteSpace = 0x82;
charCodeCategory.Digit = 0x83;
charCodeCategory.NameStart = 0x84;
charCodeCategory.NonPrintable = 0x85;

// Fill the lookup table once at load time. The checks are ordered; the first match wins.
for (var i = 0; i < CATEGORY.length; i++) {
    if (isWhiteSpace(i)) {
        CATEGORY[i] = charCodeCategory.WhiteSpace;
    } else if (isDigit(i)) {
        CATEGORY[i] = charCodeCategory.Digit;
    } else if (isNameStart(i)) {
        CATEGORY[i] = charCodeCategory.NameStart;
    } else if (isNonPrintable(i)) {
        CATEGORY[i] = charCodeCategory.NonPrintable;
    } else {
        // Any other code is its own category; a zero code would fall back to Eof.
        // NOTE(review): isNonPrintable(), as defined in this file, accepts code 0, so the
        // Eof fallback does not appear to be reachable from this loop — confirm whether intended.
        CATEGORY[i] = i || charCodeCategory.Eof;
    }
}

// code: numeric code point to classify.
// Returns the table entry for codes below 0x80, and charCodeCategory.NameStart for anything else.
function charCodeCategory(code) {
    return code < 0x80 ? CATEGORY[code] : charCodeCategory.NameStart;
}

| 231 | module.exports = {
|
---|
| 232 | isDigit: isDigit,
|
---|
| 233 | isHexDigit: isHexDigit,
|
---|
| 234 | isUppercaseLetter: isUppercaseLetter,
|
---|
| 235 | isLowercaseLetter: isLowercaseLetter,
|
---|
| 236 | isLetter: isLetter,
|
---|
| 237 | isNonAscii: isNonAscii,
|
---|
| 238 | isNameStart: isNameStart,
|
---|
| 239 | isName: isName,
|
---|
| 240 | isNonPrintable: isNonPrintable,
|
---|
| 241 | isNewline: isNewline,
|
---|
| 242 | isWhiteSpace: isWhiteSpace,
|
---|
| 243 | isValidEscape: isValidEscape,
|
---|
| 244 | isIdentifierStart: isIdentifierStart,
|
---|
| 245 | isNumberStart: isNumberStart,
|
---|
| 246 |
|
---|
| 247 | isBOM: isBOM,
|
---|
| 248 | charCodeCategory: charCodeCategory
|
---|
| 249 | };
|
---|