[d565449] | 1 | /**
|
---|
| 2 | * @author Toru Nagashima <https://github.com/mysticatea>
|
---|
| 3 | */
|
---|
| 4 | "use strict";
|
---|
| 5 |
|
---|
| 6 | const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");
|
---|
| 7 | const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
|
---|
| 8 | const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
|
---|
| 9 | const astUtils = require("./utils/ast-utils.js");
|
---|
| 10 | const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
|
---|
| 11 |
|
---|
| 12 | //------------------------------------------------------------------------------
|
---|
| 13 | // Helpers
|
---|
| 14 | //------------------------------------------------------------------------------
|
---|
| 15 |
|
---|
| 16 | /**
|
---|
| 17 | * @typedef {import('@eslint-community/regexpp').AST.Character} Character
|
---|
| 18 | * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
|
---|
| 19 | */
|
---|
| 20 |
|
---|
/**
 * Splits the elements of a character class into runs of adjacent characters.
 *
 * A `CharacterClassRange` (`a-b`) can swallow the end of one run and the
 * start of the next one, so the range is undone here: its `min` character
 * closes the current run and its `max` character opens a new run.
 * Character sets, nested classes, `\q{...}` and class expressions end the
 * current run without contributing any character to it.
 * @param {CharacterClassElement[]} nodes The elements of a character class.
 * @returns {IterableIterator<Character[]>} The runs of adjacent characters.
 */
function *iterateCharacterSequence(nodes) {

    /** @type {Character[]} */
    let run = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            run.push(element);
        } else if (type === "CharacterClassRange") {

            // `min` still belongs to the run that precedes the range.
            run.push(element.min);
            yield run;

            // `max` is the first character of whatever follows the range.
            run = [element.max];
        } else if (
            type === "CharacterSet" ||
            type === "CharacterClass" || // [[]] nesting character class
            type === "ClassStringDisjunction" || // \q{...}
            type === "ExpressionCharacterClass" // [A--B]
        ) {

            // These elements separate runs; an empty run is never emitted.
            if (run.length !== 0) {
                yield run;
                run = [];
            }
        }

        // Any other element type neither extends nor ends the run.
    }

    if (run.length !== 0) {
        yield run;
    }
}
|
---|
| 64 |
|
---|
| 65 |
|
---|
/**
 * Tells whether a character node was written as a `\u{...}` code point escape.
 * @param {Character} char The character node to inspect; only its `raw` text is tested.
 * @returns {boolean} `true` when the raw text is exactly one `\u{...}` escape
 *      whose braces contain at least one hexadecimal digit.
 */
function isUnicodeCodePointEscape(char) {
    const codePointEscapePattern = /^\\u\{[\da-f]+\}$/iu;
    const { raw } = char;

    return codePointEscapePattern.test(raw);
}
|
---|
| 74 |
|
---|
/**
 * Detectors for each kind of problematic character sequence.
 * Every detector walks one character sequence, looks at neighbouring
 * characters, and answers `true` as soon as one offending position is found.
 * @type {Record<string, (chars: Character[]) => boolean>}
 */
const hasCharacterSequence = {

    // A surrogate pair where neither half is written as a `\u{...}` escape.
    surrogatePairWithoutUFlag(chars) {
        for (let i = 1; i < chars.length; i++) {
            const before = chars[i - 1];
            const current = chars[i];

            if (
                isSurrogatePair(before.value, current.value) &&
                !(isUnicodeCodePointEscape(before) || isUnicodeCodePointEscape(current))
            ) {
                return true;
            }
        }

        return false;
    },

    // A surrogate pair where at least one half is written as a `\u{...}` escape.
    surrogatePair(chars) {
        for (let i = 1; i < chars.length; i++) {
            const before = chars[i - 1];
            const current = chars[i];

            if (
                isSurrogatePair(before.value, current.value) &&
                (isUnicodeCodePointEscape(before) || isUnicodeCodePointEscape(current))
            ) {
                return true;
            }
        }

        return false;
    },

    // A combining character that directly follows a non-combining character.
    combiningClass(chars) {
        for (let i = 1; i < chars.length; i++) {
            if (
                isCombiningCharacter(chars[i].value) &&
                !isCombiningCharacter(chars[i - 1].value)
            ) {
                return true;
            }
        }

        return false;
    },

    // An emoji modifier that directly follows something that is not an emoji modifier.
    emojiModifier(chars) {
        for (let i = 1; i < chars.length; i++) {
            if (
                isEmojiModifier(chars[i].value) &&
                !isEmojiModifier(chars[i - 1].value)
            ) {
                return true;
            }
        }

        return false;
    },

    // Two regional indicator symbols next to each other.
    regionalIndicatorSymbol(chars) {
        for (let i = 1; i < chars.length; i++) {
            if (
                isRegionalIndicatorSymbol(chars[i].value) &&
                isRegionalIndicatorSymbol(chars[i - 1].value)
            ) {
                return true;
            }
        }

        return false;
    },

    // U+200D that is neither first nor last and has a non-U+200D character on both sides.
    zwj(chars) {
        const joiner = 0x200d;
        const end = chars.length - 1;

        for (let i = 1; i < end; i++) {
            if (
                chars[i].value === joiner &&
                chars[i - 1].value !== joiner &&
                chars[i + 1].value !== joiner
            ) {
                return true;
            }
        }

        return false;
    }
};

// Names of the detectors above, in declaration order.
const kinds = Object.keys(hasCharacterSequence);
|
---|
| 150 |
|
---|
| 151 | //------------------------------------------------------------------------------
|
---|
| 152 | // Rule Definition
|
---|
| 153 | //------------------------------------------------------------------------------
|
---|
| 154 |
|
---|
| 155 | /** @type {import('../shared/types').Rule} */
|
---|
| 156 | module.exports = {
|
---|
| 157 | meta: {
|
---|
| 158 | type: "problem",
|
---|
| 159 |
|
---|
| 160 | docs: {
|
---|
| 161 | description: "Disallow characters which are made with multiple code points in character class syntax",
|
---|
| 162 | recommended: true,
|
---|
| 163 | url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
|
---|
| 164 | },
|
---|
| 165 |
|
---|
| 166 | hasSuggestions: true,
|
---|
| 167 |
|
---|
| 168 | schema: [],
|
---|
| 169 |
|
---|
| 170 | messages: {
|
---|
| 171 | surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
|
---|
| 172 | surrogatePair: "Unexpected surrogate pair in character class.",
|
---|
| 173 | combiningClass: "Unexpected combined character in character class.",
|
---|
| 174 | emojiModifier: "Unexpected modified Emoji in character class.",
|
---|
| 175 | regionalIndicatorSymbol: "Unexpected national flag in character class.",
|
---|
| 176 | zwj: "Unexpected joined character sequence in character class.",
|
---|
| 177 | suggestUnicodeFlag: "Add unicode 'u' flag to regex."
|
---|
| 178 | }
|
---|
| 179 | },
|
---|
| 180 | create(context) {
|
---|
| 181 | const sourceCode = context.sourceCode;
|
---|
| 182 | const parser = new RegExpParser();
|
---|
| 183 |
|
---|
| 184 | /**
|
---|
| 185 | * Verify a given regular expression.
|
---|
| 186 | * @param {Node} node The node to report.
|
---|
| 187 | * @param {string} pattern The regular expression pattern to verify.
|
---|
| 188 | * @param {string} flags The flags of the regular expression.
|
---|
| 189 | * @param {Function} unicodeFixer Fixer for missing "u" flag.
|
---|
| 190 | * @returns {void}
|
---|
| 191 | */
|
---|
| 192 | function verify(node, pattern, flags, unicodeFixer) {
|
---|
| 193 | let patternNode;
|
---|
| 194 |
|
---|
| 195 | try {
|
---|
| 196 | patternNode = parser.parsePattern(
|
---|
| 197 | pattern,
|
---|
| 198 | 0,
|
---|
| 199 | pattern.length,
|
---|
| 200 | {
|
---|
| 201 | unicode: flags.includes("u"),
|
---|
| 202 | unicodeSets: flags.includes("v")
|
---|
| 203 | }
|
---|
| 204 | );
|
---|
| 205 | } catch {
|
---|
| 206 |
|
---|
| 207 | // Ignore regular expressions with syntax errors
|
---|
| 208 | return;
|
---|
| 209 | }
|
---|
| 210 |
|
---|
| 211 | const foundKinds = new Set();
|
---|
| 212 |
|
---|
| 213 | visitRegExpAST(patternNode, {
|
---|
| 214 | onCharacterClassEnter(ccNode) {
|
---|
| 215 | for (const chars of iterateCharacterSequence(ccNode.elements)) {
|
---|
| 216 | for (const kind of kinds) {
|
---|
| 217 | if (hasCharacterSequence[kind](chars)) {
|
---|
| 218 | foundKinds.add(kind);
|
---|
| 219 | }
|
---|
| 220 | }
|
---|
| 221 | }
|
---|
| 222 | }
|
---|
| 223 | });
|
---|
| 224 |
|
---|
| 225 | for (const kind of foundKinds) {
|
---|
| 226 | let suggest;
|
---|
| 227 |
|
---|
| 228 | if (kind === "surrogatePairWithoutUFlag") {
|
---|
| 229 | suggest = [{
|
---|
| 230 | messageId: "suggestUnicodeFlag",
|
---|
| 231 | fix: unicodeFixer
|
---|
| 232 | }];
|
---|
| 233 | }
|
---|
| 234 |
|
---|
| 235 | context.report({
|
---|
| 236 | node,
|
---|
| 237 | messageId: kind,
|
---|
| 238 | suggest
|
---|
| 239 | });
|
---|
| 240 | }
|
---|
| 241 | }
|
---|
| 242 |
|
---|
| 243 | return {
|
---|
| 244 | "Literal[regex]"(node) {
|
---|
| 245 | verify(node, node.regex.pattern, node.regex.flags, fixer => {
|
---|
| 246 | if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
|
---|
| 247 | return null;
|
---|
| 248 | }
|
---|
| 249 |
|
---|
| 250 | return fixer.insertTextAfter(node, "u");
|
---|
| 251 | });
|
---|
| 252 | },
|
---|
| 253 | "Program"(node) {
|
---|
| 254 | const scope = sourceCode.getScope(node);
|
---|
| 255 | const tracker = new ReferenceTracker(scope);
|
---|
| 256 |
|
---|
| 257 | /*
|
---|
| 258 | * Iterate calls of RegExp.
|
---|
| 259 | * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
|
---|
| 260 | * `const {RegExp: a} = window; new a()`, etc...
|
---|
| 261 | */
|
---|
| 262 | for (const { node: refNode } of tracker.iterateGlobalReferences({
|
---|
| 263 | RegExp: { [CALL]: true, [CONSTRUCT]: true }
|
---|
| 264 | })) {
|
---|
| 265 | const [patternNode, flagsNode] = refNode.arguments;
|
---|
| 266 | const pattern = getStringIfConstant(patternNode, scope);
|
---|
| 267 | const flags = getStringIfConstant(flagsNode, scope);
|
---|
| 268 |
|
---|
| 269 | if (typeof pattern === "string") {
|
---|
| 270 | verify(refNode, pattern, flags || "", fixer => {
|
---|
| 271 |
|
---|
| 272 | if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
|
---|
| 273 | return null;
|
---|
| 274 | }
|
---|
| 275 |
|
---|
| 276 | if (refNode.arguments.length === 1) {
|
---|
| 277 | const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis
|
---|
| 278 |
|
---|
| 279 | return fixer.insertTextAfter(
|
---|
| 280 | penultimateToken,
|
---|
| 281 | astUtils.isCommaToken(penultimateToken)
|
---|
| 282 | ? ' "u",'
|
---|
| 283 | : ', "u"'
|
---|
| 284 | );
|
---|
| 285 | }
|
---|
| 286 |
|
---|
| 287 | if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
|
---|
| 288 | const range = [flagsNode.range[0], flagsNode.range[1] - 1];
|
---|
| 289 |
|
---|
| 290 | return fixer.insertTextAfterRange(range, "u");
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | return null;
|
---|
| 294 | });
|
---|
| 295 | }
|
---|
| 296 | }
|
---|
| 297 | }
|
---|
| 298 | };
|
---|
| 299 | }
|
---|
| 300 | };
|
---|