[d565449] | 1 | /**
|
---|
| 2 | * @fileoverview Rule to forbid control characters from regular expressions.
|
---|
| 3 | * @author Nicholas C. Zakas
|
---|
| 4 | */
|
---|
| 5 |
|
---|
| 6 | "use strict";
|
---|
| 7 |
|
---|
| 8 | const RegExpValidator = require("@eslint-community/regexpp").RegExpValidator;
|
---|
/**
 * Shared collector that gathers the control characters written in a regular
 * expression pattern. The instance passes itself to `RegExpValidator`, so its
 * `onPatternEnter` and `onCharacter` methods act as the validator's hooks.
 */
const collector = new (class {
    constructor() {
        this._source = "";
        this._controlChars = [];
        this._validator = new RegExpValidator(this);
    }

    /**
     * Hook run when the validator starts parsing a pattern.
     * @returns {void}
     */
    onPatternEnter() {

        /*
         * A single `validatePattern` call may parse the pattern twice, so the
         * list has to be reset here and not only in `collectControlChars`.
         *
         * `/(?<a>\x1f)/` is one such pattern. Per Annex B, when the regex
         * contains a `GroupName` and the `u` flag is not used, `ParseText`
         * is called twice.
         * See https://tc39.es/ecma262/2023/multipage/additional-ecmascript-features-for-web-browsers.html#sec-parsepattern-annexb
         */
        this._controlChars = [];
    }

    /**
     * Hook run for each character the validator reports. Records the
     * character when its code point is in the range 0x00-0x1F and it is
     * written either raw or as a `\x` / `\u` escape in the source.
     * @param {number} start Start index of the character in the source.
     * @param {number} end End index of the character in the source.
     * @param {number} cp Code point of the character.
     * @returns {void}
     */
    onCharacter(start, end, cp) {
        const isControlCodePoint = cp >= 0x00 && cp <= 0x1F;

        if (!isControlCodePoint) {
            return;
        }

        if (this._source.codePointAt(start) !== cp) {

            // Not written raw; only `\x..` and `\u..` escapes are recorded.
            const raw = this._source.slice(start, end);

            if (!raw.startsWith("\\x") && !raw.startsWith("\\u")) {
                return;
            }
        }

        // Always recorded in the two-digit `\xHH` form.
        const hex = cp.toString(16).padStart(2, "0");

        this._controlChars.push(`\\x${hex}`);
    }

    /**
     * Runs the validator over a pattern and returns the control characters
     * recorded while it was parsed.
     * @param {string} regexpStr Pattern source to inspect.
     * @param {string | null} flags Regex flags; a non-string value is treated as no flags.
     * @returns {string[]} Recorded control characters, each formatted as `\xHH`.
     */
    collectControlChars(regexpStr, flags) {
        const hasFlag = flag => typeof flags === "string" && flags.includes(flag);
        const validatorOptions = {
            unicode: hasFlag("u"),
            unicodeSets: hasFlag("v")
        };

        this._controlChars = [];
        this._source = regexpStr;

        try {

            // Validation is what triggers the `onCharacter` hook.
            this._validator.validatePattern(regexpStr, void 0, void 0, validatorOptions);
        } catch {

            // Syntax errors in the RegExp are deliberately ignored.
        }

        return this._controlChars;
    }
})();
|
---|
| 59 |
|
---|
| 60 | //------------------------------------------------------------------------------
|
---|
| 61 | // Rule Definition
|
---|
| 62 | //------------------------------------------------------------------------------
|
---|
| 63 |
|
---|
| 64 | /** @type {import('../shared/types').Rule} */
|
---|
| 65 | module.exports = {
|
---|
| 66 | meta: {
|
---|
| 67 | type: "problem",
|
---|
| 68 |
|
---|
| 69 | docs: {
|
---|
| 70 | description: "Disallow control characters in regular expressions",
|
---|
| 71 | recommended: true,
|
---|
| 72 | url: "https://eslint.org/docs/latest/rules/no-control-regex"
|
---|
| 73 | },
|
---|
| 74 |
|
---|
| 75 | schema: [],
|
---|
| 76 |
|
---|
| 77 | messages: {
|
---|
| 78 | unexpected: "Unexpected control character(s) in regular expression: {{controlChars}}."
|
---|
| 79 | }
|
---|
| 80 | },
|
---|
| 81 |
|
---|
| 82 | create(context) {
|
---|
| 83 |
|
---|
| 84 | /**
|
---|
| 85 | * Get the regex expression
|
---|
| 86 | * @param {ASTNode} node `Literal` node to evaluate
|
---|
| 87 | * @returns {{ pattern: string, flags: string | null } | null} Regex if found (the given node is either a regex literal
|
---|
| 88 | * or a string literal that is the pattern argument of a RegExp constructor call). Otherwise `null`. If flags cannot be determined,
|
---|
| 89 | * the `flags` property will be `null`.
|
---|
| 90 | * @private
|
---|
| 91 | */
|
---|
| 92 | function getRegExp(node) {
|
---|
| 93 | if (node.regex) {
|
---|
| 94 | return node.regex;
|
---|
| 95 | }
|
---|
| 96 | if (typeof node.value === "string" &&
|
---|
| 97 | (node.parent.type === "NewExpression" || node.parent.type === "CallExpression") &&
|
---|
| 98 | node.parent.callee.type === "Identifier" &&
|
---|
| 99 | node.parent.callee.name === "RegExp" &&
|
---|
| 100 | node.parent.arguments[0] === node
|
---|
| 101 | ) {
|
---|
| 102 | const pattern = node.value;
|
---|
| 103 | const flags =
|
---|
| 104 | node.parent.arguments.length > 1 &&
|
---|
| 105 | node.parent.arguments[1].type === "Literal" &&
|
---|
| 106 | typeof node.parent.arguments[1].value === "string"
|
---|
| 107 | ? node.parent.arguments[1].value
|
---|
| 108 | : null;
|
---|
| 109 |
|
---|
| 110 | return { pattern, flags };
|
---|
| 111 | }
|
---|
| 112 |
|
---|
| 113 | return null;
|
---|
| 114 | }
|
---|
| 115 |
|
---|
| 116 | return {
|
---|
| 117 | Literal(node) {
|
---|
| 118 | const regExp = getRegExp(node);
|
---|
| 119 |
|
---|
| 120 | if (regExp) {
|
---|
| 121 | const { pattern, flags } = regExp;
|
---|
| 122 | const controlCharacters = collector.collectControlChars(pattern, flags);
|
---|
| 123 |
|
---|
| 124 | if (controlCharacters.length > 0) {
|
---|
| 125 | context.report({
|
---|
| 126 | node,
|
---|
| 127 | messageId: "unexpected",
|
---|
| 128 | data: {
|
---|
| 129 | controlChars: controlCharacters.join(", ")
|
---|
| 130 | }
|
---|
| 131 | });
|
---|
| 132 | }
|
---|
| 133 | }
|
---|
| 134 | }
|
---|
| 135 | };
|
---|
| 136 |
|
---|
| 137 | }
|
---|
| 138 | };
|
---|