1 | /**
|
---|
2 | * @fileoverview Rule to forbid control characters from regular expressions.
|
---|
3 | * @author Nicholas C. Zakas
|
---|
4 | */
|
---|
5 |
|
---|
6 | "use strict";
|
---|
7 |
|
---|
8 | const RegExpValidator = require("@eslint-community/regexpp").RegExpValidator;
|
---|
/**
 * Singleton that gathers the control characters (code points 0x00-0x1F)
 * appearing in a regular expression pattern. It passes itself to
 * `RegExpValidator` as the callback object, so `onPatternEnter` and
 * `onCharacter` are invoked by the validator while it parses a pattern.
 */
const collector = new (class {
    constructor() {

        // Pattern text currently being inspected.
        this._source = "";

        // Control characters found so far, formatted as `\xHH`.
        this._controlChars = [];

        // The validator calls back into this object's `on*` hooks.
        this._validator = new RegExpValidator(this);
    }

    /**
     * Validator hook: a pattern parse is starting.
     * @returns {void}
     */
    onPatternEnter() {

        /*
         * A single `validatePattern` call may parse the pattern more than once,
         * so the results of any earlier pass have to be dropped here too.
         *
         * `/(?<a>\x1f)/` is such a case: per Annex B, a pattern that contains a
         * `GroupName` and is parsed without the `u` flag goes through `ParseText` twice.
         * See https://tc39.es/ecma262/2023/multipage/additional-ecmascript-features-for-web-browsers.html#sec-parsepattern-annexb
         */
        this._controlChars = [];
    }

    /**
     * Validator hook: a character was parsed. Records it when it is a control
     * character that was written either raw or as a `\x` / `\u` escape.
     * @param {number} start Index in the pattern source where the character begins.
     * @param {number} end Index in the pattern source where the character ends.
     * @param {number} cp Code point of the parsed character.
     * @returns {void}
     */
    onCharacter(start, end, cp) {
        const isControlCodePoint = cp >= 0x00 && cp <= 0x1F;

        if (!isControlCodePoint) {
            return;
        }

        const rawText = this._source.slice(start, end);
        const isReportableForm =
            this._source.codePointAt(start) === cp ||
            rawText.startsWith("\\x") ||
            rawText.startsWith("\\u");

        if (!isReportableForm) {
            return;
        }

        // Always two hex digits, e.g. `\x0a`, `\x1f`.
        const hex = cp.toString(16).padStart(2, "0");

        this._controlChars.push(`\\x${hex}`);
    }

    /**
     * Runs the validator over a pattern and returns the control characters seen.
     * @param {string} regexpStr Pattern source to inspect.
     * @param {string | null} flags Regex flags; any non-string value is treated as "no flags".
     * @returns {string[]} The collected control characters, each formatted as `\xHH`.
     */
    collectControlChars(regexpStr, flags) {
        const hasFlag = flag => typeof flags === "string" && flags.includes(flag);
        const parseOptions = {
            unicode: hasFlag("u"),
            unicodeSets: hasFlag("v")
        };

        this._source = regexpStr;
        this._controlChars = [];

        try {

            // Parsing is what triggers the `onCharacter` hook.
            this._validator.validatePattern(regexpStr, undefined, undefined, parseOptions);
        } catch {

            // Ignore syntax errors in RegExp.
        }

        return this._controlChars;
    }
})();
|
---|
59 |
|
---|
60 | //------------------------------------------------------------------------------
|
---|
61 | // Rule Definition
|
---|
62 | //------------------------------------------------------------------------------
|
---|
63 |
|
---|
64 | /** @type {import('../shared/types').Rule} */
|
---|
65 | module.exports = {
|
---|
66 | meta: {
|
---|
67 | type: "problem",
|
---|
68 |
|
---|
69 | docs: {
|
---|
70 | description: "Disallow control characters in regular expressions",
|
---|
71 | recommended: true,
|
---|
72 | url: "https://eslint.org/docs/latest/rules/no-control-regex"
|
---|
73 | },
|
---|
74 |
|
---|
75 | schema: [],
|
---|
76 |
|
---|
77 | messages: {
|
---|
78 | unexpected: "Unexpected control character(s) in regular expression: {{controlChars}}."
|
---|
79 | }
|
---|
80 | },
|
---|
81 |
|
---|
82 | create(context) {
|
---|
83 |
|
---|
84 | /**
|
---|
85 | * Get the regex expression
|
---|
86 | * @param {ASTNode} node `Literal` node to evaluate
|
---|
87 | * @returns {{ pattern: string, flags: string | null } | null} Regex if found (the given node is either a regex literal
|
---|
88 | * or a string literal that is the pattern argument of a RegExp constructor call). Otherwise `null`. If flags cannot be determined,
|
---|
89 | * the `flags` property will be `null`.
|
---|
90 | * @private
|
---|
91 | */
|
---|
92 | function getRegExp(node) {
|
---|
93 | if (node.regex) {
|
---|
94 | return node.regex;
|
---|
95 | }
|
---|
96 | if (typeof node.value === "string" &&
|
---|
97 | (node.parent.type === "NewExpression" || node.parent.type === "CallExpression") &&
|
---|
98 | node.parent.callee.type === "Identifier" &&
|
---|
99 | node.parent.callee.name === "RegExp" &&
|
---|
100 | node.parent.arguments[0] === node
|
---|
101 | ) {
|
---|
102 | const pattern = node.value;
|
---|
103 | const flags =
|
---|
104 | node.parent.arguments.length > 1 &&
|
---|
105 | node.parent.arguments[1].type === "Literal" &&
|
---|
106 | typeof node.parent.arguments[1].value === "string"
|
---|
107 | ? node.parent.arguments[1].value
|
---|
108 | : null;
|
---|
109 |
|
---|
110 | return { pattern, flags };
|
---|
111 | }
|
---|
112 |
|
---|
113 | return null;
|
---|
114 | }
|
---|
115 |
|
---|
116 | return {
|
---|
117 | Literal(node) {
|
---|
118 | const regExp = getRegExp(node);
|
---|
119 |
|
---|
120 | if (regExp) {
|
---|
121 | const { pattern, flags } = regExp;
|
---|
122 | const controlCharacters = collector.collectControlChars(pattern, flags);
|
---|
123 |
|
---|
124 | if (controlCharacters.length > 0) {
|
---|
125 | context.report({
|
---|
126 | node,
|
---|
127 | messageId: "unexpected",
|
---|
128 | data: {
|
---|
129 | controlChars: controlCharacters.join(", ")
|
---|
130 | }
|
---|
131 | });
|
---|
132 | }
|
---|
133 | }
|
---|
134 | }
|
---|
135 | };
|
---|
136 |
|
---|
137 | }
|
---|
138 | };
|
---|