[79a0317] | 1 | 'use strict';
|
---|
| 2 |
|
---|
| 3 | const generate = require('regjsgen').generate;
|
---|
| 4 | const parse = require('regjsparser').parse;
|
---|
| 5 | const regenerate = require('regenerate');
|
---|
| 6 | const unicodeMatchProperty = require('unicode-match-property-ecmascript');
|
---|
| 7 | const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
|
---|
| 8 | const iuMappings = require('./data/iu-mappings.js');
|
---|
| 9 | const iBMPMappings = require('./data/i-bmp-mappings.js');
|
---|
| 10 | const iuFoldings = require('./data/iu-foldings.js');
|
---|
| 11 | const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
|
---|
| 12 | const { UNICODE_SET, UNICODE_IV_SET } = require('./data/all-characters.js');
|
---|
| 13 |
|
---|
/**
 * Maps every element of `array` through `callback` and flattens the results
 * by one level: an array result contributes each of its elements, any other
 * result is appended as-is.
 *
 * @param {Array} array Items to map (iterated with `forEach`).
 * @param {Function} callback Invoked with the item only (no index argument).
 * @returns {Array} A new array holding the one-level-flattened results.
 */
function flatMap(array, callback) {
  const result = [];
  array.forEach(item => {
    const res = callback(item);
    if (Array.isArray(res)) {
      // Append element by element instead of `push.apply(result, res)`:
      // `apply` passes every element as a call argument and throws a
      // RangeError once `res` exceeds the engine's argument limit.
      for (let i = 0; i < res.length; i++) {
        result.push(res[i]);
      }
    } else {
      result.push(res);
    }
  });
  return result;
}
|
---|
| 26 |
|
---|
/**
 * Checks whether the last entry of a regenerate set's `data` array is at
 * least 0x10000; the caller uses the answer to pick the `bmpOnly` option.
 *
 * NOTE(review): if `data` stores exclusive range ends, a set whose highest
 * member is U+FFFF would also end in 0x10000 and be reported as astral —
 * confirm whether `>=` is intended.
 *
 * @param {{data: number[]}} regenerateData Object exposing a `data` array.
 * @returns {boolean} `false` for empty data, otherwise the comparison result.
 */
function regenerateContainsAstral(regenerateData) {
  const ranges = regenerateData.data;
  if (ranges.length < 1) {
    return false;
  }
  const lastEntry = ranges[ranges.length - 1];
  return lastEntry >= 0x10000;
}
|
---|
| 31 |
|
---|
// Global pattern matching each `SyntaxCharacter`
// (https://tc39.es/ecma262/#prod-SyntaxCharacter).
const SYNTAX_CHARS = /[\\^$.*+?()[\]{}|]/g;

// Code points U+10000 through U+10FFFF.
const ASTRAL_SET = regenerate().addRange(0x10000, 0x10FFFF);

// The `LineTerminator` code points (https://mths.be/es6#sec-line-terminators):
// Line Feed <LF>, Carriage Return <CR>, Line Separator <LS> and
// Paragraph Separator <PS>.
const NEWLINE_SET = regenerate().add(0x000A, 0x000D, 0x2028, 0x2029);

// Everything `/./u` is supposed to match (https://mths.be/es6#sec-atom):
// a copy of the full Unicode set with the line terminators taken out.
const DOT_SET_UNICODE = UNICODE_SET.clone().remove(NEWLINE_SET);
|
---|
| 49 |
|
---|
/**
 * Looks up the entry for a character class escape in the `ESCAPE_SETS` table
 * that matches the requested mode.
 *
 * @param {string} character Key handed to the table's `get`.
 * @param {boolean} unicode Falsy selects the `REGULAR` table.
 * @param {boolean} ignoreCase With `unicode`, falsy selects the `UNICODE` table.
 * @param {boolean} [shouldApplySCF] With `unicode` and `ignoreCase`, truthy
 *   selects `UNICODESET_IGNORE_CASE`, otherwise `UNICODE_IGNORE_CASE`.
 * @returns Whatever the selected table holds for `character`.
 */
const getCharacterClassEscapeSet = (character, unicode, ignoreCase, shouldApplySCF) => {
  if (!unicode) {
    return ESCAPE_SETS.REGULAR.get(character);
  }
  if (!ignoreCase) {
    return ESCAPE_SETS.UNICODE.get(character);
  }
  const ignoreCaseEntry = ESCAPE_SETS.UNICODE_IGNORE_CASE.get(character);
  return shouldApplySCF
    ? ESCAPE_SETS.UNICODESET_IGNORE_CASE.get(character)
    : ignoreCaseEntry;
};
|
---|
| 64 |
|
---|
/**
 * Picks the set used for `.` in Unicode mode.
 *
 * @param {boolean} dotAll Truthy when `.` should also match line terminators.
 * @returns `UNICODE_SET` when `dotAll` is truthy, `DOT_SET_UNICODE` otherwise.
 *   The shared constant itself is returned, not a copy.
 */
const getUnicodeDotSet = (dotAll) => {
  if (dotAll) {
    return UNICODE_SET;
  }
  return DOT_SET_UNICODE;
};
|
---|
| 68 |
|
---|
/**
 * Loads the `regenerate-unicode-properties` module for a property/value pair,
 * or for a binary property when `value` is falsy.
 *
 * @param {string} property Property name used in the module path.
 * @param {string} [value] Property value; when falsy the path becomes
 *   `Binary_Property/<property>`.
 * @returns The export of the required module.
 * @throws {Error} When requiring the module fails for any reason.
 */
const getUnicodePropertyValueSet = (property, value) => {
  let path;
  if (value) {
    path = `${ property }/${ value }`;
  } else {
    path = `Binary_Property/${ property }`;
  }
  try {
    return require(`regenerate-unicode-properties/${ path }.js`);
  } catch (exception) {
    const message =
      `Failed to recognize value \`${ value }\` for property ` +
      `\`${ property }\`.`;
    throw new Error(message);
  }
};
|
---|
| 82 |
|
---|
/**
 * Resolves a property escape written without `=` by trying, in order: a
 * `General_Category` value, a property of strings, and finally a binary
 * property of single code points.
 *
 * @param {string} value The lone name taken from the escape.
 * @returns The set data returned by `getUnicodePropertyValueSet`.
 * @throws Whatever the final attempt throws (`unicodeMatchProperty` throws on
 *   invalid properties); failures of the first two attempts are ignored on
 *   purpose so the next interpretation can be tried.
 */
const handleLoneUnicodePropertyNameOrValue = (value) => {
  // Attempt 1: `General_Category` value.
  // Note: `unicodeMatchPropertyValue` throws on invalid values.
  try {
    const category = unicodeMatchPropertyValue('General_Category', value);
    return getUnicodePropertyValueSet('General_Category', category);
  } catch (exception) {}

  // Attempt 2: property of strings.
  try {
    return getUnicodePropertyValueSet('Property_of_Strings', value);
  } catch (exception) {}

  // Attempt 3: binary property of single code points. Errors propagate.
  const binaryProperty = unicodeMatchProperty(value);
  return getUnicodePropertyValueSet(binaryProperty);
};
|
---|
| 101 |
|
---|
/**
 * Resolves the body of a `\p{…}` / `\P{…}` escape to `{ characters, strings }`.
 *
 * @param {string} value Escape body: a lone name, or `Property=Value`.
 * @param {boolean} isNegative Truthy for the negated form.
 * @param {boolean} isUnicodeSetIgnoreCase When negating, truthy complements
 *   against `UNICODE_IV_SET` instead of `UNICODE_SET`.
 * @returns {{characters: object, strings: Set<string>}} `characters` is always
 *   a fresh clone; `strings` holds the property's strings with syntax
 *   characters backslash-escaped (empty when there are none).
 * @throws {Error} When asked to negate a property that has strings.
 */
const getUnicodePropertyEscapeSet = (value, isNegative, isUnicodeSetIgnoreCase) => {
  const segments = value.split('=');
  let resolved;
  if (segments.length === 1) {
    resolved = handleLoneUnicodePropertyNameOrValue(segments[0]);
  } else {
    // The pattern consists of two parts, i.e. `Property=Value`.
    const canonicalProperty = unicodeMatchProperty(segments[0]);
    const canonicalValue = unicodeMatchPropertyValue(canonicalProperty, segments[1]);
    resolved = getUnicodePropertyValueSet(canonicalProperty, canonicalValue);
  }

  if (!isNegative) {
    // We need to escape strings like *️⃣ to make sure that they can be safely
    // used in unions.
    const escapeSyntax = str => str.replace(SYNTAX_CHARS, '\\$&');
    return {
      characters: resolved.characters.clone(),
      strings: resolved.strings ? new Set(resolved.strings.map(escapeSyntax)) : new Set()
    };
  }

  if (resolved.strings) {
    throw new Error('Cannot negate Unicode property of strings');
  }
  const universe = isUnicodeSetIgnoreCase ? UNICODE_IV_SET : UNICODE_SET;
  return {
    characters: universe.clone().remove(resolved.characters),
    strings: new Set()
  };
};
|
---|
| 131 |
|
---|
/**
 * Builds character-class data (the shape produced by
 * `getCharacterClassEmptyData`) for a Unicode property escape.
 *
 * @param {string} property Escape body, forwarded to `getUnicodePropertyEscapeSet`.
 * @param {boolean} isNegative Truthy for `\P{…}`.
 * @param {boolean} isUnicodeSetIgnoreCase Forwarded to `getUnicodePropertyEscapeSet`.
 * @param {boolean} [shouldApplySCF] Truthy to map every code point through
 *   `simpleCaseFolding` first.
 * @returns {object} Data whose `singleChars` holds the (optionally folded)
 *   code points plus their case equivalents when case-equivalence flags are
 *   active, and whose `longStrings` / `maybeIncludesStrings` are filled in
 *   when the property has strings.
 */
const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnicodeSetIgnoreCase, shouldApplySCF) => {
  const set = getUnicodePropertyEscapeSet(property, isNegative, isUnicodeSetIgnoreCase);
  const data = getCharacterClassEmptyData();

  let singleChars = set.characters;
  if (shouldApplySCF) {
    const folded = set.characters.toArray().map(ch => simpleCaseFolding(ch));
    singleChars = regenerate(folded);
  }

  const caseEqFlags = configGetCaseEqFlags();
  if (caseEqFlags) {
    // `toArray()` is taken once up front, so code points added below are not
    // themselves revisited.
    singleChars.toArray().forEach(codepoint => {
      const equivalents = getCaseEquivalents(codepoint, caseEqFlags);
      if (equivalents) {
        singleChars.add(equivalents);
      }
    });
  }
  data.singleChars = singleChars;

  const hasStrings = set.strings.size > 0;
  if (hasStrings) {
    data.longStrings = set.strings;
    data.maybeIncludesStrings = true;
  }
  return data;
};
|
---|
| 152 |
|
---|
// Bit flags describing which case-equivalence tables `getCaseEquivalents`
// consults: none, the BMP table (`iBMPMappings`), the Unicode table
// (`iuMappings`). BMP and UNICODE can be OR-ed together.
const CASE_EQ_FLAG_NONE = 0;
const CASE_EQ_FLAG_BMP = 1 << 0;
const CASE_EQ_FLAG_UNICODE = 1 << 1;
|
---|
| 156 |
|
---|
/**
 * Derives the `CASE_EQ_FLAG_*` bitmask from the module-level `config`.
 *
 * - `i` modifier explicitly on: nothing unless modifiers are being
 *   transformed; otherwise BMP, plus UNICODE when the `u` or `v` flag is set.
 * - `i` modifier unset (`undefined`): UNICODE when the unicode flag is being
 *   transformed and the pattern has the ignoreCase flag.
 * - any other `i` value: no flags.
 *
 * @returns {number} Bitwise combination of `CASE_EQ_FLAG_*`.
 */
function configGetCaseEqFlags() {
  const iModifier = config.modifiersData.i;

  if (iModifier === true) {
    if (!config.transform.modifiers) {
      return CASE_EQ_FLAG_NONE;
    }
    if (config.flags.unicode || config.flags.unicodeSets) {
      return CASE_EQ_FLAG_NONE | CASE_EQ_FLAG_BMP | CASE_EQ_FLAG_UNICODE;
    }
    return CASE_EQ_FLAG_NONE | CASE_EQ_FLAG_BMP;
  }

  if (iModifier === undefined && config.transform.unicodeFlag && config.flags.ignoreCase) {
    return CASE_EQ_FLAG_NONE | CASE_EQ_FLAG_UNICODE;
  }
  return CASE_EQ_FLAG_NONE;
}
|
---|
| 173 |
|
---|
// For every code point from `min` through `max`, add its case-equivalent code
// points (as reported by `getCaseEquivalents`) to this set. `min` itself is
// always processed, even when `min > max`. Returns the set for chaining.
regenerate.prototype.iuAddRange = function(min, max, caseEqFlags) {
  let codePoint = min;
  do {
    const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
    if (equivalents) {
      this.add(equivalents);
    }
  } while (++codePoint <= max);
  return this;
};
|
---|
// For every code point from `min` through `max`, remove its case-equivalent
// code points (as reported by `getCaseEquivalents`) from this set. `min`
// itself is always processed, even when `min > max`. Returns the set for
// chaining.
regenerate.prototype.iuRemoveRange = function(min, max, caseEqFlags) {
  let codePoint = min;
  do {
    const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
    if (equivalents) {
      this.remove(equivalents);
    }
  } while (++codePoint <= max);
  return this;
};
|
---|
| 196 |
|
---|
/**
 * Re-parses `pattern` and copies the resulting node's properties onto `item`
 * (in place, via `Object.assign`).
 *
 * The pattern is parsed with the `u` flag when `config.useUnicodeFlag` is
 * truthy. A parsed node of type `characterClass`, `group` or `value` is used
 * directly; any other node is first wrapped in a non-capturing group.
 *
 * @param {object} item AST node to overwrite.
 * @param {string} pattern Replacement pattern source.
 */
const update = (item, pattern) => {
  const parserFlags = config.useUnicodeFlag ? 'u' : '';
  const parsed = parse(pattern, parserFlags, {
    lookbehind: true,
    namedGroups: true,
    unicodePropertyEscape: true,
    unicodeSet: true,
    modifiers: true,
  });

  const type = parsed.type;
  const needsNoWrapping = type === 'characterClass' || type === 'group' || type === 'value';
  const replacement = needsNoWrapping ? parsed : wrap(parsed, pattern);
  Object.assign(item, replacement);
};
|
---|
| 217 |
|
---|
/**
 * Wraps a parsed node in a non-capturing group node.
 *
 * @param {object} tree Node that becomes the group's only body element.
 * @param {string} pattern Source text of `tree`, used to build the group's `raw`.
 * @returns {object} A `group` node with `behavior: 'ignore'`.
 */
const wrap = (tree, pattern) => {
  const group = { 'type': 'group', 'behavior': 'ignore' };
  group.body = [tree];
  group.raw = `(?:${ pattern })`;
  return group;
};
|
---|
| 227 |
|
---|
/**
 * Given any codepoint ch, returns false or an array of characters,
 * such that for every c in the array,
 *   Canonicalize(~, c) == Canonicalize(~, ch)
 *
 * where Canonicalize is defined in
 * https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
 *
 * The returned array is always a fresh one: entries read from `iuMappings`
 * are copied before anything is appended, so the shared mapping tables are
 * never modified.
 *
 * NOTE(review): with CASE_EQ_FLAG_BMP set, the ASCII fast path can append
 * duplicates (and possibly `codePoint` itself); the visible callers feed the
 * result into sets — confirm before relying on uniqueness.
 *
 * @param {number} codePoint input code point
 * @param {number} flags bitwise flags composed of CASE_EQ_FLAG_*
 * @returns false | number[]
 */
const getCaseEquivalents = (codePoint, flags) => {
  if (flags === CASE_EQ_FLAG_NONE) {
    return false;
  }
  const mapped = ((flags & CASE_EQ_FLAG_UNICODE) ? iuMappings.get(codePoint) : undefined) || [];
  // Copy array entries: the loop below pushes onto `result`, which would
  // otherwise grow the array stored inside `iuMappings` on every call.
  let result = typeof mapped === 'number' ? [mapped] : mapped.slice();
  if (flags & CASE_EQ_FLAG_BMP) {
    for (const cp of [codePoint].concat(result)) {
      // Fast path for ASCII characters
      if (cp >= 0x41 && cp <= 0x5a) {
        result.push(cp + 0x20);
      } else if (cp >= 0x61 && cp <= 0x7a) {
        result.push(cp - 0x20);
      } else {
        result = result.concat(iBMPMappings.get(cp) || []);
      }
    }
  }
  return result.length === 0 ? false : result;
};
|
---|
| 259 |
|
---|
/**
 * Simple case folding of a single code point.
 * https://tc39.es/ecma262/#sec-maybesimplecasefolding
 *
 * @param {number} codePoint Code point to fold.
 * @returns {number} For code points up to 0x7F: `A`–`Z` shifted to `a`–`z`,
 *   anything else unchanged. Above that: the `iuFoldings` entry if there is
 *   one, otherwise `codePoint` itself.
 */
const simpleCaseFolding = (codePoint) => {
  const isAscii = codePoint <= 0x7F;
  if (!isAscii) {
    return iuFoldings.get(codePoint) || codePoint;
  }
  // Fast path for ASCII characters
  const isUppercaseLetter = codePoint >= 0x41 && codePoint <= 0x5A;
  return isUppercaseLetter ? codePoint + 0x20 : codePoint;
}
|
---|
| 271 |
|
---|
/**
 * Builds the callbacks that fold one class member into the accumulated class
 * `data` (shape: see `getCharacterClassEmptyData`) for a given set operation.
 *
 * Every returned handler object exposes:
 *   - `single(data, cp)`: a single code point
 *   - `regSet(data, set)`: a regenerate set
 *   - `range(data, start, end)`: an inclusive code point range
 *   - `iuRange(data, start, end, caseEqFlags)`: the case equivalents of a range
 *   - `nested(data, nestedData)`: the computed data of a nested class
 *
 * The intersection and subtraction handlers consult `data.first` to treat the
 * first operand as the starting value of the accumulator.
 *
 * @param {string} action 'union', 'union-negative', 'intersection' or
 *   'subtraction'.
 * @returns {object} Handler object for `action`.
 * @throws {Error} When `action` is none of the four known operations.
 */
const buildHandler = (action) => {
  switch (action) {
    case 'union':
      return {
        single: (data, cp) => {
          data.singleChars.add(cp);
        },
        regSet: (data, set2) => {
          data.singleChars.add(set2);
        },
        range: (data, start, end) => {
          data.singleChars.addRange(start, end);
        },
        iuRange: (data, start, end, caseEqFlags) => {
          data.singleChars.iuAddRange(start, end, caseEqFlags);
        },
        nested: (data, nestedData) => {
          data.singleChars.add(nestedData.singleChars);
          for (const str of nestedData.longStrings) data.longStrings.add(str);
          if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
        }
      };
    case 'union-negative': {
      // Union with the complement of the operand.
      const regSet = (data, set2) => {
        data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
      };
      return {
        single: (data, cp) => {
          const unicode = UNICODE_SET.clone();
          data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
        },
        regSet: regSet,
        range: (data, start, end) => {
          data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
        },
        iuRange: (data, start, end, caseEqFlags) => {
          data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end, caseEqFlags).add(data.singleChars);
        },
        nested: (data, nestedData) => {
          regSet(data, nestedData.singleChars);
          if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
        }
      };
    }
    case 'intersection': {
      const regSet = (data, set2) => {
        // Clone the first operand: later operands call `.intersection()` on
        // `data.singleChars` in place, and `set2` may be a set owned by
        // someone else (e.g. an entry of a shared lookup table).
        if (data.first) data.singleChars = set2.clone();
        else data.singleChars.intersection(set2);
      };
      return {
        single: (data, cp) => {
          data.singleChars = data.first || data.singleChars.contains(cp) ? regenerate(cp) : regenerate();
          data.longStrings.clear();
          data.maybeIncludesStrings = false;
        },
        regSet: (data, set) => {
          regSet(data, set);
          data.longStrings.clear();
          data.maybeIncludesStrings = false;
        },
        range: (data, start, end) => {
          if (data.first) data.singleChars.addRange(start, end);
          else data.singleChars.intersection(regenerate().addRange(start, end));
          data.longStrings.clear();
          data.maybeIncludesStrings = false;
        },
        iuRange: (data, start, end, caseEqFlags) => {
          if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
          else data.singleChars.intersection(regenerate().iuAddRange(start, end, caseEqFlags));
          data.longStrings.clear();
          data.maybeIncludesStrings = false;
        },
        nested: (data, nestedData) => {
          regSet(data, nestedData.singleChars);

          if (data.first) {
            data.longStrings = nestedData.longStrings;
            data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
          } else {
            // Keep only the strings present on both sides.
            for (const str of data.longStrings) {
              if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
            }
            if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
          }
        }
      };
    }
    case 'subtraction': {
      const regSet = (data, set2) => {
        if (data.first) data.singleChars.add(set2);
        else data.singleChars.remove(set2);
      };
      return {
        single: (data, cp) => {
          if (data.first) data.singleChars.add(cp);
          else data.singleChars.remove(cp);
        },
        regSet: regSet,
        range: (data, start, end) => {
          if (data.first) data.singleChars.addRange(start, end);
          else data.singleChars.removeRange(start, end);
        },
        iuRange: (data, start, end, caseEqFlags) => {
          if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
          else data.singleChars.iuRemoveRange(start, end, caseEqFlags);
        },
        nested: (data, nestedData) => {
          regSet(data, nestedData.singleChars);

          if (data.first) {
            data.longStrings = nestedData.longStrings;
            data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
          } else {
            // Drop the strings that also occur in the subtracted operand.
            for (const str of data.longStrings) {
              if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
            }
          }
        }
      };
    }
    // The `default` clause is only here as a safeguard; it should never be
    // reached. Code coverage tools should ignore it.
    /* node:coverage ignore next */
    default:
      // Report the offending `action`; no `characterClassItem` exists in this
      // scope, so referencing it here would raise a ReferenceError instead.
      throw new Error(`Unknown set action: ${ action }`);
  }
};
|
---|
| 399 |
|
---|
/**
 * Creates a fresh accumulator for computing a character class.
 *
 * @returns {object} Data with an empty regenerate set in `singleChars`, an
 *   empty `longStrings` Set, `first` set to `true`, `hasEmptyString` and
 *   `maybeIncludesStrings` set to `false`, and `transformed` initialised from
 *   `config.transform.unicodeFlag`.
 */
const getCharacterClassEmptyData = () => {
  const emptyData = {
    transformed: config.transform.unicodeFlag,
    singleChars: regenerate(),
    longStrings: new Set(),
    hasEmptyString: false,
    first: true,
    maybeIncludesStrings: false
  };
  return emptyData;
};
|
---|
| 408 |
|
---|
/**
 * Returns `codePoint` followed by its case equivalents, if any.
 *
 * @param {number} codePoint Code point to expand.
 * @param {number} caseEqFlags Bitwise `CASE_EQ_FLAG_*` value forwarded to
 *   `getCaseEquivalents`.
 * @returns {number[]} A new array that always starts with `codePoint`.
 */
const concatCaseEquivalents = (codePoint, caseEqFlags) => {
  const combined = [codePoint];
  const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
  if (equivalents) {
    combined.push(...equivalents);
  }
  return combined;
};
|
---|
| 416 |
|
---|
/**
 * Computes character-class data for a class-strings (`\q{…}`) node.
 *
 * One-character strings go into `singleChars` together with their case
 * equivalents. Every other string (including the empty one) is turned into
 * pattern source, character by character, and stored in `longStrings`.
 *
 * @param {object} classStrings Node whose `strings` each carry `characters`.
 * @param {object} regenerateOptions Options passed to regenerate's `toString`.
 * @param {number} caseEqFlags Bitwise `CASE_EQ_FLAG_*` value.
 * @param {boolean} [shouldApplySCF] Truthy to fold each code point with
 *   `simpleCaseFolding` first.
 * @returns {object} Freshly created class data.
 */
const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags, shouldApplySCF) => {
  const data = getCharacterClassEmptyData();
  const fold = (cp) => (shouldApplySCF ? simpleCaseFolding(cp) : cp);

  for (const string of classStrings.strings) {
    const characters = string.characters;

    if (characters.length === 1) {
      const codePoint = fold(characters[0].codePoint);
      for (const cp of concatCaseEquivalents(codePoint, caseEqFlags)) {
        data.singleChars.add(cp);
      }
      continue;
    }

    let stringifiedString = '';
    for (const ch of characters) {
      const codePoint = fold(ch.codePoint);
      if (caseEqFlags) {
        // Each character becomes a class of itself plus its case equivalents.
        const set = regenerate(concatCaseEquivalents(codePoint, caseEqFlags));
        stringifiedString += set.toString(regenerateOptions);
      } else if (codePoint !== ch.codePoint) {
        // Folding changed the character, so its original source is unusable.
        stringifiedString += regenerate(codePoint).toString(regenerateOptions);
      } else {
        stringifiedString += generate(ch);
      }
    }

    data.longStrings.add(stringifiedString);
    data.maybeIncludesStrings = true;
  }

  return data;
}
|
---|
| 452 |
|
---|
/**
 * Computes the class data (code points, strings and the `transformed` marker)
 * for a character class node by folding each member of its body into a fresh
 * accumulator using the handlers matching the class kind.
 *
 * @param {object} characterClassItem Class node with `kind`, `body` and
 *   `negative`.
 * @param {object} regenerateOptions Options passed to regenerate's `toString`.
 * @param {boolean} [shouldApplySCF] Truthy to apply simple case folding to
 *   members; forced on for intersection/subtraction classes when
 *   `config.isIgnoreCaseMode` is set.
 * @returns {object} The accumulated class data.
 * @throws {Error} On an unknown class kind or member type.
 * @throws {SyntaxError} When a negated class may contain strings.
 */
const computeCharacterClass = (characterClassItem, regenerateOptions, shouldApplySCF) => {
  const data = getCharacterClassEmptyData();
  const caseEqFlags = configGetCaseEqFlags();
  const kind = characterClassItem.kind;

  // Handler for ordinary members, and for negated nested classes.
  let handlePositive;
  let handleNegative;
  let applySCF = shouldApplySCF;

  if (kind === 'union') {
    handlePositive = buildHandler('union');
    handleNegative = buildHandler('union-negative');
  } else if (kind === 'intersection' || kind === 'subtraction') {
    // Intersecting with a negated operand is a subtraction and vice versa.
    handlePositive = buildHandler(kind);
    handleNegative = buildHandler(kind === 'intersection' ? 'subtraction' : 'intersection');
    if (config.transform.unicodeSetsFlag) data.transformed = true;
    if (config.isIgnoreCaseMode) {
      applySCF = true;
    }
  } else {
    // Only here as a safeguard; it should never be reached.
    /* node:coverage ignore next */
    throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`);
  }

  for (const item of characterClassItem.body) {
    switch (item.type) {
      case 'value': {
        const codePoint = applySCF ? simpleCaseFolding(item.codePoint) : item.codePoint;
        const list = concatCaseEquivalents(codePoint, caseEqFlags);
        handlePositive.regSet(data, regenerate(list));
        if (list.length > 1) {
          data.transformed = true;
        }
        break;
      }
      case 'characterClassRange': {
        const min = item.min.codePoint;
        const max = item.max.codePoint;
        if (applySCF) {
          const folded = [];
          for (let cp = min; cp <= max; cp++) {
            folded.push(simpleCaseFolding(cp));
          }
          handlePositive.regSet(data, regenerate(folded));
        } else {
          handlePositive.range(data, min, max);
        }
        if (caseEqFlags) {
          // If applySCF is true, it is still ok to call iuRange because
          // the set [min, max] shares the same case equivalents with scf([min, max])
          handlePositive.iuRange(data, min, max, caseEqFlags);
          data.transformed = true;
        }
        break;
      }
      case 'characterClassEscape': {
        const escapeSet = getCharacterClassEscapeSet(
          item.value,
          config.flags.unicode || config.flags.unicodeSets,
          config.flags.ignoreCase,
          applySCF
        );
        handlePositive.regSet(data, escapeSet);
        break;
      }
      case 'unicodePropertyEscape': {
        const nestedData = getUnicodePropertyEscapeCharacterClassData(
          item.value,
          item.negative,
          config.flags.unicodeSets && config.isIgnoreCaseMode,
          applySCF
        );
        handlePositive.nested(data, nestedData);
        data.transformed =
          data.transformed ||
          config.transform.unicodePropertyEscapes ||
          (config.transform.unicodeSetsFlag && (nestedData.maybeIncludesStrings || characterClassItem.kind !== "union" || item.negative));
        break;
      }
      case 'characterClass': {
        const handler = item.negative ? handleNegative : handlePositive;
        const nestedResult = computeCharacterClass(item, regenerateOptions, applySCF);
        handler.nested(data, nestedResult);
        data.transformed = true;
        break;
      }
      case 'classStrings': {
        const stringsData = computeClassStrings(item, regenerateOptions, caseEqFlags, applySCF);
        handlePositive.nested(data, stringsData);
        data.transformed = true;
        break;
      }
      // The `default` clause is only here as a safeguard; it should never be
      // reached. Code coverage tools should ignore it.
      /* node:coverage ignore next */
      default:
        throw new Error(`Unknown term type: ${ item.type }`);
    }

    // From the second member on, intersection/subtraction handlers combine
    // with the accumulator instead of seeding it.
    data.first = false;
  }

  if (characterClassItem.negative && data.maybeIncludesStrings) {
    throw new SyntaxError('Cannot negate set containing strings');
  }

  return data;
}
|
---|
| 565 |
|
---|
/**
 * Rewrites a character class node in place when its computed data is marked
 * `transformed`; otherwise the node is returned untouched.
 *
 * @param {object} characterClassItem Node to rewrite; its `negative` property
 *   selects the negated output forms.
 * @param {object} regenerateOptions Options passed to regenerate's `toString`.
 * @param {object} [computed] Precomputed class data; defaults to
 *   `computeCharacterClass(characterClassItem, regenerateOptions)`.
 * @returns {object} `characterClassItem` (the same object).
 */
const processCharacterClass = (
  characterClassItem,
  regenerateOptions,
  computed = computeCharacterClass(characterClassItem, regenerateOptions)
) => {
  const negative = characterClassItem.negative;
  const { singleChars, transformed, longStrings } = computed;
  if (!transformed) {
    return characterClassItem;
  }

  // If single chars already contains some astral character, regenerate (bmpOnly: true) will create valid regex strings
  const bmpOnly = regenerateContainsAstral(singleChars);
  const setStr = singleChars.toString(Object.assign({}, regenerateOptions, { bmpOnly }));

  if (!negative) {
    // Alternation of the strings, longest first, with the class inserted
    // after them but ahead of the empty string when there is one.
    const hasEmptyString = longStrings.has('');
    const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length);
    if (setStr !== '[]' || longStrings.size === 0) {
      const insertAt = pieces.length - (hasEmptyString ? 1 : 0);
      pieces.splice(insertAt, 0, setStr);
    }
    update(characterClassItem, pieces.join('|'));
    return characterClassItem;
  }

  if (config.useUnicodeFlag) {
    const inner = setStr[0] === '[' ? setStr.slice(1, -1) : setStr;
    update(characterClassItem, `[^${inner}]`);
  } else if (!(config.flags.unicode || config.flags.unicodeSets)) {
    update(characterClassItem, `(?!${setStr})[^]`);
  } else if (config.flags.ignoreCase) {
    const astralCharsSet = singleChars.clone().intersection(ASTRAL_SET);
    // Assumption: singleChars do not contain lone surrogates.
    // Regex like /[^\ud800]/u is not supported
    const surrogateOrBMPSetStr = singleChars
      .clone()
      .remove(astralCharsSet)
      .addRange(0xd800, 0xdfff)
      .toString({ bmpOnly: true });
    // Don't generate negative lookahead for astral characters
    // because the case folding is not working anyway as we break
    // code points into surrogate pairs.
    const astralNegativeSetStr = ASTRAL_SET
      .clone()
      .remove(astralCharsSet)
      .toString(regenerateOptions);
    // The transform here does not support lone surrogates.
    update(
      characterClassItem,
      `(?!${surrogateOrBMPSetStr})[^]|${astralNegativeSetStr}`
    );
  } else {
    // Generate negative set directly when case folding is not involved.
    const negativeSet = UNICODE_SET.clone().remove(singleChars);
    update(characterClassItem, negativeSet.toString(regenerateOptions));
  }
  return characterClassItem;
};
|
---|
| 626 |
|
---|
/**
 * Throws when any named reference is still recorded as unmatched.
 *
 * @param {{unmatchedReferences: object}} groups Group bookkeeping; the own
 *   enumerable keys of `unmatchedReferences` are the unresolved names.
 * @throws {Error} Listing the unresolved names (comma-joined).
 */
const assertNoUnmatchedReferences = (groups) => {
  const pendingNames = Object.keys(groups.unmatchedReferences);
  if (pendingNames.length === 0) {
    return;
  }
  throw new Error(`Unknown group names: ${pendingNames}`);
};
|
---|
| 633 |
|
---|
/**
 * Processes the body of a group that carries inline modifier flags.
 *
 * The flags listed in `item.modifierFlags` are switched on/off in
 * `config.modifiersData` while the body terms are processed, and the previous
 * modifier state is put back afterwards, including when processing a term
 * throws. When modifiers are being transformed, the group is turned into a
 * plain non-capturing group (`modifierFlags` removed, `behavior` set to
 * `'ignore'`).
 *
 * @param {object} item Group node with `modifierFlags.enabling`,
 *   `modifierFlags.disabling` (iterables of flag names) and `body`.
 * @param {object} regenerateOptions Forwarded to `processTerm`.
 * @param {object} groups Forwarded to `processTerm`.
 * @returns {object} `item`, with its body replaced by the processed terms.
 */
const processModifiers = (item, regenerateOptions, groups) => {
  const enabling = item.modifierFlags.enabling;
  const disabling = item.modifierFlags.disabling;

  // Snapshot to restore once the group body has been handled.
  const oldData = Object.assign({}, config.modifiersData);

  try {
    for (const flag of enabling) {
      config.modifiersData[flag] = true;
    }
    for (const flag of disabling) {
      config.modifiersData[flag] = false;
    }

    if (config.transform.modifiers) {
      delete item.modifierFlags;
      item.behavior = 'ignore';
    }

    item.body = item.body.map(term => {
      return processTerm(term, regenerateOptions, groups);
    });
  } finally {
    // Restore even on failure so the group's modifiers cannot leak into
    // whatever is processed next with the same `config`.
    config.modifiersData = oldData;
  }

  return item;
}
|
---|
| 660 |
|
---|
| 661 | const processTerm = (item, regenerateOptions, groups) => {
|
---|
| 662 | switch (item.type) {
|
---|
| 663 | case 'dot':
|
---|
| 664 | if (config.transform.unicodeFlag) {
|
---|
| 665 | update(
|
---|
| 666 | item,
|
---|
| 667 | getUnicodeDotSet(config.isDotAllMode).toString(regenerateOptions)
|
---|
| 668 | );
|
---|
| 669 | } else if ((config.modifiersData.s != null ? config.modifiersData.s && config.transform.modifiers : config.transform.dotAllFlag)) {
|
---|
| 670 | // TODO: consider changing this at the regenerate level.
|
---|
| 671 | update(item, '[^]');
|
---|
| 672 | }
|
---|
| 673 | break;
|
---|
| 674 | case 'characterClass':
|
---|
| 675 | item = processCharacterClass(item, regenerateOptions);
|
---|
| 676 | break;
|
---|
| 677 | case 'unicodePropertyEscape':
|
---|
| 678 | const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative, config.flags.unicodeSets && config.isIgnoreCaseMode);
|
---|
| 679 | if (data.maybeIncludesStrings) {
|
---|
| 680 | if (!config.flags.unicodeSets) {
|
---|
| 681 | throw new Error(
|
---|
| 682 | 'Properties of strings are only supported when using the unicodeSets (v) flag.'
|
---|
| 683 | );
|
---|
| 684 | }
|
---|
| 685 | if (config.transform.unicodeSetsFlag) {
|
---|
| 686 | data.transformed = true;
|
---|
| 687 | item = processCharacterClass(item, regenerateOptions, data);
|
---|
| 688 | }
|
---|
| 689 | } else if (config.transform.unicodePropertyEscapes || configGetCaseEqFlags()) {
|
---|
| 690 | update(
|
---|
| 691 | item,
|
---|
| 692 | data.singleChars.toString(regenerateOptions)
|
---|
| 693 | );
|
---|
| 694 | }
|
---|
| 695 | break;
|
---|
| 696 | case 'characterClassEscape':
|
---|
| 697 | if (config.transform.unicodeFlag) {
|
---|
| 698 | update(
|
---|
| 699 | item,
|
---|
| 700 | getCharacterClassEscapeSet(
|
---|
| 701 | item.value,
|
---|
| 702 | /* config.transform.unicodeFlag implies config.flags.unicode */ true,
|
---|
| 703 | config.flags.ignoreCase
|
---|
| 704 | ).toString(regenerateOptions)
|
---|
| 705 | );
|
---|
| 706 | }
|
---|
| 707 | break;
|
---|
| 708 | case 'group':
|
---|
| 709 | if (item.behavior == 'normal') {
|
---|
| 710 | groups.lastIndex++;
|
---|
| 711 | }
|
---|
| 712 | if (item.name) {
|
---|
| 713 | const name = item.name.value;
|
---|
| 714 |
|
---|
| 715 | if (groups.namesConflicts[name]) {
|
---|
| 716 | throw new Error(
|
---|
| 717 | `Group '${ name }' has already been defined in this context.`
|
---|
| 718 | );
|
---|
| 719 | }
|
---|
| 720 | groups.namesConflicts[name] = true;
|
---|
| 721 |
|
---|
| 722 | if (config.transform.namedGroups) {
|
---|
| 723 | delete item.name;
|
---|
| 724 | }
|
---|
| 725 |
|
---|
| 726 | const index = groups.lastIndex;
|
---|
| 727 | if (!groups.names[name]) {
|
---|
| 728 | groups.names[name] = [];
|
---|
| 729 | }
|
---|
| 730 | groups.names[name].push(index);
|
---|
| 731 |
|
---|
| 732 | if (groups.onNamedGroup) {
|
---|
| 733 | groups.onNamedGroup.call(null, name, index);
|
---|
| 734 | }
|
---|
| 735 |
|
---|
| 736 | if (groups.unmatchedReferences[name]) {
|
---|
| 737 | delete groups.unmatchedReferences[name];
|
---|
| 738 | }
|
---|
| 739 | }
|
---|
| 740 | if (item.modifierFlags) {
|
---|
| 741 | return processModifiers(item, regenerateOptions, groups);
|
---|
| 742 | }
|
---|
| 743 | /* falls through */
|
---|
| 744 | case 'quantifier':
|
---|
| 745 | item.body = item.body.map(term => {
|
---|
| 746 | return processTerm(term, regenerateOptions, groups);
|
---|
| 747 | });
|
---|
| 748 | break;
|
---|
| 749 | case 'disjunction':
|
---|
| 750 | const outerNamesConflicts = groups.namesConflicts;
|
---|
| 751 | item.body = item.body.map(term => {
|
---|
| 752 | groups.namesConflicts = Object.create(outerNamesConflicts);
|
---|
| 753 | return processTerm(term, regenerateOptions, groups);
|
---|
| 754 | });
|
---|
| 755 | break;
|
---|
| 756 | case 'alternative':
|
---|
| 757 | item.body = flatMap(item.body, term => {
|
---|
| 758 | const res = processTerm(term, regenerateOptions, groups);
|
---|
| 759 | // Alternatives cannot contain alternatives; flatten them.
|
---|
| 760 | return res.type === 'alternative' ? res.body : res;
|
---|
| 761 | });
|
---|
| 762 | break;
|
---|
| 763 | case 'value':
|
---|
| 764 | const codePoint = item.codePoint;
|
---|
| 765 | const caseEqFlags = configGetCaseEqFlags();
|
---|
| 766 | const list = concatCaseEquivalents(codePoint, caseEqFlags);
|
---|
| 767 | if (list.length === 1 && item.kind === "symbol" && codePoint >= 0x20 && codePoint <= 0x7E) {
|
---|
| 768 | // skip regenerate when it is a printable ASCII symbol
|
---|
| 769 | break;
|
---|
| 770 | }
|
---|
| 771 | const set = regenerate(list);
|
---|
| 772 | update(item, set.toString(regenerateOptions));
|
---|
| 773 | break;
|
---|
| 774 | case 'reference':
|
---|
| 775 | if (item.name) {
|
---|
| 776 | const name = item.name.value;
|
---|
| 777 | const indexes = groups.names[name];
|
---|
| 778 | if (!indexes) {
|
---|
| 779 | groups.unmatchedReferences[name] = true;
|
---|
| 780 | }
|
---|
| 781 |
|
---|
| 782 | if (config.transform.namedGroups) {
|
---|
| 783 | if (indexes) {
|
---|
| 784 | const body = indexes.map(index => ({
|
---|
| 785 | 'type': 'reference',
|
---|
| 786 | 'matchIndex': index,
|
---|
| 787 | 'raw': '\\' + index,
|
---|
| 788 | }));
|
---|
| 789 | if (body.length === 1) {
|
---|
| 790 | return body[0];
|
---|
| 791 | }
|
---|
| 792 | return {
|
---|
| 793 | 'type': 'alternative',
|
---|
| 794 | 'body': body,
|
---|
| 795 | 'raw': body.map(term => term.raw).join(''),
|
---|
| 796 | };
|
---|
| 797 | }
|
---|
| 798 |
|
---|
| 799 | // This named reference comes before the group where it’s defined,
|
---|
| 800 | // so it’s always an empty match.
|
---|
| 801 | return {
|
---|
| 802 | 'type': 'group',
|
---|
| 803 | 'behavior': 'ignore',
|
---|
| 804 | 'body': [],
|
---|
| 805 | 'raw': '(?:)',
|
---|
| 806 | };
|
---|
| 807 | }
|
---|
| 808 | }
|
---|
| 809 | break;
|
---|
| 810 | case 'anchor':
|
---|
| 811 | if (config.modifiersData.m && config.transform.modifiers) {
|
---|
| 812 | if (item.kind == 'start') {
|
---|
| 813 | update(item, `(?:^|(?<=${NEWLINE_SET.toString()}))`);
|
---|
| 814 | } else if (item.kind == 'end') {
|
---|
| 815 | update(item, `(?:$|(?=${NEWLINE_SET.toString()}))`);
|
---|
| 816 | }
|
---|
| 817 | }
|
---|
| 818 | case 'empty':
|
---|
| 819 | // Nothing to do here.
|
---|
| 820 | break;
|
---|
| 821 | // The `default` clause is only here as a safeguard; it should never be
|
---|
| 822 | // reached. Code coverage tools should ignore it.
|
---|
| 823 | /* node:coverage ignore next */
|
---|
| 824 | default:
|
---|
| 825 | throw new Error(`Unknown term type: ${ item.type }`);
|
---|
| 826 | }
|
---|
| 827 | return item;
|
---|
| 828 | };
|
---|
| 829 |
|
---|
// Module-level state describing the pattern that is currently being rewritten.
// `rewritePattern` overwrites `flags`, `transform` and `modifiersData` at the
// start of every call; the getters derive values from those three groups.
const config = {
	// Which flags were present in the `flags` string handed to `rewritePattern`.
	flags: {
		ignoreCase: false,
		unicode: false,
		unicodeSets: false,
		dotAll: false,
		multiline: false,
	},
	// Which features were requested with the `'transform'` option value.
	transform: {
		dotAllFlag: false,
		unicodeFlag: false,
		unicodeSetsFlag: false,
		unicodePropertyEscapes: false,
		namedGroups: false,
		modifiers: false,
	},
	// Per-modifier overrides; `undefined` means "no override, use the flag".
	modifiersData: {
		i: undefined,
		s: undefined,
		m: undefined,
	},
	// Truthy when the `u` or `v` flag is set and the unicode flag is not
	// being transformed away.
	get useUnicodeFlag() {
		const { flags, transform } = this;
		const hasUnicodeLikeFlag = flags.unicode || flags.unicodeSets;
		if (!hasUnicodeLikeFlag) {
			return hasUnicodeLikeFlag;
		}
		return !transform.unicodeFlag;
	},
	// The `s` modifier override when one is set, otherwise the `s` flag.
	get isDotAllMode() {
		const { s } = this.modifiersData;
		if (s === undefined) {
			return this.flags.dotAll;
		}
		return s;
	},
	// The `i` modifier override when one is set, otherwise the `i` flag.
	get isIgnoreCaseMode() {
		const { i } = this.modifiersData;
		if (i === undefined) {
			return this.flags.ignoreCase;
		}
		return i;
	},
};
|
---|
| 861 |
|
---|
/**
 * Checks every own enumerable key of `options` and throws on the first one
 * that is unknown or carries an unsupported value.
 *
 * @param {Object} [options] - User-supplied options; a falsy value is accepted as "no options".
 * @throws {Error} When a key is not recognised or its value is not allowed for that key.
 */
const validateOptions = (options) => {
	if (!options) {
		return;
	}

	// Options whose only meaningful value is 'transform'.
	const transformOnlyKeys = [
		'dotAllFlag',
		'unicodeFlag',
		'unicodePropertyEscapes',
		'unicodeSetsFlag',
		'namedGroups',
	];
	// Options that must be callbacks when provided.
	const callbackKeys = ['onNamedGroup', 'onNewFlags'];

	Object.keys(options).forEach((key) => {
		const value = options[key];
		// `null`, `undefined` and `false` all mean "feature not requested".
		const isDisabled = value == null || value === false;

		if (transformOnlyKeys.includes(key)) {
			if (!isDisabled && value !== 'transform') {
				throw new Error(`.${key} must be false (default) or 'transform'.`);
			}
		} else if (key === 'modifiers') {
			// todo: remove modifiers: 'parse' in regexpu-core v7
			if (!isDisabled && value !== 'parse' && value !== 'transform') {
				throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
			}
		} else if (callbackKeys.includes(key)) {
			// Note that `false` is rejected here: only nullish or a function passes.
			if (value != null && typeof value !== 'function') {
				throw new Error(`.${key} must be a function.`);
			}
		} else {
			throw new Error(`.${key} is not a valid regexpu-core option.`);
		}
	});
};
|
---|
| 894 |
|
---|
// Reports whether `flag` occurs in `flags`; a missing or empty `flags` value
// yields `false`.
const hasFlag = (flags, flag) => {
	if (!flags) {
		return false;
	}
	return flags.includes(flag);
};
|
---|
// Reports whether the option called `name` was set to the string
// 'transform'; a missing `options` object yields `false`.
const transform = (options, name) => {
	if (!options) {
		return false;
	}
	return options[name] === 'transform';
};
|
---|
| 897 |
|
---|
// Walks every array and object reachable from `tree` and returns a
// null-prototype map `{ [flag]: true }` holding each flag listed in the
// `disabling` part of any `modifierFlags` entry.
const collectDisabledModifiers = (tree) => {
	const disabledModifiers = Object.create(null);
	const pending = [tree];
	// Loop on the stack size rather than on the popped value, so that a
	// `null`/`undefined` array element cannot end the traversal early.
	while (pending.length > 0) {
		const node = pending.pop();
		if (Array.isArray(node)) {
			for (const child of node) {
				pending.push(child);
			}
		} else if (typeof node === 'object' && node !== null) {
			for (const key of Object.keys(node)) {
				const value = node[key];
				if (key === 'modifierFlags') {
					for (const flag of value.disabling) {
						disabledModifiers[flag] = true;
					}
				} else if (typeof value === 'object' && value !== null) {
					pending.push(value);
				}
			}
		}
	}
	return disabledModifiers;
};

/**
 * Parses `pattern`, rewrites the resulting tree according to `flags` and
 * `options`, and regenerates it.
 *
 * The module-level `config` object is overwritten at the start of each call,
 * so its contents always reflect the most recent invocation.
 *
 * @param {string} pattern - Regular expression source handed to the parser.
 * @param {string} [flags] - Flag characters; `u`, `v`, `i`, `s` and `m` are inspected.
 *   A missing value is treated as "no flags".
 * @param {Object} [options] - Checked by `validateOptions`. Transform options take
 *   `'transform'`; `onNamedGroup` and `onNewFlags` are optional callbacks.
 *   `onNewFlags` is called once with the flag string left after the
 *   requested transforms.
 * @returns The value produced by `generate(tree)` (presumably the rewritten
 *   pattern source; confirm against the generator's documentation).
 * @throws {Error} When `options` is invalid; errors raised while parsing or
 *   processing the tree propagate unchanged.
 */
const rewritePattern = (pattern, flags, options) => {
	validateOptions(options);

	config.flags.unicode = hasFlag(flags, 'u');
	config.flags.unicodeSets = hasFlag(flags, 'v');
	config.flags.ignoreCase = hasFlag(flags, 'i');
	config.flags.dotAll = hasFlag(flags, 's');
	config.flags.multiline = hasFlag(flags, 'm');

	config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');
	config.transform.unicodeFlag = (config.flags.unicode || config.flags.unicodeSets) && transform(options, 'unicodeFlag');
	config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag');

	// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
	config.transform.unicodePropertyEscapes = (config.flags.unicode || config.flags.unicodeSets) && (
		transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes')
	);
	config.transform.namedGroups = transform(options, 'namedGroups');
	config.transform.modifiers = transform(options, 'modifiers');

	// Drop any modifier overrides left behind by a previous call.
	config.modifiersData.i = undefined;
	config.modifiersData.s = undefined;
	config.modifiersData.m = undefined;

	const regjsparserFeatures = {
		// Enable every stable RegExp feature by default
		'modifiers': true,
		'unicodePropertyEscape': true,
		'unicodeSet': true,
		'namedGroups': true,
		'lookbehind': true,
	};

	const regenerateOptions = {
		'hasUnicodeFlag': config.useUnicodeFlag,
		'bmpOnly': !config.flags.unicode && !config.flags.unicodeSets
	};

	const groups = {
		'onNamedGroup': options && options.onNamedGroup,
		'lastIndex': 0,
		'names': Object.create(null), // { [name]: Array<index> }
		'namesConflicts': Object.create(null), // { [name]: true }
		'unmatchedReferences': Object.create(null) // { [name]: true }
	};

	const tree = parse(pattern, flags, regjsparserFeatures);

	// The cheap textual check only says the pattern _likely_ contains inline
	// disabled modifiers; the tree is traversed to confirm that they really
	// are modifiers and to collect them.
	if (config.transform.modifiers && /\(\?[a-z]*-[a-z]+:/.test(pattern)) {
		const allDisabledModifiers = collectDisabledModifiers(tree);
		if (allDisabledModifiers.i) {
			config.modifiersData.i = config.flags.ignoreCase;
		}
		if (allDisabledModifiers.m) {
			config.modifiersData.m = config.flags.multiline;
		}
		if (allDisabledModifiers.s) {
			config.modifiersData.s = config.flags.dotAll;
		}
	}

	// Note: `processTerm` mutates `tree` and `groups`.
	processTerm(tree, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);

	const onNewFlags = options && options.onNewFlags;
	if (onNewFlags) {
		// `flags` may be omitted (see `hasFlag`), so fall back to an empty
		// string instead of calling `.split` on `undefined`.
		let newFlags = (flags || '')
			.split('')
			.filter((flag) => !config.modifiersData[flag])
			.join('');
		if (config.transform.unicodeSetsFlag) {
			newFlags = newFlags.replace('v', 'u');
		}
		if (config.transform.unicodeFlag) {
			newFlags = newFlags.replace('u', '');
		}
		if (config.transform.dotAllFlag) {
			newFlags = newFlags.replace('s', '');
		}
		onNewFlags(newFlags);
	}

	return generate(tree);
};
|
---|
| 1002 |
|
---|
| 1003 | module.exports = rewritePattern;
|
---|