source: imaps-frontend/node_modules/regexpu-core/rewrite-pattern.js

main
Last change on this file was 79a0317, checked in by stefan toskovski <stefantoska84@…>, 4 days ago

F4 Finalna Verzija

  • Property mode set to 100644
File size: 30.6 KB
Line 
1'use strict';
2
3const generate = require('regjsgen').generate;
4const parse = require('regjsparser').parse;
5const regenerate = require('regenerate');
6const unicodeMatchProperty = require('unicode-match-property-ecmascript');
7const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
8const iuMappings = require('./data/iu-mappings.js');
9const iBMPMappings = require('./data/i-bmp-mappings.js');
10const iuFoldings = require('./data/iu-foldings.js');
11const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
12const { UNICODE_SET, UNICODE_IV_SET } = require('./data/all-characters.js');
13
/**
 * Maps every element of `array` through `callback` and flattens the results
 * by one level: an array result contributes each of its elements, any other
 * result is appended as a single element.
 *
 * @param {Array} array input elements
 * @param {Function} callback invoked with each element
 * @returns {Array} the flattened results, in input order
 */
function flatMap(array, callback) {
	return array.reduce((flattened, item) => {
		const mapped = callback(item);
		if (Array.isArray(mapped)) {
			flattened.push(...mapped);
		} else {
			flattened.push(mapped);
		}
		return flattened;
	}, []);
}
26
/**
 * Reports whether the last entry of a regenerate set's internal `data` array
 * is at or above U+10000.
 *
 * NOTE(review): if `data` stores exclusive range ends, a set whose highest
 * member is U+FFFF would also satisfy this check — confirm against regenerate.
 *
 * @param {{data: number[]}} regenerateData object exposing a `data` array
 * @returns {boolean}
 */
function regenerateContainsAstral(regenerateData) {
	const { data } = regenerateData;
	if (!(data.length >= 1)) {
		return false;
	}
	const lastEntry = data[data.length - 1];
	return lastEntry >= 0x10000;
}
31
// Matches every `SyntaxCharacter`; used to escape strings before they are
// spliced into a pattern.
// https://tc39.es/ecma262/#prod-SyntaxCharacter
const SYNTAX_CHARS = /[\\^$.*+?()[\]{}|]/g;

// The code point range U+10000..U+10FFFF.
const ASTRAL_SET = regenerate().addRange(0x10000, 0x10FFFF);

// `LineTerminator`s (https://mths.be/es6#sec-line-terminators).
const NEWLINE_SET = regenerate().add([
	0x000A, // Line Feed <LF>
	0x000D, // Carriage Return <CR>
	0x2028, // Line Separator <LS>
	0x2029, // Paragraph Separator <PS>
]);

// All code points that are supposed to be matched by `/./u`, i.e. every
// Unicode code point except the line terminators.
// https://mths.be/es6#sec-atom
const DOT_SET_UNICODE = UNICODE_SET.clone().remove(NEWLINE_SET);
49
/**
 * Looks up the precomputed set for a character class escape such as `d`,
 * `w` or `s`, choosing the table that matches the active flags.
 *
 * @param {string} character the escape letter
 * @param {boolean} unicode whether Unicode mode is active
 * @param {boolean} ignoreCase whether case-insensitive matching is active
 * @param {boolean} shouldApplySCF when true (and in Unicode ignore-case mode),
 *   the `UNICODESET_IGNORE_CASE` table is used instead of `UNICODE_IGNORE_CASE`
 * @returns the table entry for `character`
 */
const getCharacterClassEscapeSet = (character, unicode, ignoreCase, shouldApplySCF) => {
	if (!unicode) {
		return ESCAPE_SETS.REGULAR.get(character);
	}
	if (!ignoreCase) {
		return ESCAPE_SETS.UNICODE.get(character);
	}
	const table = shouldApplySCF
		? ESCAPE_SETS.UNICODESET_IGNORE_CASE
		: ESCAPE_SETS.UNICODE_IGNORE_CASE;
	return table.get(character);
};
64
/**
 * Selects the set that `.` stands for in Unicode mode.
 *
 * @param {boolean} dotAll whether dot-all behaviour is active
 * @returns `UNICODE_SET` when `dotAll` is truthy, otherwise `DOT_SET_UNICODE`
 */
const getUnicodeDotSet = (dotAll) => {
	if (dotAll) {
		return UNICODE_SET;
	}
	return DOT_SET_UNICODE;
};
68
/**
 * Loads the data module for a Unicode property (and optional value) from the
 * `regenerate-unicode-properties` package.
 *
 * @param {string} property property name
 * @param {string} [value] property value; when falsy, `property` is looked up
 *   under `Binary_Property/`
 * @returns whatever the required module exports
 * @throws {Error} when the module cannot be loaded
 */
const getUnicodePropertyValueSet = (property, value) => {
	let path;
	if (value) {
		path = `${ property }/${ value }`;
	} else {
		path = `Binary_Property/${ property }`;
	}
	let loaded;
	try {
		loaded = require(`regenerate-unicode-properties/${ path }.js`);
	} catch (exception) {
		const message =
			`Failed to recognize value \`${ value }\` for property ` +
			`\`${ property }\`.`;
		throw new Error(message);
	}
	return loaded;
};
82
/**
 * Resolves a `\p{…}` body that has no `=`. The candidates are tried in order:
 * a `General_Category` value, a property of strings, and finally a binary
 * property of single code points. A failed attempt simply moves on to the
 * next one; only the last lookup is allowed to throw.
 *
 * @param {string} value the lone name or value
 * @returns the set returned by `getUnicodePropertyValueSet`
 */
const handleLoneUnicodePropertyNameOrValue = (value) => {
	const attempts = [
		// Note: `unicodeMatchPropertyValue` throws on invalid values.
		() => {
			const category = unicodeMatchPropertyValue('General_Category', value);
			return getUnicodePropertyValueSet('General_Category', category);
		},
		() => getUnicodePropertyValueSet('Property_of_Strings', value),
	];
	for (const attempt of attempts) {
		try {
			return attempt();
		} catch (exception) {
			// Not this kind of property; fall through to the next candidate.
		}
	}
	// Note: `unicodeMatchProperty` throws on invalid properties.
	return getUnicodePropertyValueSet(unicodeMatchProperty(value));
};
101
/**
 * Resolves the body of a `\p{…}` / `\P{…}` escape into a character set plus a
 * set of (escaped) strings.
 *
 * @param {string} value escape body, either `Name` or `Property=Value`
 * @param {boolean} isNegative whether the escape is negated
 * @param {boolean} isUnicodeSetIgnoreCase when negating, complement against
 *   `UNICODE_IV_SET` instead of `UNICODE_SET`
 * @returns {{characters: object, strings: Set<string>}}
 * @throws {Error} when asked to negate a property of strings
 */
const getUnicodePropertyEscapeSet = (value, isNegative, isUnicodeSetIgnoreCase) => {
	const segments = value.split('=');
	let set;
	if (segments.length === 1) {
		set = handleLoneUnicodePropertyNameOrValue(segments[0]);
	} else {
		// The pattern consists of two parts, i.e. `Property=Value`.
		const canonicalProperty = unicodeMatchProperty(segments[0]);
		const canonicalValue = unicodeMatchPropertyValue(canonicalProperty, segments[1]);
		set = getUnicodePropertyValueSet(canonicalProperty, canonicalValue);
	}

	if (!isNegative) {
		const characters = set.characters.clone();
		// Strings like *️⃣ must be escaped so they can be safely used in unions.
		const strings = set.strings
			? new Set(set.strings.map((str) => str.replace(SYNTAX_CHARS, '\\$&')))
			: new Set();
		return { characters: characters, strings: strings };
	}

	if (set.strings) {
		throw new Error('Cannot negate Unicode property of strings');
	}
	const universe = isUnicodeSetIgnoreCase ? UNICODE_IV_SET : UNICODE_SET;
	return {
		characters: universe.clone().remove(set.characters),
		strings: new Set()
	};
};
131
/**
 * Builds character-class data (same shape as `getCharacterClassEmptyData()`)
 * for a single `\p{…}` / `\P{…}` escape.
 *
 * @param {string} property escape body, passed to `getUnicodePropertyEscapeSet`
 * @param {boolean} isNegative whether the escape is negated
 * @param {boolean} isUnicodeSetIgnoreCase forwarded to `getUnicodePropertyEscapeSet`
 * @param {boolean} shouldApplySCF when truthy, every code point is run through
 *   `simpleCaseFolding` first
 * @returns {object} data whose `singleChars` holds the code points and, when
 *   the property has strings, `longStrings` / `maybeIncludesStrings` are set
 */
const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnicodeSetIgnoreCase, shouldApplySCF) => {
	const resolved = getUnicodePropertyEscapeSet(property, isNegative, isUnicodeSetIgnoreCase);
	const data = getCharacterClassEmptyData();

	let singleChars = resolved.characters;
	if (shouldApplySCF) {
		const folded = resolved.characters.toArray().map((codePoint) => simpleCaseFolding(codePoint));
		singleChars = regenerate(folded);
	}

	const caseEqFlags = configGetCaseEqFlags();
	if (caseEqFlags) {
		// Iterate over a snapshot: the set is extended while we walk it.
		singleChars.toArray().forEach((codePoint) => {
			const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
			if (equivalents) {
				singleChars.add(equivalents);
			}
		});
	}
	data.singleChars = singleChars;

	const hasStrings = resolved.strings.size > 0;
	if (hasStrings) {
		data.longStrings = resolved.strings;
		data.maybeIncludesStrings = true;
	}
	return data;
};
152
// Bit flags selecting which case-equivalence tables `getCaseEquivalents` uses.
const CASE_EQ_FLAG_NONE = 0b00;
const CASE_EQ_FLAG_BMP = 0b01;
const CASE_EQ_FLAG_UNICODE = 0b10;

/**
 * Derives the case-equivalence flags from the current `config`.
 *
 * - With an active `i` modifier (`modifiersData.i === true`) and modifier
 *   transformation enabled: BMP equivalents, plus Unicode equivalents when
 *   the `u` or `v` flag is set.
 * - With no `i` modifier information (`undefined`): Unicode equivalents when
 *   the unicode flag is being transformed and the `i` flag is set.
 * - Otherwise: none.
 *
 * @returns {number} bitwise combination of `CASE_EQ_FLAG_*`
 */
function configGetCaseEqFlags() {
	const modifierI = config.modifiersData.i;

	if (modifierI === true) {
		if (!config.transform.modifiers) {
			return CASE_EQ_FLAG_NONE;
		}
		const isUnicodeMode = config.flags.unicode || config.flags.unicodeSets;
		return isUnicodeMode
			? CASE_EQ_FLAG_BMP | CASE_EQ_FLAG_UNICODE
			: CASE_EQ_FLAG_BMP;
	}

	if (modifierI === undefined && config.transform.unicodeFlag && config.flags.ignoreCase) {
		return CASE_EQ_FLAG_UNICODE;
	}

	return CASE_EQ_FLAG_NONE;
}
173
// Builds a regenerate method that walks the code points from `min` up to
// `max` and applies `methodName` (`add` or `remove`) to the set with the case
// equivalents of each code point. `min` is always processed, even when it is
// greater than `max`.
const makeCaseEquivalentRangeMethod = (methodName) => function(min, max, caseEqFlags) {
	let codePoint = min;
	do {
		const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
		if (equivalents) {
			this[methodName](equivalents);
		}
	} while (++codePoint <= max);
	return this;
};

// Given a range of code points, add any case-equivalent code points in that
// range to a set.
regenerate.prototype.iuAddRange = makeCaseEquivalentRangeMethod('add');
// Given a range of code points, remove any case-equivalent code points in
// that range from a set.
regenerate.prototype.iuRemoveRange = makeCaseEquivalentRangeMethod('remove');
196
/**
 * Replaces the contents of AST node `item` with the parse tree of `pattern`.
 * Trees that are not a character class, group or value are first wrapped in a
 * non-capturing group.
 *
 * @param {object} item AST node to overwrite in place (via `Object.assign`)
 * @param {string} pattern replacement pattern source
 */
const update = (item, pattern) => {
	const parserFlags = config.useUnicodeFlag ? 'u' : '';
	const parsed = parse(pattern, parserFlags, {
		lookbehind: true,
		namedGroups: true,
		unicodePropertyEscape: true,
		unicodeSet: true,
		modifiers: true,
	});
	// These node types can stand in for the original node as they are.
	const isSelfContained =
		parsed.type === 'characterClass' ||
		parsed.type === 'group' ||
		parsed.type === 'value';
	const replacement = isSelfContained ? parsed : wrap(parsed, pattern);
	Object.assign(item, replacement);
};
217
/**
 * Wraps a parse tree in a non-capturing group node.
 *
 * @param {object} tree node to become the group's only child
 * @param {string} pattern source text of `tree`, used to build `raw`
 * @returns {object} a `group` node with `behavior: 'ignore'`
 */
const wrap = (tree, pattern) => {
	const group = {
		'type': 'group',
		'behavior': 'ignore',
		'body': [tree],
		'raw': `(?:${ pattern })`
	};
	return group;
};
227
/**
 * Given any codepoint ch, returns false or an array of characters that are
 * case-equivalent to it, i.e. for every c in the array
 * Canonicalize(~, c) == Canonicalize(~, ch)
 *
 * where Canonicalize is defined in
 * https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
 *
 * With `CASE_EQ_FLAG_UNICODE`, the equivalents come from `iuMappings`. With
 * `CASE_EQ_FLAG_BMP`, the BMP equivalents of the code point itself and of
 * each Unicode equivalent are appended (ASCII letters take a fast path, the
 * rest comes from `iBMPMappings`). The result may contain duplicates.
 *
 * The returned array is always a fresh one: the arrays stored in the mapping
 * tables are never handed out or modified.
 *
 * @param {number} codePoint input code point
 * @param {number} flags bitwise flags composed of CASE_EQ_FLAG_*
 * @returns false | number[]
 */
const getCaseEquivalents = (codePoint, flags) => {
	if (flags === CASE_EQ_FLAG_NONE) {
		return false;
	}
	const mapped = ((flags & CASE_EQ_FLAG_UNICODE) ? iuMappings.get(codePoint) : undefined) || [];
	// A mapping is either a single code point or an array of code points.
	// Copy the array: the BMP pass below appends to `result`, and appending to
	// the table's own array would grow the shared data on every call.
	let result = typeof mapped === "number" ? [mapped] : mapped.slice();
	if (flags & CASE_EQ_FLAG_BMP) {
		// Iterate over a snapshot so that entries appended below are not revisited.
		for (const cp of [codePoint, ...result]) {
			// Fast path for ASCII characters
			if (cp >= 0x41 && cp <= 0x5a) {
				result.push(cp + 0x20);
			} else if (cp >= 0x61 && cp <= 0x7a) {
				result.push(cp - 0x20);
			} else {
				result = result.concat(iBMPMappings.get(cp) || []);
			}
		}
	}
	return result.length === 0 ? false : result;
};
259
/**
 * Returns the simple case folding of a code point, or the code point itself
 * when `iuFoldings` has no entry for it.
 * https://tc39.es/ecma262/#sec-maybesimplecasefolding
 *
 * @param {number} codePoint input code point
 * @returns {number} folded code point
 */
const simpleCaseFolding = (codePoint) => {
	// Fast path for ASCII characters: only A-Z fold (to a-z).
	const isAsciiUppercase = codePoint >= 0x41 && codePoint <= 0x5A;
	if (isAsciiUppercase) {
		return codePoint + 0x20;
	}
	if (codePoint <= 0x7F) {
		return codePoint;
	}
	return iuFoldings.get(codePoint) || codePoint;
};
271
/**
 * Builds the callbacks that fold one character-class member into the
 * accumulated class data for a given set operation.
 *
 * Every handler receives the accumulated `data` (see
 * `getCharacterClassEmptyData`) as first argument and updates it in place:
 *
 * - `single(data, cp)`: a single code point
 * - `regSet(data, set)`: a regenerate set
 * - `range(data, start, end)`: an inclusive code point range
 * - `iuRange(data, start, end, caseEqFlags)`: the case equivalents of a range
 * - `nested(data, nestedData)`: the data of a nested class, strings included
 *
 * For 'intersection' and 'subtraction', `data.first` marks the first operand,
 * which seeds the accumulator instead of being combined with it.
 *
 * @param {string} action 'union', 'union-negative', 'intersection' or 'subtraction'
 * @returns {object} the handler object described above
 * @throws {Error} when `action` is not one of the known actions
 */
const buildHandler = (action) => {
	switch (action) {
		case 'union':
			return {
				single: (data, cp) => {
					data.singleChars.add(cp);
				},
				regSet: (data, set2) => {
					data.singleChars.add(set2);
				},
				range: (data, start, end) => {
					data.singleChars.addRange(start, end);
				},
				iuRange: (data, start, end, caseEqFlags) => {
					data.singleChars.iuAddRange(start, end, caseEqFlags);
				},
				nested: (data, nestedData) => {
					data.singleChars.add(nestedData.singleChars);
					for (const str of nestedData.longStrings) data.longStrings.add(str);
					if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
				}
			};
		case 'union-negative': {
			// Adds the complement of the operand to the accumulated set.
			const regSet = (data, set2) => {
				data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
			};
			return {
				single: (data, cp) => {
					const unicode = UNICODE_SET.clone();
					data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
				},
				iuRange: (data, start, end, caseEqFlags) => {
					data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end, caseEqFlags).add(data.singleChars);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);
					// A negated nested class is not expected to carry strings.
					if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
				}
			};
		}
		case 'intersection': {
			const regSet = (data, set2) => {
				if (data.first) data.singleChars = set2;
				else data.singleChars.intersection(set2);
			};
			return {
				// Operands that consist of code points only can never keep a
				// string in the intersection, hence the `longStrings.clear()`.
				single: (data, cp) => {
					data.singleChars = data.first || data.singleChars.contains(cp) ? regenerate(cp) : regenerate();
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				regSet: (data, set) => {
					regSet(data, set);
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.intersection(regenerate().addRange(start, end));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				iuRange: (data, start, end, caseEqFlags) => {
					if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
					else data.singleChars.intersection(regenerate().iuAddRange(start, end, caseEqFlags));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);

					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Keep only the strings present in both operands.
						for (const str of data.longStrings) {
							if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
						if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
					}
				}
			};
		}
		case 'subtraction': {
			const regSet = (data, set2) => {
				if (data.first) data.singleChars.add(set2);
				else data.singleChars.remove(set2);
			};
			return {
				single: (data, cp) => {
					if (data.first) data.singleChars.add(cp);
					else data.singleChars.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.removeRange(start, end);
				},
				iuRange: (data, start, end, caseEqFlags) => {
					if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
					else data.singleChars.iuRemoveRange(start, end, caseEqFlags);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);

					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Drop the strings that also occur in the subtrahend.
						for (const str of data.longStrings) {
							if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
					}
				}
			};
		}
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* node:coverage ignore next */
		default:
			// Report the offending `action` itself: no character class item is
			// in scope here.
			throw new Error(`Unknown set action: ${ action }`);
	}
};
399
/**
 * Creates a fresh accumulator for computing a character class.
 *
 * @returns {object} data with an empty `singleChars` regenerate set, an empty
 *   `longStrings` set, `first: true`, and `transformed` initialised from
 *   `config.transform.unicodeFlag`
 */
const getCharacterClassEmptyData = () => {
	const data = {
		transformed: config.transform.unicodeFlag,
		singleChars: regenerate(),
		longStrings: new Set(),
		hasEmptyString: false,
		first: true,
		maybeIncludesStrings: false
	};
	return data;
};
408
/**
 * Returns `codePoint` followed by its case equivalents (if any).
 *
 * @param {number} codePoint input code point
 * @param {number} caseEqFlags flags forwarded to `getCaseEquivalents`
 * @returns {number[]} a new array that always starts with `codePoint`
 */
const concatCaseEquivalents = (codePoint, caseEqFlags) => {
	const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
	return [codePoint, ...(equivalents ? equivalents : [])];
};
416
/**
 * Computes class data for a `\q{…}` class-strings item.
 *
 * Single-character strings are added (with their case equivalents) to
 * `singleChars`; every other string, the empty one included, is turned into
 * pattern source and added to `longStrings`.
 *
 * @param {object} classStrings node whose `strings` each hold a `characters` array
 * @param {object} regenerateOptions options passed to regenerate's `toString`
 * @param {number} caseEqFlags bitwise `CASE_EQ_FLAG_*` flags
 * @param {boolean} shouldApplySCF whether to apply simple case folding first
 * @returns {object} the computed class data
 */
const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags, shouldApplySCF) => {
	const data = getCharacterClassEmptyData();

	const fold = (codePoint) => (shouldApplySCF ? simpleCaseFolding(codePoint) : codePoint);

	// Produces the pattern source for one character of a multi-character string.
	const stringifyCharacter = (ch) => {
		const codePoint = fold(ch.codePoint);
		if (caseEqFlags) {
			return regenerate(concatCaseEquivalents(codePoint, caseEqFlags)).toString(regenerateOptions);
		}
		if (codePoint !== ch.codePoint) {
			return regenerate(codePoint).toString(regenerateOptions);
		}
		return generate(ch);
	};

	for (const { characters } of classStrings.strings) {
		if (characters.length === 1) {
			const codePoint = fold(characters[0].codePoint);
			for (const cp of concatCaseEquivalents(codePoint, caseEqFlags)) {
				data.singleChars.add(cp);
			}
			continue;
		}
		data.longStrings.add(characters.map(stringifyCharacter).join(''));
		data.maybeIncludesStrings = true;
	}

	return data;
};
452
/**
 * Computes the code points and strings matched by a character class node,
 * recursing into nested classes.
 *
 * The class `kind` selects the handlers used for its members: a positive
 * handler for regular members and a negative one for negated nested classes.
 * For 'intersection' and 'subtraction' classes, simple case folding is forced
 * on when ignore-case mode is active.
 *
 * @param {object} characterClassItem character class node (`kind`, `body`, `negative`)
 * @param {object} regenerateOptions options passed to regenerate's `toString`
 * @param {boolean} [shouldApplySCF] whether members are case folded first
 * @returns {object} class data (see `getCharacterClassEmptyData`)
 * @throws {SyntaxError} when a negated class may contain strings
 * @throws {Error} on an unknown class kind or member type
 */
const computeCharacterClass = (characterClassItem, regenerateOptions, shouldApplySCF) => {
	const data = getCharacterClassEmptyData();
	const caseEqFlags = configGetCaseEqFlags();

	let applySCF = shouldApplySCF;
	let handlePositive;
	let handleNegative;

	const kind = characterClassItem.kind;
	if (kind === 'union') {
		handlePositive = buildHandler('union');
		handleNegative = buildHandler('union-negative');
	} else if (kind === 'intersection' || kind === 'subtraction') {
		// A negated operand of an intersection behaves like a subtraction,
		// and vice versa.
		handlePositive = buildHandler(kind);
		handleNegative = buildHandler(kind === 'intersection' ? 'subtraction' : 'intersection');
		if (config.transform.unicodeSetsFlag) data.transformed = true;
		if (config.isIgnoreCaseMode) {
			applySCF = true;
		}
	} else {
		// Only here as a safeguard; it should never be reached.
		/* node:coverage ignore next */
		throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`);
	}

	for (const member of characterClassItem.body) {
		switch (member.type) {
			case 'value': {
				const codePoint = applySCF ? simpleCaseFolding(member.codePoint) : member.codePoint;
				const equivalents = concatCaseEquivalents(codePoint, caseEqFlags);
				handlePositive.regSet(data, regenerate(equivalents));
				if (equivalents.length > 1) {
					data.transformed = true;
				}
				break;
			}
			case 'characterClassRange': {
				const min = member.min.codePoint;
				const max = member.max.codePoint;
				if (applySCF) {
					const folded = [];
					for (let cp = min; cp <= max; cp++) {
						folded.push(simpleCaseFolding(cp));
					}
					handlePositive.regSet(data, regenerate(folded));
				} else {
					handlePositive.range(data, min, max);
				}
				if (caseEqFlags) {
					// If case folding was applied, it is still ok to call iuRange because
					// the set [min, max] shares the same case equivalents with scf([min, max])
					handlePositive.iuRange(data, min, max, caseEqFlags);
					data.transformed = true;
				}
				break;
			}
			case 'characterClassEscape': {
				const escapeSet = getCharacterClassEscapeSet(
					member.value,
					config.flags.unicode || config.flags.unicodeSets,
					config.flags.ignoreCase,
					applySCF
				);
				handlePositive.regSet(data, escapeSet);
				break;
			}
			case 'unicodePropertyEscape': {
				const nestedData = getUnicodePropertyEscapeCharacterClassData(
					member.value,
					member.negative,
					config.flags.unicodeSets && config.isIgnoreCaseMode,
					applySCF
				);
				handlePositive.nested(data, nestedData);
				data.transformed =
					data.transformed ||
					config.transform.unicodePropertyEscapes ||
					(config.transform.unicodeSetsFlag && (nestedData.maybeIncludesStrings || characterClassItem.kind !== "union" || member.negative));
				break;
			}
			case 'characterClass': {
				const handler = member.negative ? handleNegative : handlePositive;
				const nestedData = computeCharacterClass(member, regenerateOptions, applySCF);
				handler.nested(data, nestedData);
				data.transformed = true;
				break;
			}
			case 'classStrings': {
				const stringsData = computeClassStrings(member, regenerateOptions, caseEqFlags, applySCF);
				handlePositive.nested(data, stringsData);
				data.transformed = true;
				break;
			}
			// The `default` clause is only here as a safeguard; it should never be
			// reached. Code coverage tools should ignore it.
			/* node:coverage ignore next */
			default:
				throw new Error(`Unknown term type: ${ member.type }`);
		}

		// Every later member is combined with, rather than seeding, the result.
		data.first = false;
	}

	if (characterClassItem.negative && data.maybeIncludesStrings) {
		throw new SyntaxError('Cannot negate set containing strings');
	}

	return data;
};
565
/**
 * Rewrites a character class node in place when its computed data is marked
 * as transformed; otherwise the node is left as it is.
 *
 * @param {object} characterClassItem node to rewrite
 * @param {object} regenerateOptions options passed to regenerate's `toString`
 * @param {object} [computed] precomputed class data; defaults to
 *   `computeCharacterClass(characterClassItem, regenerateOptions)`
 * @returns {object} `characterClassItem`
 */
const processCharacterClass = (
	characterClassItem,
	regenerateOptions,
	computed = computeCharacterClass(characterClassItem, regenerateOptions)
) => {
	const isNegated = characterClassItem.negative;
	const { singleChars, transformed, longStrings } = computed;
	if (!transformed) {
		return characterClassItem;
	}

	// If single chars already contains some astral character, regenerate (bmpOnly: true) will create valid regex strings
	const bmpOnly = regenerateContainsAstral(singleChars);
	const setStr = singleChars.toString(Object.assign({}, regenerateOptions, { bmpOnly: bmpOnly }));

	if (!isNegated) {
		const hasEmptyString = longStrings.has('');
		// Longest alternatives first.
		const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length);
		if (setStr !== '[]' || longStrings.size === 0) {
			// The set goes last, but before the empty string if there is one.
			pieces.splice(pieces.length - (hasEmptyString ? 1 : 0), 0, setStr);
		}
		update(characterClassItem, pieces.join('|'));
		return characterClassItem;
	}

	if (config.useUnicodeFlag) {
		const inner = setStr[0] === '[' ? setStr.slice(1, -1) : setStr;
		update(characterClassItem, `[^${inner}]`)
		return characterClassItem;
	}

	if (!(config.flags.unicode || config.flags.unicodeSets)) {
		update(characterClassItem, `(?!${setStr})[^]`);
		return characterClassItem;
	}

	if (!config.flags.ignoreCase) {
		// Generate negative set directly when case folding is not involved.
		const negativeSet = UNICODE_SET.clone().remove(singleChars);
		update(characterClassItem, negativeSet.toString(regenerateOptions));
		return characterClassItem;
	}

	const astralCharsSet = singleChars.clone().intersection(ASTRAL_SET);
	// Assumption: singleChars do not contain lone surrogates.
	// Regex like /[^\ud800]/u is not supported
	const surrogateOrBMPSetStr = singleChars
		.clone()
		.remove(astralCharsSet)
		.addRange(0xd800, 0xdfff)
		.toString({ bmpOnly: true });
	// Don't generate negative lookahead for astral characters
	// because the case folding is not working anyway as we break
	// code points into surrogate pairs.
	const astralNegativeSetStr = ASTRAL_SET
		.clone()
		.remove(astralCharsSet)
		.toString(regenerateOptions);
	// The transform here does not support lone surrogates.
	update(
		characterClassItem,
		`(?!${surrogateOrBMPSetStr})[^]|${astralNegativeSetStr}`
	);
	return characterClassItem;
};
626
/**
 * Throws when named references were seen that no named group satisfied.
 *
 * @param {{unmatchedReferences: object}} groups group bookkeeping object
 * @throws {Error} listing the unknown group names
 */
const assertNoUnmatchedReferences = (groups) => {
	const unknownNames = Object.keys(groups.unmatchedReferences);
	if (unknownNames.length === 0) {
		return;
	}
	throw new Error(`Unknown group names: ${unknownNames}`);
};
633
/**
 * Processes a group carrying inline modifiers, e.g. `(?i-s:…)`.
 *
 * The modifier state in `config.modifiersData` is updated while the group's
 * body is processed and restored afterwards. When modifiers are being
 * transformed, the group is turned into a plain non-capturing group.
 *
 * @param {object} item group node with `modifierFlags.enabling` / `.disabling`
 * @param {object} regenerateOptions options passed to regenerate's `toString`
 * @param {object} groups group bookkeeping object
 * @returns {object} `item`
 */
const processModifiers = (item, regenerateOptions, groups) => {
	const { enabling, disabling } = item.modifierFlags;

	// Snapshot to restore once the body has been processed.
	const savedModifiers = Object.assign({}, config.modifiersData);

	const applyFlags = (flags, state) => {
		for (const flag of flags) {
			config.modifiersData[flag] = state;
		}
	};
	applyFlags(enabling, true);
	applyFlags(disabling, false);

	if (config.transform.modifiers) {
		delete item.modifierFlags;
		item.behavior = 'ignore';
	}

	item.body = item.body.map((term) => processTerm(term, regenerateOptions, groups));

	config.modifiersData = savedModifiers;

	return item;
};
660
/**
 * Rewrites one AST node (and, recursively, its children) according to the
 * active `config`. Most nodes are updated in place and returned; named
 * references may be replaced by a new node, so callers must use the return
 * value.
 *
 * @param {object} item AST node
 * @param {object} regenerateOptions options passed to regenerate's `toString`
 * @param {object} groups group bookkeeping (`lastIndex`, `names`,
 *   `namesConflicts`, `unmatchedReferences`, optional `onNamedGroup`)
 * @returns {object} the processed node
 * @throws {Error} on duplicate group names, unsupported properties of
 *   strings, or an unknown node type
 */
const processTerm = (item, regenerateOptions, groups) => {
	// Shared by groups and quantifiers: process every child term.
	const processBody = () => {
		item.body = item.body.map((term) => processTerm(term, regenerateOptions, groups));
	};

	switch (item.type) {
		case 'dot': {
			if (config.transform.unicodeFlag) {
				const dotSet = getUnicodeDotSet(config.isDotAllMode);
				update(item, dotSet.toString(regenerateOptions));
				break;
			}
			// An explicit `s` modifier state wins over the `s` flag.
			const sModifier = config.modifiersData.s;
			const matchesEverything = sModifier != null
				? sModifier && config.transform.modifiers
				: config.transform.dotAllFlag;
			if (matchesEverything) {
				// TODO: consider changing this at the regenerate level.
				update(item, '[^]');
			}
			break;
		}
		case 'characterClass': {
			item = processCharacterClass(item, regenerateOptions);
			break;
		}
		case 'unicodePropertyEscape': {
			const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative, config.flags.unicodeSets && config.isIgnoreCaseMode);
			if (data.maybeIncludesStrings) {
				if (!config.flags.unicodeSets) {
					throw new Error(
						'Properties of strings are only supported when using the unicodeSets (v) flag.'
					);
				}
				if (config.transform.unicodeSetsFlag) {
					data.transformed = true;
					item = processCharacterClass(item, regenerateOptions, data);
				}
			} else if (config.transform.unicodePropertyEscapes || configGetCaseEqFlags()) {
				update(item, data.singleChars.toString(regenerateOptions));
			}
			break;
		}
		case 'characterClassEscape': {
			if (config.transform.unicodeFlag) {
				const escapeSet = getCharacterClassEscapeSet(
					item.value,
					/* config.transform.unicodeFlag implies config.flags.unicode */ true,
					config.flags.ignoreCase
				);
				update(item, escapeSet.toString(regenerateOptions));
			}
			break;
		}
		case 'group': {
			if (item.behavior === 'normal') {
				groups.lastIndex++;
			}
			if (item.name) {
				const name = item.name.value;

				if (groups.namesConflicts[name]) {
					throw new Error(
						`Group '${ name }' has already been defined in this context.`
					);
				}
				groups.namesConflicts[name] = true;

				if (config.transform.namedGroups) {
					delete item.name;
				}

				const index = groups.lastIndex;
				if (!groups.names[name]) {
					groups.names[name] = [];
				}
				groups.names[name].push(index);

				if (groups.onNamedGroup) {
					groups.onNamedGroup.call(null, name, index);
				}

				if (groups.unmatchedReferences[name]) {
					delete groups.unmatchedReferences[name];
				}
			}
			if (item.modifierFlags) {
				return processModifiers(item, regenerateOptions, groups);
			}
			processBody();
			break;
		}
		case 'quantifier': {
			processBody();
			break;
		}
		case 'disjunction': {
			const outerNamesConflicts = groups.namesConflicts;
			item.body = item.body.map((term) => {
				// Each alternative gets its own conflict scope layered on the outer one.
				groups.namesConflicts = Object.create(outerNamesConflicts);
				return processTerm(term, regenerateOptions, groups);
			});
			break;
		}
		case 'alternative': {
			item.body = flatMap(item.body, (term) => {
				const processed = processTerm(term, regenerateOptions, groups);
				// Alternatives cannot contain alternatives; flatten them.
				return processed.type === 'alternative' ? processed.body : processed;
			});
			break;
		}
		case 'value': {
			const codePoint = item.codePoint;
			const caseEqFlags = configGetCaseEqFlags();
			const equivalents = concatCaseEquivalents(codePoint, caseEqFlags);
			const isPrintableAsciiSymbol =
				item.kind === "symbol" && codePoint >= 0x20 && codePoint <= 0x7E;
			if (equivalents.length === 1 && isPrintableAsciiSymbol) {
				// skip regenerate when it is a printable ASCII symbol
				break;
			}
			update(item, regenerate(equivalents).toString(regenerateOptions));
			break;
		}
		case 'reference': {
			if (!item.name) {
				break;
			}
			const name = item.name.value;
			const indexes = groups.names[name];
			if (!indexes) {
				groups.unmatchedReferences[name] = true;
			}
			if (!config.transform.namedGroups) {
				break;
			}
			if (!indexes) {
				// This named reference comes before the group where it’s defined,
				// so it’s always an empty match.
				return {
					'type': 'group',
					'behavior': 'ignore',
					'body': [],
					'raw': '(?:)',
				};
			}
			const references = indexes.map((index) => ({
				'type': 'reference',
				'matchIndex': index,
				'raw': '\\' + index,
			}));
			if (references.length === 1) {
				return references[0];
			}
			return {
				'type': 'alternative',
				'body': references,
				'raw': references.map((term) => term.raw).join(''),
			};
		}
		case 'anchor': {
			if (config.modifiersData.m && config.transform.modifiers) {
				if (item.kind === 'start') {
					update(item, `(?:^|(?<=${NEWLINE_SET.toString()}))`);
				} else if (item.kind === 'end') {
					update(item, `(?:$|(?=${NEWLINE_SET.toString()}))`);
				}
			}
			break;
		}
		case 'empty':
			// Nothing to do here.
			break;
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* node:coverage ignore next */
		default:
			throw new Error(`Unknown term type: ${ item.type }`);
	}
	return item;
};
829
// Module-level state describing the pattern currently being rewritten.
const config = {
	// Which flags the input pattern was given.
	flags: {
		ignoreCase: false,
		unicode: false,
		unicodeSets: false,
		dotAll: false,
		multiline: false,
	},
	// Which features have to be transformed away.
	transform: {
		dotAllFlag: false,
		unicodeFlag: false,
		unicodeSetsFlag: false,
		unicodePropertyEscapes: false,
		namedGroups: false,
		modifiers: false,
	},
	// State of the inline modifiers; `undefined` means "not set".
	modifiersData: {
		i: undefined,
		s: undefined,
		m: undefined,
	},
	// Whether generated patterns may rely on the `u` flag: the input uses `u`
	// or `v` and the unicode flag is not being transformed.
	get useUnicodeFlag() {
		const isUnicodeMode = this.flags.unicode || this.flags.unicodeSets;
		return isUnicodeMode && !this.transform.unicodeFlag;
	},
	// The `s` modifier state if set, otherwise the `s` flag.
	get isDotAllMode() {
		const { s } = this.modifiersData;
		return s !== undefined ? s : this.flags.dotAll;
	},
	// The `i` modifier state if set, otherwise the `i` flag.
	get isIgnoreCaseMode() {
		const { i } = this.modifiersData;
		return i !== undefined ? i : this.flags.ignoreCase;
	}
};
861
/**
 * Validates the options object passed to `rewritePattern`. Keys are checked
 * in their own order and the first invalid one throws.
 *
 * @param {object} [options] user options; a falsy value is accepted as-is
 * @throws {Error} on an unknown option or an invalid option value
 */
const validateOptions = (options) => {
	if (!options) return;

	const transformOnlyOptions = [
		'dotAllFlag',
		'unicodeFlag',
		'unicodePropertyEscapes',
		'unicodeSetsFlag',
		'namedGroups',
	];
	const callbackOptions = ['onNamedGroup', 'onNewFlags'];

	Object.keys(options).forEach((key) => {
		const value = options[key];
		const isUnset = value == null || value === false;

		if (transformOnlyOptions.includes(key)) {
			if (!isUnset && value !== 'transform') {
				throw new Error(`.${key} must be false (default) or 'transform'.`);
			}
		} else if (key === 'modifiers') {
			// todo: remove modifiers: 'parse' in regexpu-core v7
			if (!isUnset && value !== 'parse' && value !== 'transform') {
				throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
			}
		} else if (callbackOptions.includes(key)) {
			if (value != null && typeof value !== 'function') {
				throw new Error(`.${key} must be a function.`);
			}
		} else {
			throw new Error(`.${key} is not a valid regexpu-core option.`);
		}
	});
};
894
// Whether `flags` is set and contains `flag`.
const hasFlag = (flags, flag) => {
	if (!flags) {
		return false;
	}
	return flags.includes(flag);
};
// Whether the option `name` is present in `options` and set to 'transform'.
const transform = (options, name) => {
	if (!options) {
		return false;
	}
	return options[name] === 'transform';
};
897
/**
 * Rewrites a regular expression pattern so that the features selected in
 * `options` are no longer needed to run it.
 *
 * The module-level `config` is reset from `flags` and `options` on every
 * call, the pattern is parsed, every term is processed, and the resulting
 * tree is turned back into pattern source.
 *
 * @param {string} pattern source of the regular expression
 * @param {string} [flags] flags of the regular expression; may be omitted
 * @param {object} [options] see `validateOptions` for the accepted keys;
 *   `onNamedGroup(name, index)` is called for every named group and
 *   `onNewFlags(flags)` receives the flags to use with the rewritten pattern
 * @returns {string} the rewritten pattern
 * @throws {Error} on invalid options or on references to unknown group names
 */
const rewritePattern = (pattern, flags, options) => {
	validateOptions(options);

	config.flags.unicode = hasFlag(flags, 'u');
	config.flags.unicodeSets = hasFlag(flags, 'v');
	config.flags.ignoreCase = hasFlag(flags, 'i');
	config.flags.dotAll = hasFlag(flags, 's');
	config.flags.multiline = hasFlag(flags, 'm');

	config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');
	config.transform.unicodeFlag = (config.flags.unicode || config.flags.unicodeSets) && transform(options, 'unicodeFlag');
	config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag');

	// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
	config.transform.unicodePropertyEscapes = (config.flags.unicode || config.flags.unicodeSets) && (
		transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes')
	);
	config.transform.namedGroups = transform(options, 'namedGroups');
	config.transform.modifiers = transform(options, 'modifiers');

	config.modifiersData.i = undefined;
	config.modifiersData.s = undefined;
	config.modifiersData.m = undefined;

	const regjsparserFeatures = {
		// Enable every stable RegExp feature by default
		'modifiers': true,
		'unicodePropertyEscape': true,
		'unicodeSet': true,
		'namedGroups': true,
		'lookbehind': true,
	};

	const regenerateOptions = {
		'hasUnicodeFlag': config.useUnicodeFlag,
		'bmpOnly': !config.flags.unicode && !config.flags.unicodeSets
	};

	const groups = {
		'onNamedGroup': options && options.onNamedGroup,
		'lastIndex': 0,
		'names': Object.create(null), // { [name]: Array<index> }
		'namesConflicts': Object.create(null), // { [name]: true }
		'unmatchedReferences': Object.create(null) // { [name]: true }
	};

	const tree = parse(pattern, flags, regjsparserFeatures);

	if (config.transform.modifiers) {
		if (/\(\?[a-z]*-[a-z]+:/.test(pattern)) {
			// the pattern _likely_ contain inline disabled modifiers
			// we need to traverse to make sure that they are actually modifiers and to collect them
			const allDisabledModifiers = Object.create(null);
			const itemStack = [tree];
			// Driven by the stack length so that a nullish entry inside an
			// array cannot cut the traversal short.
			while (itemStack.length > 0) {
				const node = itemStack.pop();
				if (Array.isArray(node)) {
					itemStack.push(...node);
				} else if (typeof node === 'object' && node !== null) {
					for (const key of Object.keys(node)) {
						const value = node[key];
						if (key === 'modifierFlags') {
							for (const flag of value.disabling) {
								allDisabledModifiers[flag] = true;
							}
						} else if (typeof value === 'object' && value !== null) {
							itemStack.push(value);
						}
					}
				}
			}
			// A modifier that gets disabled somewhere starts out with the
			// state of the corresponding flag.
			if (allDisabledModifiers.i) {
				config.modifiersData.i = config.flags.ignoreCase;
			}
			if (allDisabledModifiers.m) {
				config.modifiersData.m = config.flags.multiline;
			}
			if (allDisabledModifiers.s) {
				config.modifiersData.s = config.flags.dotAll;
			}
		}
	}

	// Note: `processTerm` mutates `tree` and `groups`.
	processTerm(tree, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);

	const onNewFlags = options && options.onNewFlags;
	if (onNewFlags) {
		// `flags` is optional everywhere else in this function, so it must not
		// be required here either.
		let newFlags = (flags || '').split('').filter((flag) => !config.modifiersData[flag]).join('');
		if (config.transform.unicodeSetsFlag) {
			newFlags = newFlags.replace('v', 'u');
		}
		if (config.transform.unicodeFlag) {
			newFlags = newFlags.replace('u', '');
		}
		if (config.transform.dotAllFlag) {
			newFlags = newFlags.replace('s', '');
		}
		onNewFlags(newFlags);
	}

	return generate(tree);
};
1002
1003module.exports = rewritePattern;
Note: See TracBrowser for help on using the repository browser.